124 files changed, 0 insertions, 78381 deletions
diff --git a/third_party/aom/av1/encoder/aq_complexity.c b/third_party/aom/av1/encoder/aq_complexity.c
deleted file mode 100644
index 80f8e2e66..000000000
--- a/third_party/aom/av1/encoder/aq_complexity.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-#include <math.h>
-
-#include "av1/encoder/aq_complexity.h"
-#include "av1/encoder/aq_variance.h"
-#include "av1/encoder/encodeframe.h"
-#include "av1/common/seg_common.h"
-#include "av1/encoder/segmentation.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/system_state.h"
-
-#define AQ_C_SEGMENTS 5
-#define DEFAULT_AQ2_SEG 3  // Neutral Q segment
-#define AQ_C_STRENGTHS 3
-static const double aq_c_q_adj_factor[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = {
-  { 1.75, 1.25, 1.05, 1.00, 0.90 },
-  { 2.00, 1.50, 1.15, 1.00, 0.85 },
-  { 2.50, 1.75, 1.25, 1.00, 0.80 }
-};
-static const double aq_c_transitions[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = {
-  { 0.15, 0.30, 0.55, 2.00, 100.0 },
-  { 0.20, 0.40, 0.65, 2.00, 100.0 },
-  { 0.25, 0.50, 0.75, 2.00, 100.0 }
-};
-static const double aq_c_var_thresholds[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = {
-  { -4.0, -3.0, -2.0, 100.00, 100.0 },
-  { -3.5, -2.5, -1.5, 100.00, 100.0 },
-  { -3.0, -2.0, -1.0, 100.00, 100.0 }
-};
-
-static int get_aq_c_strength(int q_index, aom_bit_depth_t bit_depth) {
-  // Approximate base quatizer (truncated to int)
-  const int base_quant = av1_ac_quant_Q3(q_index, 0, bit_depth) / 4;
-  return (base_quant > 10) + (base_quant > 25);
-}
-
-void av1_setup_in_frame_q_adj(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  struct segmentation *const seg = &cm->seg;
-  int resolution_change =
-      cm->prev_frame && (cm->width != cm->prev_frame->width ||
-                         cm->height != cm->prev_frame->height);
-
-  // Make SURE use of floating point in this function is safe.
-  aom_clear_system_state();
-
-  if (resolution_change) {
-    memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
-    av1_clearall_segfeatures(seg);
-    av1_disable_segmentation(seg);
-    return;
-  }
-
-  if (frame_is_intra_only(cm) || cm->error_resilient_mode ||
-      cpi->refresh_alt_ref_frame ||
-      (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
-    int segment;
-    const int aq_strength =
-        get_aq_c_strength(cm->base_qindex, cm->seq_params.bit_depth);
-
-    // Clear down the segment map.
-    memset(cpi->segmentation_map, DEFAULT_AQ2_SEG, cm->mi_rows * cm->mi_cols);
-
-    av1_clearall_segfeatures(seg);
-
-    // Segmentation only makes sense if the target bits per SB is above a
-    // threshold. Below this the overheads will usually outweigh any benefit.
-    if (cpi->rc.sb64_target_rate < 256) {
-      av1_disable_segmentation(seg);
-      return;
-    }
-
-    av1_enable_segmentation(seg);
-
-    // Default segment "Q" feature is disabled so it defaults to the baseline Q.
-    av1_disable_segfeature(seg, DEFAULT_AQ2_SEG, SEG_LVL_ALT_Q);
-
-    // Use some of the segments for in frame Q adjustment.
-    for (segment = 0; segment < AQ_C_SEGMENTS; ++segment) {
-      int qindex_delta;
-
-      if (segment == DEFAULT_AQ2_SEG) continue;
-
-      qindex_delta = av1_compute_qdelta_by_rate(
-          &cpi->rc, cm->frame_type, cm->base_qindex,
-          aq_c_q_adj_factor[aq_strength][segment], cm->seq_params.bit_depth);
-
-      // For AQ complexity mode, we dont allow Q0 in a segment if the base
-      // Q is not 0. Q0 (lossless) implies 4x4 only and in AQ mode 2 a segment
-      // Q delta is sometimes applied without going back around the rd loop.
-      // This could lead to an illegal combination of partition size and q.
-      if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) {
-        qindex_delta = -cm->base_qindex + 1;
-      }
-      if ((cm->base_qindex + qindex_delta) > 0) {
-        av1_enable_segfeature(seg, segment, SEG_LVL_ALT_Q);
-        av1_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta);
-      }
-    }
-  }
-}
-
-#define DEFAULT_LV_THRESH 10.0
-#define MIN_DEFAULT_LV_THRESH 8.0
-// Select a segment for the current block.
-// The choice of segment for a block depends on the ratio of the projected
-// bits for the block vs a target average and its spatial complexity.
-void av1_caq_select_segment(const AV1_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
-                            int mi_row, int mi_col, int projected_rate) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-
-  const int mi_offset = mi_row * cm->mi_cols + mi_col;
-  const int xmis = AOMMIN(cm->mi_cols - mi_col, mi_size_wide[bs]);
-  const int ymis = AOMMIN(cm->mi_rows - mi_row, mi_size_high[bs]);
-  int x, y;
-  int i;
-  unsigned char segment;
-
-  if (0) {
-    segment = DEFAULT_AQ2_SEG;
-  } else {
-    // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh).
-    // It is converted to bits << AV1_PROB_COST_SHIFT units.
-    const int64_t num = (int64_t)(cpi->rc.sb64_target_rate * xmis * ymis)
-                        << AV1_PROB_COST_SHIFT;
-    const int denom = cm->seq_params.mib_size * cm->seq_params.mib_size;
-    const int target_rate = (int)(num / denom);
-    double logvar;
-    double low_var_thresh;
-    const int aq_strength =
-        get_aq_c_strength(cm->base_qindex, cm->seq_params.bit_depth);
-
-    aom_clear_system_state();
-    low_var_thresh =
-        (cpi->oxcf.pass == 2)
-            ? AOMMAX(exp(cpi->twopass.mb_av_energy), MIN_DEFAULT_LV_THRESH)
-            : DEFAULT_LV_THRESH;
-
-    av1_setup_src_planes(mb, cpi->source, mi_row, mi_col, num_planes);
-    logvar = av1_log_block_var(cpi, mb, bs);
-
-    segment = AQ_C_SEGMENTS - 1;  // Just in case no break out below.
-    for (i = 0; i < AQ_C_SEGMENTS; ++i) {
-      // Test rate against a threshold value and variance against a threshold.
-      // Increasing segment number (higher variance and complexity) = higher Q.
-      if ((projected_rate < target_rate * aq_c_transitions[aq_strength][i]) &&
-          (logvar < (low_var_thresh + aq_c_var_thresholds[aq_strength][i]))) {
-        segment = i;
-        break;
-      }
-    }
-  }
-
-  // Fill in the entires in the segment map corresponding to this SB64.
-  for (y = 0; y < ymis; y++) {
-    for (x = 0; x < xmis; x++) {
-      cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment;
-    }
-  }
-}
diff --git a/third_party/aom/av1/encoder/aq_complexity.h b/third_party/aom/av1/encoder/aq_complexity.h
deleted file mode 100644
index 3421d74c9..000000000
--- a/third_party/aom/av1/encoder/aq_complexity.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_AQ_COMPLEXITY_H_
-#define AOM_AV1_ENCODER_AQ_COMPLEXITY_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "av1/common/enums.h"
-
-struct AV1_COMP;
-struct macroblock;
-
-// Select a segment for the current Block.
-void av1_caq_select_segment(const struct AV1_COMP *cpi, struct macroblock *,
-                            BLOCK_SIZE bs, int mi_row, int mi_col,
-                            int projected_rate);
-
-// This function sets up a set of segments with delta Q values around
-// the baseline frame quantizer.
-void av1_setup_in_frame_q_adj(struct AV1_COMP *cpi);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_AQ_COMPLEXITY_H_
diff --git a/third_party/aom/av1/encoder/aq_cyclicrefresh.c b/third_party/aom/av1/encoder/aq_cyclicrefresh.c
deleted file mode 100644
index f532d48da..000000000
--- a/third_party/aom/av1/encoder/aq_cyclicrefresh.c
+++ /dev/null
@@ -1,580 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-#include <math.h>
-
-#include "av1/common/seg_common.h"
-#include "av1/encoder/aq_cyclicrefresh.h"
-#include "av1/encoder/ratectrl.h"
-#include "av1/encoder/segmentation.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/system_state.h"
-
-struct CYCLIC_REFRESH {
-  // Percentage of blocks per frame that are targeted as candidates
-  // for cyclic refresh.
-  int percent_refresh;
-  // Maximum q-delta as percentage of base q.
-  int max_qdelta_perc;
-  // Superblock starting index for cycling through the frame.
-  int sb_index;
-  // Controls how long block will need to wait to be refreshed again, in
-  // excess of the cycle time, i.e., in the case of all zero motion, block
-  // will be refreshed every (100/percent_refresh + time_for_refresh) frames.
-  int time_for_refresh;
-  // Target number of (8x8) blocks that are set for delta-q.
-  int target_num_seg_blocks;
-  // Actual number of (8x8) blocks that were applied delta-q.
-  int actual_num_seg1_blocks;
-  int actual_num_seg2_blocks;
-  // RD mult. parameters for segment 1.
-  int rdmult;
-  // Cyclic refresh map.
-  int8_t *map;
-  // Map of the last q a block was coded at.
-  uint8_t *last_coded_q_map;
-  // Thresholds applied to the projected rate/distortion of the coding block,
-  // when deciding whether block should be refreshed.
-  int64_t thresh_rate_sb;
-  int64_t thresh_dist_sb;
-  // Threshold applied to the motion vector (in units of 1/8 pel) of the
-  // coding block, when deciding whether block should be refreshed.
-  int16_t motion_thresh;
-  // Rate target ratio to set q delta.
-  double rate_ratio_qdelta;
-  // Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2.
-  int rate_boost_fac;
-  double low_content_avg;
-  int qindex_delta[3];
-};
-
-CYCLIC_REFRESH *av1_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
-  size_t last_coded_q_map_size;
-  CYCLIC_REFRESH *const cr = aom_calloc(1, sizeof(*cr));
-  if (cr == NULL) return NULL;
-
-  cr->map = aom_calloc(mi_rows * mi_cols, sizeof(*cr->map));
-  if (cr->map == NULL) {
-    av1_cyclic_refresh_free(cr);
-    return NULL;
-  }
-  last_coded_q_map_size = mi_rows * mi_cols * sizeof(*cr->last_coded_q_map);
-  cr->last_coded_q_map = aom_malloc(last_coded_q_map_size);
-  if (cr->last_coded_q_map == NULL) {
-    av1_cyclic_refresh_free(cr);
-    return NULL;
-  }
-  assert(MAXQ <= 255);
-  memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);
-
-  return cr;
-}
-
-void av1_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
-  if (cr != NULL) {
-    aom_free(cr->map);
-    aom_free(cr->last_coded_q_map);
-    aom_free(cr);
-  }
-}
-
-// Check if we should turn off cyclic refresh based on bitrate condition.
-static int apply_cyclic_refresh_bitrate(const AV1_COMMON *cm,
-                                        const RATE_CONTROL *rc) {
-  // Turn off cyclic refresh if bits available per frame is not sufficiently
-  // larger than bit cost of segmentation. Segment map bit cost should scale
-  // with number of seg blocks, so compare available bits to number of blocks.
-  // Average bits available per frame = avg_frame_bandwidth
-  // Number of (8x8) blocks in frame = mi_rows * mi_cols;
-  const float factor = 0.25;
-  const int number_blocks = cm->mi_rows * cm->mi_cols;
-  // The condition below corresponds to turning off at target bitrates:
-  // (at 30fps), ~12kbps for CIF, 36kbps for VGA, 100kps for HD/720p.
-  // Also turn off at very small frame sizes, to avoid too large fraction of
-  // superblocks to be refreshed per frame. Threshold below is less than QCIF.
-  if (rc->avg_frame_bandwidth < factor * number_blocks ||
-      number_blocks / 64 < 5)
-    return 0;
-  else
-    return 1;
-}
-
-// Check if this coding block, of size bsize, should be considered for refresh
-// (lower-qp coding). Decision can be based on various factors, such as
-// size of the coding block (i.e., below min_block size rejected), coding
-// mode, and rate/distortion.
-static int candidate_refresh_aq(const CYCLIC_REFRESH *cr,
-                                const MB_MODE_INFO *mbmi, int64_t rate,
-                                int64_t dist, int bsize) {
-  MV mv = mbmi->mv[0].as_mv;
-  // Reject the block for lower-qp coding if projected distortion
-  // is above the threshold, and any of the following is true:
-  // 1) mode uses large mv
-  // 2) mode is an intra-mode
-  // Otherwise accept for refresh.
-  if (dist > cr->thresh_dist_sb &&
-      (mv.row > cr->motion_thresh || mv.row < -cr->motion_thresh ||
-       mv.col > cr->motion_thresh || mv.col < -cr->motion_thresh ||
-       !is_inter_block(mbmi)))
-    return CR_SEGMENT_ID_BASE;
-  else if (bsize >= BLOCK_16X16 && rate < cr->thresh_rate_sb &&
-           is_inter_block(mbmi) && mbmi->mv[0].as_int == 0 &&
-           cr->rate_boost_fac > 10)
-    // More aggressive delta-q for bigger blocks with zero motion.
-    return CR_SEGMENT_ID_BOOST2;
-  else
-    return CR_SEGMENT_ID_BOOST1;
-}
-
-// Compute delta-q for the segment.
-static int compute_deltaq(const AV1_COMP *cpi, int q, double rate_factor) {
-  const CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  const RATE_CONTROL *const rc = &cpi->rc;
-  int deltaq =
-      av1_compute_qdelta_by_rate(rc, cpi->common.frame_type, q, rate_factor,
-                                 cpi->common.seq_params.bit_depth);
-  if ((-deltaq) > cr->max_qdelta_perc * q / 100) {
-    deltaq = -cr->max_qdelta_perc * q / 100;
-  }
-  return deltaq;
-}
-
-// For the just encoded frame, estimate the bits, incorporating the delta-q
-// from non-base segment. For now ignore effect of multiple segments
-// (with different delta-q). Note this function is called in the postencode
-// (called from rc_update_rate_correction_factors()).
-int av1_cyclic_refresh_estimate_bits_at_q(const AV1_COMP *cpi,
-                                          double correction_factor) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  int estimated_bits;
-  int mbs = cm->MBs;
-  int num8x8bl = mbs << 2;
-  // Weight for non-base segments: use actual number of blocks refreshed in
-  // previous/just encoded frame. Note number of blocks here is in 8x8 units.
-  double weight_segment1 = (double)cr->actual_num_seg1_blocks / num8x8bl;
-  double weight_segment2 = (double)cr->actual_num_seg2_blocks / num8x8bl;
-  // Take segment weighted average for estimated bits.
-  estimated_bits =
-      (int)((1.0 - weight_segment1 - weight_segment2) *
-                av1_estimate_bits_at_q(cm->frame_type, cm->base_qindex, mbs,
-                                       correction_factor,
-                                       cm->seq_params.bit_depth) +
-            weight_segment1 * av1_estimate_bits_at_q(
-                                  cm->frame_type,
-                                  cm->base_qindex + cr->qindex_delta[1], mbs,
-                                  correction_factor, cm->seq_params.bit_depth) +
-            weight_segment2 * av1_estimate_bits_at_q(
-                                  cm->frame_type,
-                                  cm->base_qindex + cr->qindex_delta[2], mbs,
-                                  correction_factor, cm->seq_params.bit_depth));
-  return estimated_bits;
-}
-
-// Prior to encoding the frame, estimate the bits per mb, for a given q = i and
-// a corresponding delta-q (for segment 1). This function is called in the
-// rc_regulate_q() to set the base qp index.
-// Note: the segment map is set to either 0/CR_SEGMENT_ID_BASE (no refresh) or
-// to 1/CR_SEGMENT_ID_BOOST1 (refresh) for each superblock, prior to encoding.
-int av1_cyclic_refresh_rc_bits_per_mb(const AV1_COMP *cpi, int i,
-                                      double correction_factor) {
-  const AV1_COMMON *const cm = &cpi->common;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  int bits_per_mb;
-  int num8x8bl = cm->MBs << 2;
-  // Weight for segment prior to encoding: take the average of the target
-  // number for the frame to be encoded and the actual from the previous frame.
-  double weight_segment =
-      (double)((cr->target_num_seg_blocks + cr->actual_num_seg1_blocks +
-                cr->actual_num_seg2_blocks) >>
-               1) /
-      num8x8bl;
-  // Compute delta-q corresponding to qindex i.
-  int deltaq = compute_deltaq(cpi, i, cr->rate_ratio_qdelta);
-  // Take segment weighted average for bits per mb.
-  bits_per_mb =
-      (int)((1.0 - weight_segment) *
-                av1_rc_bits_per_mb(cm->frame_type, i, correction_factor,
-                                   cm->seq_params.bit_depth) +
-            weight_segment * av1_rc_bits_per_mb(cm->frame_type, i + deltaq,
-                                                correction_factor,
-                                                cm->seq_params.bit_depth));
-  return bits_per_mb;
-}
-
-// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col),
-// check if we should reset the segment_id, and update the cyclic_refresh map
-// and segmentation map.
-void av1_cyclic_refresh_update_segment(const AV1_COMP *cpi,
-                                       MB_MODE_INFO *const mbmi, int mi_row,
-                                       int mi_col, BLOCK_SIZE bsize,
-                                       int64_t rate, int64_t dist, int skip) {
-  const AV1_COMMON *const cm = &cpi->common;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  const int bw = mi_size_wide[bsize];
-  const int bh = mi_size_high[bsize];
-  const int xmis = AOMMIN(cm->mi_cols - mi_col, bw);
-  const int ymis = AOMMIN(cm->mi_rows - mi_row, bh);
-  const int block_index = mi_row * cm->mi_cols + mi_col;
-  const int refresh_this_block =
-      candidate_refresh_aq(cr, mbmi, rate, dist, bsize);
-  // Default is to not update the refresh map.
-  int new_map_value = cr->map[block_index];
-  int x = 0;
-  int y = 0;
-
-  // If this block is labeled for refresh, check if we should reset the
-  // segment_id.
-  if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) {
-    mbmi->segment_id = refresh_this_block;
-    // Reset segment_id if will be skipped.
-    if (skip) mbmi->segment_id = CR_SEGMENT_ID_BASE;
-  }
-
-  // Update the cyclic refresh map, to be used for setting segmentation map
-  // for the next frame. If the block  will be refreshed this frame, mark it
-  // as clean. The magnitude of the -ve influences how long before we consider
-  // it for refresh again.
-  if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) {
-    new_map_value = -cr->time_for_refresh;
-  } else if (refresh_this_block) {
-    // Else if it is accepted as candidate for refresh, and has not already
-    // been refreshed (marked as 1) then mark it as a candidate for cleanup
-    // for future time (marked as 0), otherwise don't update it.
-    if (cr->map[block_index] == 1) new_map_value = 0;
-  } else {
-    // Leave it marked as block that is not candidate for refresh.
-    new_map_value = 1;
-  }
-
-  // Update entries in the cyclic refresh map with new_map_value, and
-  // copy mbmi->segment_id into global segmentation map.
-  for (y = 0; y < ymis; y++)
-    for (x = 0; x < xmis; x++) {
-      int map_offset = block_index + y * cm->mi_cols + x;
-      cr->map[map_offset] = new_map_value;
-      cpi->segmentation_map[map_offset] = mbmi->segment_id;
-      // Inter skip blocks were clearly not coded at the current qindex, so
-      // don't update the map for them. For cases where motion is non-zero or
-      // the reference frame isn't the previous frame, the previous value in
-      // the map for this spatial location is not entirely correct.
-      if ((!is_inter_block(mbmi) || !skip) &&
-          mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) {
-        cr->last_coded_q_map[map_offset] = clamp(
-            cm->base_qindex + cr->qindex_delta[mbmi->segment_id], 0, MAXQ);
-      } else if (is_inter_block(mbmi) && skip &&
-                 mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) {
-        cr->last_coded_q_map[map_offset] =
-            AOMMIN(clamp(cm->base_qindex + cr->qindex_delta[mbmi->segment_id],
-                         0, MAXQ),
-                   cr->last_coded_q_map[map_offset]);
-      }
-    }
-}
-
-// Update the actual number of blocks that were applied the segment delta q.
-void av1_cyclic_refresh_postencode(AV1_COMP *const cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  unsigned char *const seg_map = cpi->segmentation_map;
-  int mi_row, mi_col;
-  cr->actual_num_seg1_blocks = 0;
-  cr->actual_num_seg2_blocks = 0;
-  for (mi_row = 0; mi_row < cm->mi_rows; mi_row++)
-    for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
-      if (cyclic_refresh_segment_id(seg_map[mi_row * cm->mi_cols + mi_col]) ==
-          CR_SEGMENT_ID_BOOST1)
-        cr->actual_num_seg1_blocks++;
-      else if (cyclic_refresh_segment_id(
-                   seg_map[mi_row * cm->mi_cols + mi_col]) ==
-               CR_SEGMENT_ID_BOOST2)
-        cr->actual_num_seg2_blocks++;
-    }
-}
-
-// Set golden frame update interval, for 1 pass CBR mode.
-void av1_cyclic_refresh_set_golden_update(AV1_COMP *const cpi) {
-  RATE_CONTROL *const rc = &cpi->rc;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  // Set minimum gf_interval for GF update to a multiple (== 2) of refresh
-  // period. Depending on past encoding stats, GF flag may be reset and update
-  // may not occur until next baseline_gf_interval.
-  if (cr->percent_refresh > 0)
-    rc->baseline_gf_interval = 4 * (100 / cr->percent_refresh);
-  else
-    rc->baseline_gf_interval = 40;
-}
-
-// Update some encoding stats (from the just encoded frame). If this frame's
-// background has high motion, refresh the golden frame. Otherwise, if the
-// golden reference is to be updated check if we should NOT update the golden
-// ref.
-void av1_cyclic_refresh_check_golden_update(AV1_COMP *const cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  int mi_row, mi_col;
-  double fraction_low = 0.0;
-  int low_content_frame = 0;
-
-  MB_MODE_INFO **mi;
-  RATE_CONTROL *const rc = &cpi->rc;
-  const int rows = cm->mi_rows, cols = cm->mi_cols;
-  int cnt1 = 0, cnt2 = 0;
-  int force_gf_refresh = 0;
-
-  for (mi_row = 0; mi_row < rows; mi_row++) {
-    mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
-
-    for (mi_col = 0; mi_col < cols; mi_col++) {
-      int16_t abs_mvr = mi[0]->mv[0].as_mv.row >= 0
-                            ? mi[0]->mv[0].as_mv.row
-                            : -1 * mi[0]->mv[0].as_mv.row;
-      int16_t abs_mvc = mi[0]->mv[0].as_mv.col >= 0
-                            ? mi[0]->mv[0].as_mv.col
-                            : -1 * mi[0]->mv[0].as_mv.col;
-
-      // Calculate the motion of the background.
-      if (abs_mvr <= 16 && abs_mvc <= 16) {
-        cnt1++;
-        if (abs_mvr == 0 && abs_mvc == 0) cnt2++;
-      }
-      mi++;
-
-      // Accumulate low_content_frame.
-      if (cr->map[mi_row * cols + mi_col] < 1) low_content_frame++;
-    }
-  }
-
-  // For video conference clips, if the background has high motion in current
-  // frame because of the camera movement, set this frame as the golden frame.
-  // Use 70% and 5% as the thresholds for golden frame refreshing.
-  if (cnt1 * 10 > (70 * rows * cols) && cnt2 * 20 < cnt1) {
-    av1_cyclic_refresh_set_golden_update(cpi);
-    rc->frames_till_gf_update_due = rc->baseline_gf_interval;
-
-    if (rc->frames_till_gf_update_due > rc->frames_to_key)
-      rc->frames_till_gf_update_due = rc->frames_to_key;
-    cpi->refresh_golden_frame = 1;
-    force_gf_refresh = 1;
-  }
-
-  fraction_low = (double)low_content_frame / (rows * cols);
-  // Update average.
-  cr->low_content_avg = (fraction_low + 3 * cr->low_content_avg) / 4;
-  if (!force_gf_refresh && cpi->refresh_golden_frame == 1) {
-    // Don't update golden reference if the amount of low_content for the
-    // current encoded frame is small, or if the recursive average of the
-    // low_content over the update interval window falls below threshold.
-    if (fraction_low < 0.8 || cr->low_content_avg < 0.7)
-      cpi->refresh_golden_frame = 0;
-    // Reset for next internal.
-    cr->low_content_avg = fraction_low;
-  }
-}
-
-// Update the segmentation map, and related quantities: cyclic refresh map,
-// refresh sb_index, and target number of blocks to be refreshed.
-// The map is set to either 0/CR_SEGMENT_ID_BASE (no refresh) or to
-// 1/CR_SEGMENT_ID_BOOST1 (refresh) for each superblock.
-// Blocks labeled as BOOST1 may later get set to BOOST2 (during the
-// encoding of the superblock).
-static void cyclic_refresh_update_map(AV1_COMP *const cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  unsigned char *const seg_map = cpi->segmentation_map;
-  int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame;
-  int xmis, ymis, x, y;
-  memset(seg_map, CR_SEGMENT_ID_BASE, cm->mi_rows * cm->mi_cols);
-  sb_cols =
-      (cm->mi_cols + cm->seq_params.mib_size - 1) / cm->seq_params.mib_size;
-  sb_rows =
-      (cm->mi_rows + cm->seq_params.mib_size - 1) / cm->seq_params.mib_size;
-  sbs_in_frame = sb_cols * sb_rows;
-  // Number of target blocks to get the q delta (segment 1).
-  block_count = cr->percent_refresh * cm->mi_rows * cm->mi_cols / 100;
-  // Set the segmentation map: cycle through the superblocks, starting at
-  // cr->mb_index, and stopping when either block_count blocks have been found
-  // to be refreshed, or we have passed through whole frame.
-  if (cr->sb_index >= sbs_in_frame) cr->sb_index = 0;
-  assert(cr->sb_index < sbs_in_frame);
-  i = cr->sb_index;
-  cr->target_num_seg_blocks = 0;
-  do {
-    int sum_map = 0;
-    // Get the mi_row/mi_col corresponding to superblock index i.
-    int sb_row_index = (i / sb_cols);
-    int sb_col_index = i - sb_row_index * sb_cols;
-    int mi_row = sb_row_index * cm->seq_params.mib_size;
-    int mi_col = sb_col_index * cm->seq_params.mib_size;
-    int qindex_thresh =
-        cpi->oxcf.content == AOM_CONTENT_SCREEN
-            ? av1_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
-            : 0;
-    assert(mi_row >= 0 && mi_row < cm->mi_rows);
-    assert(mi_col >= 0 && mi_col < cm->mi_cols);
-    bl_index = mi_row * cm->mi_cols + mi_col;
-    // Loop through all MI blocks in superblock and update map.
-    xmis = AOMMIN(cm->mi_cols - mi_col, cm->seq_params.mib_size);
-    ymis = AOMMIN(cm->mi_rows - mi_row, cm->seq_params.mib_size);
-    for (y = 0; y < ymis; y++) {
-      for (x = 0; x < xmis; x++) {
-        const int bl_index2 = bl_index + y * cm->mi_cols + x;
-        // If the block is as a candidate for clean up then mark it
-        // for possible boost/refresh (segment 1). The segment id may get
-        // reset to 0 later if block gets coded anything other than GLOBALMV.
-        if (cr->map[bl_index2] == 0) {
-          if (cr->last_coded_q_map[bl_index2] > qindex_thresh) sum_map++;
-        } else if (cr->map[bl_index2] < 0) {
-          cr->map[bl_index2]++;
-        }
-      }
-    }
-    // Enforce constant segment over superblock.
-    // If segment is at least half of superblock, set to 1.
-    if (sum_map >= xmis * ymis / 2) {
-      for (y = 0; y < ymis; y++)
-        for (x = 0; x < xmis; x++) {
-          seg_map[bl_index + y * cm->mi_cols + x] = CR_SEGMENT_ID_BOOST1;
-        }
-      cr->target_num_seg_blocks += xmis * ymis;
-    }
-    i++;
-    if (i == sbs_in_frame) {
-      i = 0;
-    }
-  } while (cr->target_num_seg_blocks < block_count && i != cr->sb_index);
-  cr->sb_index = i;
-}
-
-// Set cyclic refresh parameters.
-void av1_cyclic_refresh_update_parameters(AV1_COMP *const cpi) {
-  const RATE_CONTROL *const rc = &cpi->rc;
-  const AV1_COMMON *const cm = &cpi->common;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  cr->percent_refresh = 10;
-  cr->max_qdelta_perc = 50;
-  cr->time_for_refresh = 0;
-  // Use larger delta-qp (increase rate_ratio_qdelta) for first few (~4)
-  // periods of the refresh cycle, after a key frame.
-  if (rc->frames_since_key < 4 * cr->percent_refresh)
-    cr->rate_ratio_qdelta = 3.0;
-  else
-    cr->rate_ratio_qdelta = 2.0;
-  // Adjust some parameters for low resolutions at low bitrates.
-  if (cm->width <= 352 && cm->height <= 288 && rc->avg_frame_bandwidth < 3400) {
-    cr->motion_thresh = 4;
-    cr->rate_boost_fac = 10;
-  } else {
-    cr->motion_thresh = 32;
-    cr->rate_boost_fac = 17;
-  }
-}
-
-// Setup cyclic background refresh: set delta q and segmentation map.
-void av1_cyclic_refresh_setup(AV1_COMP *const cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const RATE_CONTROL *const rc = &cpi->rc;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  struct segmentation *const seg = &cm->seg;
-  const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc);
-  int resolution_change =
-      cm->prev_frame && (cm->width != cm->prev_frame->width ||
-                         cm->height != cm->prev_frame->height);
-  if (resolution_change) {
-    memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
-    av1_clearall_segfeatures(seg);
-    aom_clear_system_state();
-    av1_disable_segmentation(seg);
-    return;
-  }
-  if (cm->current_video_frame == 0) cr->low_content_avg = 0.0;
-  // Don't apply refresh on key frame or enhancement layer frames.
-  if (!apply_cyclic_refresh || cm->frame_type == KEY_FRAME) {
-    // Set segmentation map to 0 and disable.
-    unsigned char *const seg_map = cpi->segmentation_map;
-    memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
-    av1_disable_segmentation(&cm->seg);
-    if (cm->frame_type == KEY_FRAME) {
-      memset(cr->last_coded_q_map, MAXQ,
-             cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
-      cr->sb_index = 0;
-    }
-    return;
-  } else {
-    int qindex_delta = 0;
-    int qindex2;
-    const double q =
-        av1_convert_qindex_to_q(cm->base_qindex, cm->seq_params.bit_depth);
-    aom_clear_system_state();
-    // Set rate threshold to some multiple (set to 2 for now) of the target
-    // rate (target is given by sb64_target_rate and scaled by 256).
-    cr->thresh_rate_sb = ((int64_t)(rc->sb64_target_rate) << 8) << 2;
-    // Distortion threshold, quadratic in Q, scale factor to be adjusted.
-    // q will not exceed 457, so (q * q) is within 32bit; see:
-    // av1_convert_qindex_to_q(), av1_ac_quant(), ac_qlookup*[].
-    cr->thresh_dist_sb = ((int64_t)(q * q)) << 2;
-
-    // Set up segmentation.
-    // Clear down the segment map.
-    av1_enable_segmentation(&cm->seg);
-    av1_clearall_segfeatures(seg);
-
-    // Note: setting temporal_update has no effect, as the seg-map coding method
-    // (temporal or spatial) is determined in
-    // av1_choose_segmap_coding_method(),
-    // based on the coding cost of each method. For error_resilient mode on the
-    // last_frame_seg_map is set to 0, so if temporal coding is used, it is
-    // relative to 0 previous map.
-    // seg->temporal_update = 0;
-
-    // Segment BASE "Q" feature is disabled so it defaults to the baseline Q.
-    av1_disable_segfeature(seg, CR_SEGMENT_ID_BASE, SEG_LVL_ALT_Q);
-    // Use segment BOOST1 for in-frame Q adjustment.
-    av1_enable_segfeature(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q);
-    // Use segment BOOST2 for more aggressive in-frame Q adjustment.
-    av1_enable_segfeature(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q);
-
-    // Set the q delta for segment BOOST1.
-    qindex_delta = compute_deltaq(cpi, cm->base_qindex, cr->rate_ratio_qdelta);
-    cr->qindex_delta[1] = qindex_delta;
-
-    // Compute rd-mult for segment BOOST1.
-    qindex2 = clamp(cm->base_qindex + cm->y_dc_delta_q + qindex_delta, 0, MAXQ);
-
-    cr->rdmult = av1_compute_rd_mult(cpi, qindex2);
-
-    av1_set_segdata(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q, qindex_delta);
-
-    // Set a more aggressive (higher) q delta for segment BOOST2.
-    qindex_delta = compute_deltaq(
-        cpi, cm->base_qindex,
-        AOMMIN(CR_MAX_RATE_TARGET_RATIO,
-               0.1 * cr->rate_boost_fac * cr->rate_ratio_qdelta));
-    cr->qindex_delta[2] = qindex_delta;
-    av1_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta);
-
-    // Update the segmentation and refresh map.
-    cyclic_refresh_update_map(cpi);
-  }
-}
-
-int av1_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr) {
-  return cr->rdmult;
-}
-
-void av1_cyclic_refresh_reset_resize(AV1_COMP *const cpi) {
-  const AV1_COMMON *const cm = &cpi->common;
-  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
-  memset(cr->map, 0, cm->mi_rows * cm->mi_cols);
-  cr->sb_index = 0;
-  cpi->refresh_golden_frame = 1;
-}
diff --git a/third_party/aom/av1/encoder/aq_cyclicrefresh.h b/third_party/aom/av1/encoder/aq_cyclicrefresh.h
deleted file mode 100644
index b45781983..000000000
--- a/third_party/aom/av1/encoder/aq_cyclicrefresh.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_AQ_CYCLICREFRESH_H_
-#define AOM_AV1_ENCODER_AQ_CYCLICREFRESH_H_
-
-#include "av1/common/blockd.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// The segment ids used in cyclic refresh: from base (no boost) to increasing
-// boost (higher delta-qp).
-#define CR_SEGMENT_ID_BASE 0
-#define CR_SEGMENT_ID_BOOST1 1
-#define CR_SEGMENT_ID_BOOST2 2
-
-// Maximum rate target ratio for setting segment delta-qp.
-#define CR_MAX_RATE_TARGET_RATIO 4.0
-
-struct AV1_COMP;
-
-struct CYCLIC_REFRESH;
-typedef struct CYCLIC_REFRESH CYCLIC_REFRESH;
-
-CYCLIC_REFRESH *av1_cyclic_refresh_alloc(int mi_rows, int mi_cols);
-
-void av1_cyclic_refresh_free(CYCLIC_REFRESH *cr);
-
-// Estimate the bits, incorporating the delta-q from segment 1, after encoding
-// the frame.
-int av1_cyclic_refresh_estimate_bits_at_q(const struct AV1_COMP *cpi,
-                                          double correction_factor);
-
-// Estimate the bits per mb, for a given q = i and a corresponding delta-q
-// (for segment 1), prior to encoding the frame.
-int av1_cyclic_refresh_rc_bits_per_mb(const struct AV1_COMP *cpi, int i,
-                                      double correction_factor);
-
-// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col),
-// check if we should reset the segment_id, and update the cyclic_refresh map
-// and segmentation map.
-void av1_cyclic_refresh_update_segment(const struct AV1_COMP *cpi,
-                                       MB_MODE_INFO *const mbmi, int mi_row,
-                                       int mi_col, BLOCK_SIZE bsize,
-                                       int64_t rate, int64_t dist, int skip);
-
-// Update the segmentation map, and related quantities: cyclic refresh map,
-// refresh sb_index, and target number of blocks to be refreshed.
-void av1_cyclic_refresh_update__map(struct AV1_COMP *const cpi);
-
-// Update the actual number of blocks that were applied the segment delta q.
-void av1_cyclic_refresh_postencode(struct AV1_COMP *const cpi);
-
-// Set golden frame update interval, for 1 pass CBR mode.
-void av1_cyclic_refresh_set_golden_update(struct AV1_COMP *const cpi);
-
-// Check if we should not update golden reference, based on past refresh stats.
-void av1_cyclic_refresh_check_golden_update(struct AV1_COMP *const cpi);
-
-// Set/update global/frame level refresh parameters.
-void av1_cyclic_refresh_update_parameters(struct AV1_COMP *const cpi);
-
-// Setup cyclic background refresh: set delta q and segmentation map.
-void av1_cyclic_refresh_setup(struct AV1_COMP *const cpi);
-
-int av1_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr);
-
-void av1_cyclic_refresh_reset_resize(struct AV1_COMP *const cpi);
-
-static INLINE int cyclic_refresh_segment_id_boosted(int segment_id) {
-  return segment_id == CR_SEGMENT_ID_BOOST1 ||
-         segment_id == CR_SEGMENT_ID_BOOST2;
-}
-
-static INLINE int cyclic_refresh_segment_id(int segment_id) {
-  if (segment_id == CR_SEGMENT_ID_BOOST1)
-    return CR_SEGMENT_ID_BOOST1;
-  else if (segment_id == CR_SEGMENT_ID_BOOST2)
-    return CR_SEGMENT_ID_BOOST2;
-  else
-    return CR_SEGMENT_ID_BASE;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_AQ_CYCLICREFRESH_H_
diff --git a/third_party/aom/av1/encoder/aq_variance.c b/third_party/aom/av1/encoder/aq_variance.c
deleted file mode 100644
index 58f906bdc..000000000
--- a/third_party/aom/av1/encoder/aq_variance.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-
-#include "aom_ports/mem.h"
-
-#include "av1/encoder/aq_variance.h"
-#include "av1/common/seg_common.h"
-#include "av1/encoder/encodeframe.h"
-#include "av1/encoder/ratectrl.h"
-#include "av1/encoder/rd.h"
-#include "av1/encoder/segmentation.h"
-#include "av1/encoder/dwt.h"
-#include "aom_ports/system_state.h"
-
-static const double rate_ratio[MAX_SEGMENTS] = { 2.2, 1.7, 1.3, 1.0,
-                                                 0.9, .8,  .7,  .6 };
-
-static const double deltaq_rate_ratio[MAX_SEGMENTS] = { 2.5,  2.0, 1.5, 1.0,
-                                                        0.75, 1.0, 1.0, 1.0 };
-#define ENERGY_MIN (-4)
-#define ENERGY_MAX (1)
-#define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN + 1)
-#define ENERGY_IN_BOUNDS(energy) \
-  assert((energy) >= ENERGY_MIN && (energy) <= ENERGY_MAX)
-
-DECLARE_ALIGNED(16, static const uint8_t, av1_all_zeros[MAX_SB_SIZE]) = { 0 };
-
-DECLARE_ALIGNED(16, static const uint16_t,
-                av1_highbd_all_zeros[MAX_SB_SIZE]) = { 0 };
-
-static const int segment_id[ENERGY_SPAN] = { 0, 1, 1, 2, 3, 4 };
-
-#define SEGMENT_ID(i) segment_id[(i)-ENERGY_MIN]
-
-void av1_vaq_frame_setup(AV1_COMP *cpi) {
-  AV1_COMMON *cm = &cpi->common;
-  struct segmentation *seg = &cm->seg;
-  int i;
-
-  int resolution_change =
-      cm->prev_frame && (cm->width != cm->prev_frame->width ||
-                         cm->height != cm->prev_frame->height);
-  int avg_energy = (int)(cpi->twopass.mb_av_energy - 2);
-  double avg_ratio;
-  if (avg_energy > 7) avg_energy = 7;
-  if (avg_energy < 0) avg_energy = 0;
-  avg_ratio = rate_ratio[avg_energy];
-
-  if (resolution_change) {
-    memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
-    av1_clearall_segfeatures(seg);
-    aom_clear_system_state();
-    av1_disable_segmentation(seg);
-    return;
-  }
-  if (frame_is_intra_only(cm) || cm->error_resilient_mode ||
-      cpi->refresh_alt_ref_frame ||
-      (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
-    cpi->vaq_refresh = 1;
-
-    av1_enable_segmentation(seg);
-    av1_clearall_segfeatures(seg);
-
-    aom_clear_system_state();
-
-    for (i = 0; i < MAX_SEGMENTS; ++i) {
-      // Set up avg segment id to be 1.0 and adjust the other segments around
-      // it.
-      int qindex_delta = av1_compute_qdelta_by_rate(
-          &cpi->rc, cm->frame_type, cm->base_qindex, rate_ratio[i] / avg_ratio,
-          cm->seq_params.bit_depth);
-
-      // We don't allow qindex 0 in a segment if the base value is not 0.
-      // Q index 0 (lossless) implies 4x4 encoding only and in AQ mode a segment
-      // Q delta is sometimes applied without going back around the rd loop.
-      // This could lead to an illegal combination of partition size and q.
-      if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) {
-        qindex_delta = -cm->base_qindex + 1;
-      }
-
-      av1_set_segdata(seg, i, SEG_LVL_ALT_Q, qindex_delta);
-      av1_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
-    }
-  }
-}
-
-int av1_log_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
-  // This functions returns a score for the blocks local variance as calculated
-  // by: sum of the log of the (4x4 variances) of each subblock to the current
-  // block (x,bs)
-  // * 32 / number of pixels in the block_size.
-  // This is used for segmentation because to avoid situations in which a large
-  // block with a gentle gradient gets marked high variance even though each
-  // subblock has a low variance.   This allows us to assign the same segment
-  // number for the same sorts of area regardless of how the partitioning goes.
-
-  MACROBLOCKD *xd = &x->e_mbd;
-  double var = 0;
-  unsigned int sse;
-  int i, j;
-
-  int right_overflow =
-      (xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0;
-  int bottom_overflow =
-      (xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0;
-
-  const int bw = MI_SIZE * mi_size_wide[bs] - right_overflow;
-  const int bh = MI_SIZE * mi_size_high[bs] - bottom_overflow;
-
-  aom_clear_system_state();
-
-  for (i = 0; i < bh; i += 4) {
-    for (j = 0; j < bw; j += 4) {
-      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-        var +=
-            log(1.0 + cpi->fn_ptr[BLOCK_4X4].vf(
-                          x->plane[0].src.buf + i * x->plane[0].src.stride + j,
-                          x->plane[0].src.stride,
-                          CONVERT_TO_BYTEPTR(av1_highbd_all_zeros), 0, &sse) /
-                          16);
-      } else {
-        var +=
-            log(1.0 + cpi->fn_ptr[BLOCK_4X4].vf(
-                          x->plane[0].src.buf + i * x->plane[0].src.stride + j,
-                          x->plane[0].src.stride, av1_all_zeros, 0, &sse) /
-                          16);
-      }
-    }
-  }
-  // Use average of 4x4 log variance. The range for 8 bit 0 - 9.704121561.
-  var /= (bw / 4 * bh / 4);
-  if (var > 7) var = 7;
-
-  aom_clear_system_state();
-  return (int)(var);
-}
-
-#define DEFAULT_E_MIDPOINT 10.0
-
-unsigned int haar_ac_energy(MACROBLOCK *x, BLOCK_SIZE bs) {
-  MACROBLOCKD *xd = &x->e_mbd;
-  int stride = x->plane[0].src.stride;
-  uint8_t *buf = x->plane[0].src.buf;
-  const int bw = MI_SIZE * mi_size_wide[bs];
-  const int bh = MI_SIZE * mi_size_high[bs];
-  int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
-
-  int var = 0;
-  for (int r = 0; r < bh; r += 8)
-    for (int c = 0; c < bw; c += 8) {
-      var += av1_haar_ac_sad_8x8_uint8_input(buf + c + r * stride, stride, hbd);
-    }
-
-  return (unsigned int)((uint64_t)var * 256) >> num_pels_log2_lookup[bs];
-}
-
-double av1_log_block_wavelet_energy(MACROBLOCK *x, BLOCK_SIZE bs) {
-  unsigned int haar_sad = haar_ac_energy(x, bs);
-  aom_clear_system_state();
-  return log(haar_sad + 1.0);
-}
-
-int av1_block_wavelet_energy_level(const AV1_COMP *cpi, MACROBLOCK *x,
-                                   BLOCK_SIZE bs) {
-  double energy, energy_midpoint;
-  aom_clear_system_state();
-  energy_midpoint = (cpi->oxcf.pass == 2) ? cpi->twopass.frame_avg_haar_energy
-                                          : DEFAULT_E_MIDPOINT;
-  energy = av1_log_block_wavelet_energy(x, bs) - energy_midpoint;
-  return clamp((int)round(energy), ENERGY_MIN, ENERGY_MAX);
-}
-
-int av1_compute_deltaq_from_energy_level(const AV1_COMP *const cpi,
-                                         int block_var_level) {
-  int rate_level;
-  const AV1_COMMON *const cm = &cpi->common;
-
-  if (DELTAQ_MODULATION == 1) {
-    ENERGY_IN_BOUNDS(block_var_level);
-    rate_level = SEGMENT_ID(block_var_level);
-  } else {
-    rate_level = block_var_level;
-  }
-  int qindex_delta = av1_compute_qdelta_by_rate(
-      &cpi->rc, cm->frame_type, cm->base_qindex, deltaq_rate_ratio[rate_level],
-      cm->seq_params.bit_depth);
-
-  if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) {
-    qindex_delta = -cm->base_qindex + 1;
-  }
-  return qindex_delta;
-}
diff --git a/third_party/aom/av1/encoder/aq_variance.h b/third_party/aom/av1/encoder/aq_variance.h
deleted file mode 100644
index 2d22b663e..000000000
--- a/third_party/aom/av1/encoder/aq_variance.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_AQ_VARIANCE_H_
-#define AOM_AV1_ENCODER_AQ_VARIANCE_H_
-
-#include "av1/encoder/encoder.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_vaq_frame_setup(AV1_COMP *cpi);
-
-int av1_log_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
-int av1_compute_deltaq_from_energy_level(const AV1_COMP *const cpi,
-                                         int block_var_level);
-int av1_block_wavelet_energy_level(const AV1_COMP *cpi, MACROBLOCK *x,
-                                   BLOCK_SIZE bs);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_AQ_VARIANCE_H_
diff --git a/third_party/aom/av1/encoder/arm/neon/quantize_neon.c b/third_party/aom/av1/encoder/arm/neon/quantize_neon.c
deleted file mode 100644
index 36e7d3370..000000000
--- a/third_party/aom/av1/encoder/arm/neon/quantize_neon.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include <math.h>
-
-#include "aom_mem/aom_mem.h"
-
-#include "av1/common/quant_common.h"
-#include "av1/common/seg_common.h"
-
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/rd.h"
-
-void av1_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
-                          int skip_block, const int16_t *zbin_ptr,
-                          const int16_t *round_ptr, const int16_t *quant_ptr,
-                          const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
-                          int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
-                          uint16_t *eob_ptr, const int16_t *scan,
-                          const int16_t *iscan) {
-  // TODO(jingning) Decide the need of these arguments after the
-  // quantization process is completed.
-  (void)zbin_ptr;
-  (void)quant_shift_ptr;
-  (void)scan;
-
-  if (!skip_block) {
-    // Quantization pass: All coefficients with index >= zero_flag are
-    // skippable. Note: zero_flag can be zero.
-    int i;
-    const int16x8_t v_zero = vdupq_n_s16(0);
-    const int16x8_t v_one = vdupq_n_s16(1);
-    int16x8_t v_eobmax_76543210 = vdupq_n_s16(-1);
-    int16x8_t v_round = vmovq_n_s16(round_ptr[1]);
-    int16x8_t v_quant = vmovq_n_s16(quant_ptr[1]);
-    int16x8_t v_dequant = vmovq_n_s16(dequant_ptr[1]);
-    // adjust for dc
-    v_round = vsetq_lane_s16(round_ptr[0], v_round, 0);
-    v_quant = vsetq_lane_s16(quant_ptr[0], v_quant, 0);
-    v_dequant = vsetq_lane_s16(dequant_ptr[0], v_dequant, 0);
-    // process dc and the first seven ac coeffs
-    {
-      const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
-      const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[0]);
-      const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
-      const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
-      const int32x4_t v_tmp_lo =
-          vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
-      const int32x4_t v_tmp_hi =
-          vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant));
-      const int16x8_t v_tmp2 =
-          vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16));
-      const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
-      const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
-      const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
-      const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
-      const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
-      const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
-      v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
-      vst1q_s16(&qcoeff_ptr[0], v_qcoeff);
-      vst1q_s16(&dqcoeff_ptr[0], v_dqcoeff);
-      v_round = vmovq_n_s16(round_ptr[1]);
-      v_quant = vmovq_n_s16(quant_ptr[1]);
-      v_dequant = vmovq_n_s16(dequant_ptr[1]);
-    }
-    // now process the rest of the ac coeffs
-    for (i = 8; i < count; i += 8) {
-      const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
-      const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[i]);
-      const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
-      const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
-      const int32x4_t v_tmp_lo =
-          vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
-      const int32x4_t v_tmp_hi =
-          vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant));
-      const int16x8_t v_tmp2 =
-          vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16));
-      const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
-      const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
-      const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
-      const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
-      const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
-      const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
-      v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
-      vst1q_s16(&qcoeff_ptr[i], v_qcoeff);
-      vst1q_s16(&dqcoeff_ptr[i], v_dqcoeff);
-    }
-    {
-      const int16x4_t v_eobmax_3210 = vmax_s16(
-          vget_low_s16(v_eobmax_76543210), vget_high_s16(v_eobmax_76543210));
-      const int64x1_t v_eobmax_xx32 =
-          vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32);
-      const int16x4_t v_eobmax_tmp =
-          vmax_s16(v_eobmax_3210, vreinterpret_s16_s64(v_eobmax_xx32));
-      const int64x1_t v_eobmax_xxx3 =
-          vshr_n_s64(vreinterpret_s64_s16(v_eobmax_tmp), 16);
-      const int16x4_t v_eobmax_final =
-          vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3));
-
-      *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0);
-    }
-  } else {
-    memset(qcoeff_ptr, 0, count * sizeof(int16_t));
-    memset(dqcoeff_ptr, 0, count * sizeof(int16_t));
-    *eob_ptr = 0;
-  }
-}
diff --git a/third_party/aom/av1/encoder/av1_fwd_txfm1d.c b/third_party/aom/av1/encoder/av1_fwd_txfm1d.c
deleted file mode 100644
index 98505e0b1..000000000
--- a/third_party/aom/av1/encoder/av1_fwd_txfm1d.c
+++ /dev/null
@@ -1,1885 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include "av1/encoder/av1_fwd_txfm1d.h"
-#include "av1/common/av1_txfm.h"
-
-void av1_fdct4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                   const int8_t *stage_range) {
-  const int32_t size = 4;
-  const int32_t *cospi;
-
-  int32_t stage = 0;
-  int32_t *bf0, *bf1;
-  int32_t step[4];
-
-  // stage 0;
-  av1_range_check_buf(stage, input, input, size, stage_range[stage]);
-
-  // stage 1;
-  stage++;
-  bf1 = output;
-  bf1[0] = input[0] + input[3];
-  bf1[1] = input[1] + input[2];
-  bf1[2] = -input[2] + input[1];
-  bf1[3] = -input[3] + input[0];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 2
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
-  bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit);
-  bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit);
-  bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit);
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 3
-  stage++;
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[2];
-  bf1[2] = bf0[1];
-  bf1[3] = bf0[3];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_fdct8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                   const int8_t *stage_range) {
-  const int32_t size = 8;
-  const int32_t *cospi;
-
-  int32_t stage = 0;
-  int32_t *bf0, *bf1;
-  int32_t step[8];
-
-  // stage 0;
-  av1_range_check_buf(stage, input, input, size, stage_range[stage]);
-
-  // stage 1;
-  stage++;
-  bf1 = output;
-  bf1[0] = input[0] + input[7];
-  bf1[1] = input[1] + input[6];
-  bf1[2] = input[2] + input[5];
-  bf1[3] = input[3] + input[4];
-  bf1[4] = -input[4] + input[3];
-  bf1[5] = -input[5] + input[2];
-  bf1[6] = -input[6] + input[1];
-  bf1[7] = -input[7] + input[0];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 2
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = bf0[0] + bf0[3];
-  bf1[1] = bf0[1] + bf0[2];
-  bf1[2] = -bf0[2] + bf0[1];
-  bf1[3] = -bf0[3] + bf0[0];
-  bf1[4] = bf0[4];
-  bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
-  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit);
-  bf1[7] = bf0[7];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 3
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
-  bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit);
-  bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit);
-  bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit);
-  bf1[4] = bf0[4] + bf0[5];
-  bf1[5] = -bf0[5] + bf0[4];
-  bf1[6] = -bf0[6] + bf0[7];
-  bf1[7] = bf0[7] + bf0[6];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 4
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[1];
-  bf1[2] = bf0[2];
-  bf1[3] = bf0[3];
-  bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit);
-  bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit);
-  bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit);
-  bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit);
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 5
-  stage++;
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[4];
-  bf1[2] = bf0[2];
-  bf1[3] = bf0[6];
-  bf1[4] = bf0[1];
-  bf1[5] = bf0[5];
-  bf1[6] = bf0[3];
-  bf1[7] = bf0[7];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_fdct16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range) {
-  const int32_t size = 16;
-  const int32_t *cospi;
-
-  int32_t stage = 0;
-  int32_t *bf0, *bf1;
-  int32_t step[16];
-
-  // stage 0;
-  av1_range_check_buf(stage, input, input, size, stage_range[stage]);
-
-  // stage 1;
-  stage++;
-  bf1 = output;
-  bf1[0] = input[0] + input[15];
-  bf1[1] = input[1] + input[14];
-  bf1[2] = input[2] + input[13];
-  bf1[3] = input[3] + input[12];
-  bf1[4] = input[4] + input[11];
-  bf1[5] = input[5] + input[10];
-  bf1[6] = input[6] + input[9];
-  bf1[7] = input[7] + input[8];
-  bf1[8] = -input[8] + input[7];
-  bf1[9] = -input[9] + input[6];
-  bf1[10] = -input[10] + input[5];
-  bf1[11] = -input[11] + input[4];
-  bf1[12] = -input[12] + input[3];
-  bf1[13] = -input[13] + input[2];
-  bf1[14] = -input[14] + input[1];
-  bf1[15] = -input[15] + input[0];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 2
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = bf0[0] + bf0[7];
-  bf1[1] = bf0[1] + bf0[6];
-  bf1[2] = bf0[2] + bf0[5];
-  bf1[3] = bf0[3] + bf0[4];
-  bf1[4] = -bf0[4] + bf0[3];
-  bf1[5] = -bf0[5] + bf0[2];
-  bf1[6] = -bf0[6] + bf0[1];
-  bf1[7] = -bf0[7] + bf0[0];
-  bf1[8] = bf0[8];
-  bf1[9] = bf0[9];
-  bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
-  bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
-  bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit);
-  bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit);
-  bf1[14] = bf0[14];
-  bf1[15] = bf0[15];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 3
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0] + bf0[3];
-  bf1[1] = bf0[1] + bf0[2];
-  bf1[2] = -bf0[2] + bf0[1];
-  bf1[3] = -bf0[3] + bf0[0];
-  bf1[4] = bf0[4];
-  bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
-  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit);
-  bf1[7] = bf0[7];
-  bf1[8] = bf0[8] + bf0[11];
-  bf1[9] = bf0[9] + bf0[10];
-  bf1[10] = -bf0[10] + bf0[9];
-  bf1[11] = -bf0[11] + bf0[8];
-  bf1[12] = -bf0[12] + bf0[15];
-  bf1[13] = -bf0[13] + bf0[14];
-  bf1[14] = bf0[14] + bf0[13];
-  bf1[15] = bf0[15] + bf0[12];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 4
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
-  bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit);
-  bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit);
-  bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit);
-  bf1[4] = bf0[4] + bf0[5];
-  bf1[5] = -bf0[5] + bf0[4];
-  bf1[6] = -bf0[6] + bf0[7];
-  bf1[7] = bf0[7] + bf0[6];
-  bf1[8] = bf0[8];
-  bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
-  bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
-  bf1[11] = bf0[11];
-  bf1[12] = bf0[12];
-  bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit);
-  bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit);
-  bf1[15] = bf0[15];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 5
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[1];
-  bf1[2] = bf0[2];
-  bf1[3] = bf0[3];
-  bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit);
-  bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit);
-  bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit);
-  bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit);
-  bf1[8] = bf0[8] + bf0[9];
-  bf1[9] = -bf0[9] + bf0[8];
-  bf1[10] = -bf0[10] + bf0[11];
-  bf1[11] = bf0[11] + bf0[10];
-  bf1[12] = bf0[12] + bf0[13];
-  bf1[13] = -bf0[13] + bf0[12];
-  bf1[14] = -bf0[14] + bf0[15];
-  bf1[15] = bf0[15] + bf0[14];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 6
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[1];
-  bf1[2] = bf0[2];
-  bf1[3] = bf0[3];
-  bf1[4] = bf0[4];
-  bf1[5] = bf0[5];
-  bf1[6] = bf0[6];
-  bf1[7] = bf0[7];
-  bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit);
-  bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit);
-  bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit);
-  bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit);
-  bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit);
-  bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit);
-  bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit);
-  bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit);
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 7
-  stage++;
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[8];
-  bf1[2] = bf0[4];
-  bf1[3] = bf0[12];
-  bf1[4] = bf0[2];
-  bf1[5] = bf0[10];
-  bf1[6] = bf0[6];
-  bf1[7] = bf0[14];
-  bf1[8] = bf0[1];
-  bf1[9] = bf0[9];
-  bf1[10] = bf0[5];
-  bf1[11] = bf0[13];
-  bf1[12] = bf0[3];
-  bf1[13] = bf0[11];
-  bf1[14] = bf0[7];
-  bf1[15] = bf0[15];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_fdct32_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range) {
-  const int32_t size = 32;
-  const int32_t *cospi;
-
-  int32_t stage = 0;
-  int32_t *bf0, *bf1;
-  int32_t step[32];
-
-  // stage 0;
-  av1_range_check_buf(stage, input, input, size, stage_range[stage]);
-
-  // stage 1;
-  stage++;
-  bf1 = output;
-  bf1[0] = input[0] + input[31];
-  bf1[1] = input[1] + input[30];
-  bf1[2] = input[2] + input[29];
-  bf1[3] = input[3] + input[28];
-  bf1[4] = input[4] + input[27];
-  bf1[5] = input[5] + input[26];
-  bf1[6] = input[6] + input[25];
-  bf1[7] = input[7] + input[24];
-  bf1[8] = input[8] + input[23];
-  bf1[9] = input[9] + input[22];
-  bf1[10] = input[10] + input[21];
-  bf1[11] = input[11] + input[20];
-  bf1[12] = input[12] + input[19];
-  bf1[13] = input[13] + input[18];
-  bf1[14] = input[14] + input[17];
-  bf1[15] = input[15] + input[16];
-  bf1[16] = -input[16] + input[15];
-  bf1[17] = -input[17] + input[14];
-  bf1[18] = -input[18] + input[13];
-  bf1[19] = -input[19] + input[12];
-  bf1[20] = -input[20] + input[11];
-  bf1[21] = -input[21] + input[10];
-  bf1[22] = -input[22] + input[9];
-  bf1[23] = -input[23] + input[8];
-  bf1[24] = -input[24] + input[7];
-  bf1[25] = -input[25] + input[6];
-  bf1[26] = -input[26] + input[5];
-  bf1[27] = -input[27] + input[4];
-  bf1[28] = -input[28] + input[3];
-  bf1[29] = -input[29] + input[2];
-  bf1[30] = -input[30] + input[1];
-  bf1[31] = -input[31] + input[0];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 2
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = bf0[0] + bf0[15];
-  bf1[1] = bf0[1] + bf0[14];
-  bf1[2] = bf0[2] + bf0[13];
-  bf1[3] = bf0[3] + bf0[12];
-  bf1[4] = bf0[4] + bf0[11];
-  bf1[5] = bf0[5] + bf0[10];
-  bf1[6] = bf0[6] + bf0[9];
-  bf1[7] = bf0[7] + bf0[8];
-  bf1[8] = -bf0[8] + bf0[7];
-  bf1[9] = -bf0[9] + bf0[6];
-  bf1[10] = -bf0[10] + bf0[5];
-  bf1[11] = -bf0[11] + bf0[4];
-  bf1[12] = -bf0[12] + bf0[3];
-  bf1[13] = -bf0[13] + bf0[2];
-  bf1[14] = -bf0[14] + bf0[1];
-  bf1[15] = -bf0[15] + bf0[0];
-  bf1[16] = bf0[16];
-  bf1[17] = bf0[17];
-  bf1[18] = bf0[18];
-  bf1[19] = bf0[19];
-  bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
-  bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
-  bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
-  bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
-  bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit);
-  bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit);
-  bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit);
-  bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit);
-  bf1[28] = bf0[28];
-  bf1[29] = bf0[29];
-  bf1[30] = bf0[30];
-  bf1[31] = bf0[31];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 3
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0] + bf0[7];
-  bf1[1] = bf0[1] + bf0[6];
-  bf1[2] = bf0[2] + bf0[5];
-  bf1[3] = bf0[3] + bf0[4];
-  bf1[4] = -bf0[4] + bf0[3];
-  bf1[5] = -bf0[5] + bf0[2];
-  bf1[6] = -bf0[6] + bf0[1];
-  bf1[7] = -bf0[7] + bf0[0];
-  bf1[8] = bf0[8];
-  bf1[9] = bf0[9];
-  bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
-  bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
-  bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit);
-  bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit);
-  bf1[14] = bf0[14];
-  bf1[15] = bf0[15];
-  bf1[16] = bf0[16] + bf0[23];
-  bf1[17] = bf0[17] + bf0[22];
-  bf1[18] = bf0[18] + bf0[21];
-  bf1[19] = bf0[19] + bf0[20];
-  bf1[20] = -bf0[20] + bf0[19];
-  bf1[21] = -bf0[21] + bf0[18];
-  bf1[22] = -bf0[22] + bf0[17];
-  bf1[23] = -bf0[23] + bf0[16];
-  bf1[24] = -bf0[24] + bf0[31];
-  bf1[25] = -bf0[25] + bf0[30];
-  bf1[26] = -bf0[26] + bf0[29];
-  bf1[27] = -bf0[27] + bf0[28];
-  bf1[28] = bf0[28] + bf0[27];
-  bf1[29] = bf0[29] + bf0[26];
-  bf1[30] = bf0[30] + bf0[25];
-  bf1[31] = bf0[31] + bf0[24];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 4
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = bf0[0] + bf0[3];
-  bf1[1] = bf0[1] + bf0[2];
-  bf1[2] = -bf0[2] + bf0[1];
-  bf1[3] = -bf0[3] + bf0[0];
-  bf1[4] = bf0[4];
-  bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
-  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit);
-  bf1[7] = bf0[7];
-  bf1[8] = bf0[8] + bf0[11];
-  bf1[9] = bf0[9] + bf0[10];
-  bf1[10] = -bf0[10] + bf0[9];
-  bf1[11] = -bf0[11] + bf0[8];
-  bf1[12] = -bf0[12] + bf0[15];
-  bf1[13] = -bf0[13] + bf0[14];
-  bf1[14] = bf0[14] + bf0[13];
-  bf1[15] = bf0[15] + bf0[12];
-  bf1[16] = bf0[16];
-  bf1[17] = bf0[17];
-  bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit);
-  bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit);
-  bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit);
-  bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit);
-  bf1[22] = bf0[22];
-  bf1[23] = bf0[23];
-  bf1[24] = bf0[24];
-  bf1[25] = bf0[25];
-  bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit);
-  bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit);
-  bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit);
-  bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit);
-  bf1[30] = bf0[30];
-  bf1[31] = bf0[31];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 5
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
-  bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit);
-  bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit);
-  bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit);
-  bf1[4] = bf0[4] + bf0[5];
-  bf1[5] = -bf0[5] + bf0[4];
-  bf1[6] = -bf0[6] + bf0[7];
-  bf1[7] = bf0[7] + bf0[6];
-  bf1[8] = bf0[8];
-  bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
-  bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
-  bf1[11] = bf0[11];
-  bf1[12] = bf0[12];
-  bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit);
-  bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit);
-  bf1[15] = bf0[15];
-  bf1[16] = bf0[16] + bf0[19];
-  bf1[17] = bf0[17] + bf0[18];
-  bf1[18] = -bf0[18] + bf0[17];
-  bf1[19] = -bf0[19] + bf0[16];
-  bf1[20] = -bf0[20] + bf0[23];
-  bf1[21] = -bf0[21] + bf0[22];
-  bf1[22] = bf0[22] + bf0[21];
-  bf1[23] = bf0[23] + bf0[20];
-  bf1[24] = bf0[24] + bf0[27];
-  bf1[25] = bf0[25] + bf0[26];
-  bf1[26] = -bf0[26] + bf0[25];
-  bf1[27] = -bf0[27] + bf0[24];
-  bf1[28] = -bf0[28] + bf0[31];
-  bf1[29] = -bf0[29] + bf0[30];
-  bf1[30] = bf0[30] + bf0[29];
-  bf1[31] = bf0[31] + bf0[28];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 6
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[1];
-  bf1[2] = bf0[2];
-  bf1[3] = bf0[3];
-  bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit);
-  bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit);
-  bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit);
-  bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit);
-  bf1[8] = bf0[8] + bf0[9];
-  bf1[9] = -bf0[9] + bf0[8];
-  bf1[10] = -bf0[10] + bf0[11];
-  bf1[11] = bf0[11] + bf0[10];
-  bf1[12] = bf0[12] + bf0[13];
-  bf1[13] = -bf0[13] + bf0[12];
-  bf1[14] = -bf0[14] + bf0[15];
-  bf1[15] = bf0[15] + bf0[14];
-  bf1[16] = bf0[16];
-  bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit);
-  bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit);
-  bf1[19] = bf0[19];
-  bf1[20] = bf0[20];
-  bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit);
-  bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit);
-  bf1[23] = bf0[23];
-  bf1[24] = bf0[24];
-  bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit);
-  bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit);
-  bf1[27] = bf0[27];
-  bf1[28] = bf0[28];
-  bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit);
-  bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit);
-  bf1[31] = bf0[31];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 7
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[1];
-  bf1[2] = bf0[2];
-  bf1[3] = bf0[3];
-  bf1[4] = bf0[4];
-  bf1[5] = bf0[5];
-  bf1[6] = bf0[6];
-  bf1[7] = bf0[7];
-  bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit);
-  bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit);
-  bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit);
-  bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit);
-  bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit);
-  bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit);
-  bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit);
-  bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit);
-  bf1[16] = bf0[16] + bf0[17];
-  bf1[17] = -bf0[17] + bf0[16];
-  bf1[18] = -bf0[18] + bf0[19];
-  bf1[19] = bf0[19] + bf0[18];
-  bf1[20] = bf0[20] + bf0[21];
-  bf1[21] = -bf0[21] + bf0[20];
-  bf1[22] = -bf0[22] + bf0[23];
-  bf1[23] = bf0[23] + bf0[22];
-  bf1[24] = bf0[24] + bf0[25];
-  bf1[25] = -bf0[25] + bf0[24];
-  bf1[26] = -bf0[26] + bf0[27];
-  bf1[27] = bf0[27] + bf0[26];
-  bf1[28] = bf0[28] + bf0[29];
-  bf1[29] = -bf0[29] + bf0[28];
-  bf1[30] = -bf0[30] + bf0[31];
-  bf1[31] = bf0[31] + bf0[30];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 8
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[1];
-  bf1[2] = bf0[2];
-  bf1[3] = bf0[3];
-  bf1[4] = bf0[4];
-  bf1[5] = bf0[5];
-  bf1[6] = bf0[6];
-  bf1[7] = bf0[7];
-  bf1[8] = bf0[8];
-  bf1[9] = bf0[9];
-  bf1[10] = bf0[10];
-  bf1[11] = bf0[11];
-  bf1[12] = bf0[12];
-  bf1[13] = bf0[13];
-  bf1[14] = bf0[14];
-  bf1[15] = bf0[15];
-  bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit);
-  bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit);
-  bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit);
-  bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit);
-  bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit);
-  bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit);
-  bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit);
-  bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit);
-  bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit);
-  bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit);
-  bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit);
-  bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit);
-  bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit);
-  bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit);
-  bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit);
-  bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit);
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 9
-  stage++;
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[16];
-  bf1[2] = bf0[8];
-  bf1[3] = bf0[24];
-  bf1[4] = bf0[4];
-  bf1[5] = bf0[20];
-  bf1[6] = bf0[12];
-  bf1[7] = bf0[28];
-  bf1[8] = bf0[2];
-  bf1[9] = bf0[18];
-  bf1[10] = bf0[10];
-  bf1[11] = bf0[26];
-  bf1[12] = bf0[6];
-  bf1[13] = bf0[22];
-  bf1[14] = bf0[14];
-  bf1[15] = bf0[30];
-  bf1[16] = bf0[1];
-  bf1[17] = bf0[17];
-  bf1[18] = bf0[9];
-  bf1[19] = bf0[25];
-  bf1[20] = bf0[5];
-  bf1[21] = bf0[21];
-  bf1[22] = bf0[13];
-  bf1[23] = bf0[29];
-  bf1[24] = bf0[3];
-  bf1[25] = bf0[19];
-  bf1[26] = bf0[11];
-  bf1[27] = bf0[27];
-  bf1[28] = bf0[7];
-  bf1[29] = bf0[23];
-  bf1[30] = bf0[15];
-  bf1[31] = bf0[31];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_fadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range) { /*
-   * 4-point 1-D transform; the name suggests a forward ADST (TODO confirm
-   * against the declaring header). Reads input[0..3], writes output[0..3].
-   *
-   * input:       four source values (read only).
-   * output:      receives the four results.
-   * cos_bit:     argument to sinpi_arr() and the shift passed to
-   *              round_shift() when the results are stored.
-   * stage_range: per-stage bit ranges; entries 0..6 are read here and handed
-   *              to the range-check helpers.
-   *
-   * NOTE(review): sinpi_arr(), range_check_value(), round_shift() and
-   * av1_range_check_buf() are defined outside this view. The comments below
-   * assume range_check_value() returns its first argument unchanged and that
-   * round_shift(v, n) is a rounding right shift by n - confirm.
-   */
-  int bit = cos_bit;
-  const int32_t *sinpi = sinpi_arr(bit);
-  int32_t x0, x1, x2, x3;
-  int32_t s0, s1, s2, s3, s4, s5, s6, s7;
-
-  // stage 0: range-check the inputs, then copy them into locals
-  av1_range_check_buf(0, input, input, 4, stage_range[0]);
-  x0 = input[0];
-  x1 = input[1];
-  x2 = input[2];
-  x3 = input[3];
-
-  if (!(x0 | x1 | x2 | x3)) {  // every input is zero: emit zeros and stop
-    output[0] = output[1] = output[2] = output[3] = 0;
-    return;
-  }
-
-  // stage 1: sinpi-weighted products of the inputs, plus the sum x0 + x1
-  s0 = range_check_value(sinpi[1] * x0, bit + stage_range[1]);
-  s1 = range_check_value(sinpi[4] * x0, bit + stage_range[1]);
-  s2 = range_check_value(sinpi[2] * x1, bit + stage_range[1]);
-  s3 = range_check_value(sinpi[1] * x1, bit + stage_range[1]);
-  s4 = range_check_value(sinpi[3] * x2, bit + stage_range[1]);
-  s5 = range_check_value(sinpi[4] * x3, bit + stage_range[1]);
-  s6 = range_check_value(sinpi[2] * x3, bit + stage_range[1]);
-  s7 = range_check_value(x0 + x1, stage_range[1]);  // unscaled, so no "bit +"
-
-  // stage 2: s7 becomes x0 + x1 - x3 (still using the original inputs)
-  s7 = range_check_value(s7 - x3, stage_range[2]);
-
-  // stage 3: x0..x3 are reused from here on to hold partial sums
-  x0 = range_check_value(s0 + s2, bit + stage_range[3]);
-  x1 = range_check_value(sinpi[3] * s7, bit + stage_range[3]);
-  x2 = range_check_value(s1 - s3, bit + stage_range[3]);
-  x3 = range_check_value(s4, bit + stage_range[3]);
-
-  // stage 4: fold the x3-derived products (s5, s6) into x0 and x2
-  x0 = range_check_value(x0 + s5, bit + stage_range[4]);
-  x2 = range_check_value(x2 + s6, bit + stage_range[4]);
-
-  // stage 5: combine the partial sums into the four pre-shift results
-  s0 = range_check_value(x0 + x3, bit + stage_range[5]);
-  s1 = range_check_value(x1, bit + stage_range[5]);
-  s2 = range_check_value(x2 - x3, bit + stage_range[5]);
-  s3 = range_check_value(x2 - x0, bit + stage_range[5]);
-
-  // stage 6: finish s3 = x2 - x0 + x3
-  s3 = range_check_value(s3 + x3, bit + stage_range[6]);
-
-  // 1-D transform scaling factor is sqrt(2).
-  output[0] = round_shift(s0, bit);
-  output[1] = round_shift(s1, bit);
-  output[2] = round_shift(s2, bit);
-  output[3] = round_shift(s3, bit);
-  av1_range_check_buf(6, input, output, 4, stage_range[6]);
-}
-
-void av1_fadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range) { /*
-   * 8-point 1-D transform done in seven stages; the name suggests a forward
-   * ADST (TODO confirm against the declaring header).
-   *
-   * input:       eight source values (read only); must not alias output.
-   * output:      receives the eight results; also used as working storage.
-   * cos_bit:     argument to cospi_arr() and last argument of every
-   *              half_btf() call.
-   * stage_range: per-stage bit ranges; entries 0..7 are read here and passed
-   *              to av1_range_check_buf() after each stage.
-   *
-   * Stages alternate between writing output[] and the local step[] array,
-   * each stage reading what the previous one wrote.
-   *
-   * NOTE(review): half_btf(), cospi_arr() and av1_range_check_buf() are
-   * defined outside this view. half_btf(w0, a, w1, b, bit) presumably returns
-   * a rounded (w0 * a + w1 * b) >> bit - confirm.
-   */
-  const int32_t size = 8;
-  const int32_t *cospi;
-
-  int32_t stage = 0;
-  int32_t *bf0, *bf1;  // bf0: buffer read by the current stage, bf1: written
-  int32_t step[8];  // scratch buffer that alternates with output[]
-
-  // stage 0: range-check the untouched input
-  av1_range_check_buf(stage, input, input, size, stage_range[stage]);
-
-  // stage 1: input -> output; reorder the inputs and negate some of them
-  stage++;
-  assert(output != input);  // later reads of input[] follow writes to output[]
-  bf1 = output;
-  bf1[0] = input[0];
-  bf1[1] = -input[7];
-  bf1[2] = -input[3];
-  bf1[3] = input[4];
-  bf1[4] = -input[1];
-  bf1[5] = input[6];
-  bf1[6] = input[2];
-  bf1[7] = -input[5];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 2: output -> step; pairs (2,3) and (6,7) get +/-cospi[32] weights
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[1];
-  bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
-  bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit);
-  bf1[4] = bf0[4];
-  bf1[5] = bf0[5];
-  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
-  bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit);
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 3: step -> output; sums/differences of entries two apart
-  stage++;
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0] + bf0[2];
-  bf1[1] = bf0[1] + bf0[3];
-  bf1[2] = bf0[0] - bf0[2];
-  bf1[3] = bf0[1] - bf0[3];
-  bf1[4] = bf0[4] + bf0[6];
-  bf1[5] = bf0[5] + bf0[7];
-  bf1[6] = bf0[4] - bf0[6];
-  bf1[7] = bf0[5] - bf0[7];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 4: output -> step; pairs (4,5), (6,7) get cospi[16]/cospi[48] weights
-  stage++;
-  cospi = cospi_arr(cos_bit);  // same argument as in stage 2
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[1];
-  bf1[2] = bf0[2];
-  bf1[3] = bf0[3];
-  bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
-  bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit);
-  bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
-  bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit);
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 5: step -> output; sums/differences of entries four apart
-  stage++;
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0] + bf0[4];
-  bf1[1] = bf0[1] + bf0[5];
-  bf1[2] = bf0[2] + bf0[6];
-  bf1[3] = bf0[3] + bf0[7];
-  bf1[4] = bf0[0] - bf0[4];
-  bf1[5] = bf0[1] - bf0[5];
-  bf1[6] = bf0[2] - bf0[6];
-  bf1[7] = bf0[3] - bf0[7];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 6: output -> step; every adjacent pair gets its own cospi weights
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit);
-  bf1[1] = half_btf(cospi[60], bf0[0], -cospi[4], bf0[1], cos_bit);
-  bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit);
-  bf1[3] = half_btf(cospi[44], bf0[2], -cospi[20], bf0[3], cos_bit);
-  bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit);
-  bf1[5] = half_btf(cospi[28], bf0[4], -cospi[36], bf0[5], cos_bit);
-  bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit);
-  bf1[7] = half_btf(cospi[12], bf0[6], -cospi[52], bf0[7], cos_bit);
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 7: step -> output; reorder into the final output positions
-  stage++;
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[1];
-  bf1[1] = bf0[6];
-  bf1[2] = bf0[3];
-  bf1[3] = bf0[4];
-  bf1[4] = bf0[5];
-  bf1[5] = bf0[2];
-  bf1[6] = bf0[7];
-  bf1[7] = bf0[0];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_fadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                     const int8_t *stage_range) { /*
-   * 16-point 1-D transform done in nine stages; the name suggests a forward
-   * ADST (TODO confirm against the declaring header).
-   *
-   * input:       sixteen source values (read only); must not alias output.
-   * output:      receives the sixteen results; also used as working storage.
-   * cos_bit:     argument to cospi_arr() and last argument of every
-   *              half_btf() call.
-   * stage_range: per-stage bit ranges; entries 0..9 are read here and passed
-   *              to av1_range_check_buf() after each stage.
-   *
-   * Stages alternate between writing output[] and the local step[] array,
-   * each stage reading what the previous one wrote.
-   *
-   * NOTE(review): half_btf(), cospi_arr() and av1_range_check_buf() are
-   * defined outside this view. half_btf(w0, a, w1, b, bit) presumably returns
-   * a rounded (w0 * a + w1 * b) >> bit - confirm.
-   */
-  const int32_t size = 16;
-  const int32_t *cospi;
-
-  int32_t stage = 0;
-  int32_t *bf0, *bf1;  // bf0: buffer read by the current stage, bf1: written
-  int32_t step[16];  // scratch buffer that alternates with output[]
-
-  // stage 0: range-check the untouched input
-  av1_range_check_buf(stage, input, input, size, stage_range[stage]);
-
-  // stage 1: input -> output; reorder the inputs and negate some of them
-  stage++;
-  assert(output != input);  // later reads of input[] follow writes to output[]
-  bf1 = output;
-  bf1[0] = input[0];
-  bf1[1] = -input[15];
-  bf1[2] = -input[7];
-  bf1[3] = input[8];
-  bf1[4] = -input[3];
-  bf1[5] = input[12];
-  bf1[6] = input[4];
-  bf1[7] = -input[11];
-  bf1[8] = -input[1];
-  bf1[9] = input[14];
-  bf1[10] = input[6];
-  bf1[11] = -input[9];
-  bf1[12] = input[2];
-  bf1[13] = -input[13];
-  bf1[14] = -input[5];
-  bf1[15] = input[10];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 2: output -> step; pairs (2,3) (6,7) (10,11) (14,15) get +/-cospi[32]
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[1];
-  bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
-  bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit);
-  bf1[4] = bf0[4];
-  bf1[5] = bf0[5];
-  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
-  bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit);
-  bf1[8] = bf0[8];
-  bf1[9] = bf0[9];
-  bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit);
-  bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit);
-  bf1[12] = bf0[12];
-  bf1[13] = bf0[13];
-  bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit);
-  bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit);
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 3: step -> output; sums/differences of entries two apart
-  stage++;
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0] + bf0[2];
-  bf1[1] = bf0[1] + bf0[3];
-  bf1[2] = bf0[0] - bf0[2];
-  bf1[3] = bf0[1] - bf0[3];
-  bf1[4] = bf0[4] + bf0[6];
-  bf1[5] = bf0[5] + bf0[7];
-  bf1[6] = bf0[4] - bf0[6];
-  bf1[7] = bf0[5] - bf0[7];
-  bf1[8] = bf0[8] + bf0[10];
-  bf1[9] = bf0[9] + bf0[11];
-  bf1[10] = bf0[8] - bf0[10];
-  bf1[11] = bf0[9] - bf0[11];
-  bf1[12] = bf0[12] + bf0[14];
-  bf1[13] = bf0[13] + bf0[15];
-  bf1[14] = bf0[12] - bf0[14];
-  bf1[15] = bf0[13] - bf0[15];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 4: output -> step; entries 4..7 and 12..15 get cospi[16]/cospi[48]
-  stage++;
-  cospi = cospi_arr(cos_bit);  // same argument as in stage 2
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[1];
-  bf1[2] = bf0[2];
-  bf1[3] = bf0[3];
-  bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
-  bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit);
-  bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
-  bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit);
-  bf1[8] = bf0[8];
-  bf1[9] = bf0[9];
-  bf1[10] = bf0[10];
-  bf1[11] = bf0[11];
-  bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit);
-  bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit);
-  bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit);
-  bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit);
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 5: step -> output; sums/differences of entries four apart
-  stage++;
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0] + bf0[4];
-  bf1[1] = bf0[1] + bf0[5];
-  bf1[2] = bf0[2] + bf0[6];
-  bf1[3] = bf0[3] + bf0[7];
-  bf1[4] = bf0[0] - bf0[4];
-  bf1[5] = bf0[1] - bf0[5];
-  bf1[6] = bf0[2] - bf0[6];
-  bf1[7] = bf0[3] - bf0[7];
-  bf1[8] = bf0[8] + bf0[12];
-  bf1[9] = bf0[9] + bf0[13];
-  bf1[10] = bf0[10] + bf0[14];
-  bf1[11] = bf0[11] + bf0[15];
-  bf1[12] = bf0[8] - bf0[12];
-  bf1[13] = bf0[9] - bf0[13];
-  bf1[14] = bf0[10] - bf0[14];
-  bf1[15] = bf0[11] - bf0[15];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 6: output -> step; entries 0..7 pass through, 8..15 are pair-weighted
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[1];
-  bf1[2] = bf0[2];
-  bf1[3] = bf0[3];
-  bf1[4] = bf0[4];
-  bf1[5] = bf0[5];
-  bf1[6] = bf0[6];
-  bf1[7] = bf0[7];
-  bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit);
-  bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit);
-  bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit);
-  bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit);
-  bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit);
-  bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit);
-  bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit);
-  bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit);
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 7: step -> output; sums/differences of entries eight apart
-  stage++;
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0] + bf0[8];
-  bf1[1] = bf0[1] + bf0[9];
-  bf1[2] = bf0[2] + bf0[10];
-  bf1[3] = bf0[3] + bf0[11];
-  bf1[4] = bf0[4] + bf0[12];
-  bf1[5] = bf0[5] + bf0[13];
-  bf1[6] = bf0[6] + bf0[14];
-  bf1[7] = bf0[7] + bf0[15];
-  bf1[8] = bf0[0] - bf0[8];
-  bf1[9] = bf0[1] - bf0[9];
-  bf1[10] = bf0[2] - bf0[10];
-  bf1[11] = bf0[3] - bf0[11];
-  bf1[12] = bf0[4] - bf0[12];
-  bf1[13] = bf0[5] - bf0[13];
-  bf1[14] = bf0[6] - bf0[14];
-  bf1[15] = bf0[7] - bf0[15];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 8: output -> step; every adjacent pair gets its own cospi weights
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit);
-  bf1[1] = half_btf(cospi[62], bf0[0], -cospi[2], bf0[1], cos_bit);
-  bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit);
-  bf1[3] = half_btf(cospi[54], bf0[2], -cospi[10], bf0[3], cos_bit);
-  bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit);
-  bf1[5] = half_btf(cospi[46], bf0[4], -cospi[18], bf0[5], cos_bit);
-  bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit);
-  bf1[7] = half_btf(cospi[38], bf0[6], -cospi[26], bf0[7], cos_bit);
-  bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit);
-  bf1[9] = half_btf(cospi[30], bf0[8], -cospi[34], bf0[9], cos_bit);
-  bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit);
-  bf1[11] = half_btf(cospi[22], bf0[10], -cospi[42], bf0[11], cos_bit);
-  bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit);
-  bf1[13] = half_btf(cospi[14], bf0[12], -cospi[50], bf0[13], cos_bit);
-  bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit);
-  bf1[15] = half_btf(cospi[6], bf0[14], -cospi[58], bf0[15], cos_bit);
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 9: step -> output; reorder into the final output positions
-  stage++;
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[1];
-  bf1[1] = bf0[14];
-  bf1[2] = bf0[3];
-  bf1[3] = bf0[12];
-  bf1[4] = bf0[5];
-  bf1[5] = bf0[10];
-  bf1[6] = bf0[7];
-  bf1[7] = bf0[8];
-  bf1[8] = bf0[9];
-  bf1[9] = bf0[6];
-  bf1[10] = bf0[11];
-  bf1[11] = bf0[4];
-  bf1[12] = bf0[13];
-  bf1[13] = bf0[2];
-  bf1[14] = bf0[15];
-  bf1[15] = bf0[0];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-}
-
-void av1_fidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
-                      const int8_t *stage_range) { /*
-   * Scales each of input[0..3] by NewSqrt2 in 64-bit arithmetic and stores
-   * round_shift(product, NewSqrt2Bits) in output[0..3].
-   * NOTE(review): NewSqrt2 / NewSqrt2Bits are defined outside this view and
-   * presumably encode sqrt(2) in fixed point - confirm.
-   * Only stage_range[0] is read.
-   */
-  (void)cos_bit;  // accepted for signature compatibility; never read
-  for (int i = 0; i < 4; ++i)  // widen to int64_t before multiplying
-    output[i] = round_shift((int64_t)input[i] * NewSqrt2, NewSqrt2Bits);
-  assert(stage_range[0] + NewSqrt2Bits <= 32);  // evaluated after the loop
-  av1_range_check_buf(0, input, output, 4, stage_range[0]);
-}
-
-void av1_fidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
-                      const int8_t *stage_range) { /*
-   * Writes output[i] = input[i] * 2 for i in 0..7, then hands the result to
-   * av1_range_check_buf() with stage_range[0]. Only stage_range[0] is read.
-   */
-  (void)cos_bit;  // accepted for signature compatibility; never read
-  for (int i = 0; i < 8; ++i) output[i] = input[i] * 2;  // plain 32-bit multiply
-  av1_range_check_buf(0, input, output, 8, stage_range[0]);
-}
-
-void av1_fidentity16_c(const int32_t *input, int32_t *output, int8_t cos_bit,
-                       const int8_t *stage_range) { /*
-   * Scales each of input[0..15] by 2 * NewSqrt2 in 64-bit arithmetic and
-   * stores round_shift(product, NewSqrt2Bits) in output[0..15].
-   * NOTE(review): NewSqrt2 / NewSqrt2Bits are defined outside this view and
-   * presumably encode sqrt(2) in fixed point - confirm.
-   * Only stage_range[0] is read.
-   */
-  (void)cos_bit;  // accepted for signature compatibility; never read
-  for (int i = 0; i < 16; ++i)  // widen to int64_t before multiplying
-    output[i] = round_shift((int64_t)input[i] * 2 * NewSqrt2, NewSqrt2Bits);
-  assert(stage_range[0] + NewSqrt2Bits <= 32);  // evaluated after the loop
-  av1_range_check_buf(0, input, output, 16, stage_range[0]);
-}
-
-void av1_fidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit,
-                       const int8_t *stage_range) { /*
-   * Writes output[i] = input[i] * 4 for i in 0..31, then hands the result to
-   * av1_range_check_buf() with stage_range[0]. Only stage_range[0] is read.
-   */
-  (void)cos_bit;  // accepted for signature compatibility; never read
-  for (int i = 0; i < 32; ++i) output[i] = input[i] * 4;  // plain 32-bit multiply
-  av1_range_check_buf(0, input, output, 32, stage_range[0]);
-}
-
-void av1_fdct64_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range) {
-  const int32_t size = 64;
-  const int32_t *cospi;
-
-  int32_t stage = 0;
-  int32_t *bf0, *bf1;
-  int32_t step[64];
-
-  // stage 0;
-  av1_range_check_buf(stage, input, input, size, stage_range[stage]);
-
-  // stage 1;
-  stage++;
-  bf1 = output;
-  bf1[0] = input[0] + input[63];
-  bf1[1] = input[1] + input[62];
-  bf1[2] = input[2] + input[61];
-  bf1[3] = input[3] + input[60];
-  bf1[4] = input[4] + input[59];
-  bf1[5] = input[5] + input[58];
-  bf1[6] = input[6] + input[57];
-  bf1[7] = input[7] + input[56];
-  bf1[8] = input[8] + input[55];
-  bf1[9] = input[9] + input[54];
-  bf1[10] = input[10] + input[53];
-  bf1[11] = input[11] + input[52];
-  bf1[12] = input[12] + input[51];
-  bf1[13] = input[13] + input[50];
-  bf1[14] = input[14] + input[49];
-  bf1[15] = input[15] + input[48];
-  bf1[16] = input[16] + input[47];
-  bf1[17] = input[17] + input[46];
-  bf1[18] = input[18] + input[45];
-  bf1[19] = input[19] + input[44];
-  bf1[20] = input[20] + input[43];
-  bf1[21] = input[21] + input[42];
-  bf1[22] = input[22] + input[41];
-  bf1[23] = input[23] + input[40];
-  bf1[24] = input[24] + input[39];
-  bf1[25] = input[25] + input[38];
-  bf1[26] = input[26] + input[37];
-  bf1[27] = input[27] + input[36];
-  bf1[28] = input[28] + input[35];
-  bf1[29] = input[29] + input[34];
-  bf1[30] = input[30] + input[33];
-  bf1[31] = input[31] + input[32];
-  bf1[32] = -input[32] + input[31];
-  bf1[33] = -input[33] + input[30];
-  bf1[34] = -input[34] + input[29];
-  bf1[35] = -input[35] + input[28];
-  bf1[36] = -input[36] + input[27];
-  bf1[37] = -input[37] + input[26];
-  bf1[38] = -input[38] + input[25];
-  bf1[39] = -input[39] + input[24];
-  bf1[40] = -input[40] + input[23];
-  bf1[41] = -input[41] + input[22];
-  bf1[42] = -input[42] + input[21];
-  bf1[43] = -input[43] + input[20];
-  bf1[44] = -input[44] + input[19];
-  bf1[45] = -input[45] + input[18];
-  bf1[46] = -input[46] + input[17];
-  bf1[47] = -input[47] + input[16];
-  bf1[48] = -input[48] + input[15];
-  bf1[49] = -input[49] + input[14];
-  bf1[50] = -input[50] + input[13];
-  bf1[51] = -input[51] + input[12];
-  bf1[52] = -input[52] + input[11];
-  bf1[53] = -input[53] + input[10];
-  bf1[54] = -input[54] + input[9];
-  bf1[55] = -input[55] + input[8];
-  bf1[56] = -input[56] + input[7];
-  bf1[57] = -input[57] + input[6];
-  bf1[58] = -input[58] + input[5];
-  bf1[59] = -input[59] + input[4];
-  bf1[60] = -input[60] + input[3];
-  bf1[61] = -input[61] + input[2];
-  bf1[62] = -input[62] + input[1];
-  bf1[63] = -input[63] + input[0];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 2
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = bf0[0] + bf0[31];
-  bf1[1] = bf0[1] + bf0[30];
-  bf1[2] = bf0[2] + bf0[29];
-  bf1[3] = bf0[3] + bf0[28];
-  bf1[4] = bf0[4] + bf0[27];
-  bf1[5] = bf0[5] + bf0[26];
-  bf1[6] = bf0[6] + bf0[25];
-  bf1[7] = bf0[7] + bf0[24];
-  bf1[8] = bf0[8] + bf0[23];
-  bf1[9] = bf0[9] + bf0[22];
-  bf1[10] = bf0[10] + bf0[21];
-  bf1[11] = bf0[11] + bf0[20];
-  bf1[12] = bf0[12] + bf0[19];
-  bf1[13] = bf0[13] + bf0[18];
-  bf1[14] = bf0[14] + bf0[17];
-  bf1[15] = bf0[15] + bf0[16];
-  bf1[16] = -bf0[16] + bf0[15];
-  bf1[17] = -bf0[17] + bf0[14];
-  bf1[18] = -bf0[18] + bf0[13];
-  bf1[19] = -bf0[19] + bf0[12];
-  bf1[20] = -bf0[20] + bf0[11];
-  bf1[21] = -bf0[21] + bf0[10];
-  bf1[22] = -bf0[22] + bf0[9];
-  bf1[23] = -bf0[23] + bf0[8];
-  bf1[24] = -bf0[24] + bf0[7];
-  bf1[25] = -bf0[25] + bf0[6];
-  bf1[26] = -bf0[26] + bf0[5];
-  bf1[27] = -bf0[27] + bf0[4];
-  bf1[28] = -bf0[28] + bf0[3];
-  bf1[29] = -bf0[29] + bf0[2];
-  bf1[30] = -bf0[30] + bf0[1];
-  bf1[31] = -bf0[31] + bf0[0];
-  bf1[32] = bf0[32];
-  bf1[33] = bf0[33];
-  bf1[34] = bf0[34];
-  bf1[35] = bf0[35];
-  bf1[36] = bf0[36];
-  bf1[37] = bf0[37];
-  bf1[38] = bf0[38];
-  bf1[39] = bf0[39];
-  bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit);
-  bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit);
-  bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit);
-  bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit);
-  bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit);
-  bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit);
-  bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit);
-  bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit);
-  bf1[48] = half_btf(cospi[32], bf0[48], cospi[32], bf0[47], cos_bit);
-  bf1[49] = half_btf(cospi[32], bf0[49], cospi[32], bf0[46], cos_bit);
-  bf1[50] = half_btf(cospi[32], bf0[50], cospi[32], bf0[45], cos_bit);
-  bf1[51] = half_btf(cospi[32], bf0[51], cospi[32], bf0[44], cos_bit);
-  bf1[52] = half_btf(cospi[32], bf0[52], cospi[32], bf0[43], cos_bit);
-  bf1[53] = half_btf(cospi[32], bf0[53], cospi[32], bf0[42], cos_bit);
-  bf1[54] = half_btf(cospi[32], bf0[54], cospi[32], bf0[41], cos_bit);
-  bf1[55] = half_btf(cospi[32], bf0[55], cospi[32], bf0[40], cos_bit);
-  bf1[56] = bf0[56];
-  bf1[57] = bf0[57];
-  bf1[58] = bf0[58];
-  bf1[59] = bf0[59];
-  bf1[60] = bf0[60];
-  bf1[61] = bf0[61];
-  bf1[62] = bf0[62];
-  bf1[63] = bf0[63];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 3
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0] + bf0[15];
-  bf1[1] = bf0[1] + bf0[14];
-  bf1[2] = bf0[2] + bf0[13];
-  bf1[3] = bf0[3] + bf0[12];
-  bf1[4] = bf0[4] + bf0[11];
-  bf1[5] = bf0[5] + bf0[10];
-  bf1[6] = bf0[6] + bf0[9];
-  bf1[7] = bf0[7] + bf0[8];
-  bf1[8] = -bf0[8] + bf0[7];
-  bf1[9] = -bf0[9] + bf0[6];
-  bf1[10] = -bf0[10] + bf0[5];
-  bf1[11] = -bf0[11] + bf0[4];
-  bf1[12] = -bf0[12] + bf0[3];
-  bf1[13] = -bf0[13] + bf0[2];
-  bf1[14] = -bf0[14] + bf0[1];
-  bf1[15] = -bf0[15] + bf0[0];
-  bf1[16] = bf0[16];
-  bf1[17] = bf0[17];
-  bf1[18] = bf0[18];
-  bf1[19] = bf0[19];
-  bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
-  bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
-  bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
-  bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
-  bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit);
-  bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit);
-  bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit);
-  bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit);
-  bf1[28] = bf0[28];
-  bf1[29] = bf0[29];
-  bf1[30] = bf0[30];
-  bf1[31] = bf0[31];
-  bf1[32] = bf0[32] + bf0[47];
-  bf1[33] = bf0[33] + bf0[46];
-  bf1[34] = bf0[34] + bf0[45];
-  bf1[35] = bf0[35] + bf0[44];
-  bf1[36] = bf0[36] + bf0[43];
-  bf1[37] = bf0[37] + bf0[42];
-  bf1[38] = bf0[38] + bf0[41];
-  bf1[39] = bf0[39] + bf0[40];
-  bf1[40] = -bf0[40] + bf0[39];
-  bf1[41] = -bf0[41] + bf0[38];
-  bf1[42] = -bf0[42] + bf0[37];
-  bf1[43] = -bf0[43] + bf0[36];
-  bf1[44] = -bf0[44] + bf0[35];
-  bf1[45] = -bf0[45] + bf0[34];
-  bf1[46] = -bf0[46] + bf0[33];
-  bf1[47] = -bf0[47] + bf0[32];
-  bf1[48] = -bf0[48] + bf0[63];
-  bf1[49] = -bf0[49] + bf0[62];
-  bf1[50] = -bf0[50] + bf0[61];
-  bf1[51] = -bf0[51] + bf0[60];
-  bf1[52] = -bf0[52] + bf0[59];
-  bf1[53] = -bf0[53] + bf0[58];
-  bf1[54] = -bf0[54] + bf0[57];
-  bf1[55] = -bf0[55] + bf0[56];
-  bf1[56] = bf0[56] + bf0[55];
-  bf1[57] = bf0[57] + bf0[54];
-  bf1[58] = bf0[58] + bf0[53];
-  bf1[59] = bf0[59] + bf0[52];
-  bf1[60] = bf0[60] + bf0[51];
-  bf1[61] = bf0[61] + bf0[50];
-  bf1[62] = bf0[62] + bf0[49];
-  bf1[63] = bf0[63] + bf0[48];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 4
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = bf0[0] + bf0[7];
-  bf1[1] = bf0[1] + bf0[6];
-  bf1[2] = bf0[2] + bf0[5];
-  bf1[3] = bf0[3] + bf0[4];
-  bf1[4] = -bf0[4] + bf0[3];
-  bf1[5] = -bf0[5] + bf0[2];
-  bf1[6] = -bf0[6] + bf0[1];
-  bf1[7] = -bf0[7] + bf0[0];
-  bf1[8] = bf0[8];
-  bf1[9] = bf0[9];
-  bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
-  bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
-  bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit);
-  bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit);
-  bf1[14] = bf0[14];
-  bf1[15] = bf0[15];
-  bf1[16] = bf0[16] + bf0[23];
-  bf1[17] = bf0[17] + bf0[22];
-  bf1[18] = bf0[18] + bf0[21];
-  bf1[19] = bf0[19] + bf0[20];
-  bf1[20] = -bf0[20] + bf0[19];
-  bf1[21] = -bf0[21] + bf0[18];
-  bf1[22] = -bf0[22] + bf0[17];
-  bf1[23] = -bf0[23] + bf0[16];
-  bf1[24] = -bf0[24] + bf0[31];
-  bf1[25] = -bf0[25] + bf0[30];
-  bf1[26] = -bf0[26] + bf0[29];
-  bf1[27] = -bf0[27] + bf0[28];
-  bf1[28] = bf0[28] + bf0[27];
-  bf1[29] = bf0[29] + bf0[26];
-  bf1[30] = bf0[30] + bf0[25];
-  bf1[31] = bf0[31] + bf0[24];
-  bf1[32] = bf0[32];
-  bf1[33] = bf0[33];
-  bf1[34] = bf0[34];
-  bf1[35] = bf0[35];
-  bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit);
-  bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit);
-  bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit);
-  bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit);
-  bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit);
-  bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit);
-  bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit);
-  bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit);
-  bf1[44] = bf0[44];
-  bf1[45] = bf0[45];
-  bf1[46] = bf0[46];
-  bf1[47] = bf0[47];
-  bf1[48] = bf0[48];
-  bf1[49] = bf0[49];
-  bf1[50] = bf0[50];
-  bf1[51] = bf0[51];
-  bf1[52] = half_btf(cospi[48], bf0[52], -cospi[16], bf0[43], cos_bit);
-  bf1[53] = half_btf(cospi[48], bf0[53], -cospi[16], bf0[42], cos_bit);
-  bf1[54] = half_btf(cospi[48], bf0[54], -cospi[16], bf0[41], cos_bit);
-  bf1[55] = half_btf(cospi[48], bf0[55], -cospi[16], bf0[40], cos_bit);
-  bf1[56] = half_btf(cospi[16], bf0[56], cospi[48], bf0[39], cos_bit);
-  bf1[57] = half_btf(cospi[16], bf0[57], cospi[48], bf0[38], cos_bit);
-  bf1[58] = half_btf(cospi[16], bf0[58], cospi[48], bf0[37], cos_bit);
-  bf1[59] = half_btf(cospi[16], bf0[59], cospi[48], bf0[36], cos_bit);
-  bf1[60] = bf0[60];
-  bf1[61] = bf0[61];
-  bf1[62] = bf0[62];
-  bf1[63] = bf0[63];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 5
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0] + bf0[3];
-  bf1[1] = bf0[1] + bf0[2];
-  bf1[2] = -bf0[2] + bf0[1];
-  bf1[3] = -bf0[3] + bf0[0];
-  bf1[4] = bf0[4];
-  bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
-  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit);
-  bf1[7] = bf0[7];
-  bf1[8] = bf0[8] + bf0[11];
-  bf1[9] = bf0[9] + bf0[10];
-  bf1[10] = -bf0[10] + bf0[9];
-  bf1[11] = -bf0[11] + bf0[8];
-  bf1[12] = -bf0[12] + bf0[15];
-  bf1[13] = -bf0[13] + bf0[14];
-  bf1[14] = bf0[14] + bf0[13];
-  bf1[15] = bf0[15] + bf0[12];
-  bf1[16] = bf0[16];
-  bf1[17] = bf0[17];
-  bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit);
-  bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit);
-  bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit);
-  bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit);
-  bf1[22] = bf0[22];
-  bf1[23] = bf0[23];
-  bf1[24] = bf0[24];
-  bf1[25] = bf0[25];
-  bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit);
-  bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit);
-  bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit);
-  bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit);
-  bf1[30] = bf0[30];
-  bf1[31] = bf0[31];
-  bf1[32] = bf0[32] + bf0[39];
-  bf1[33] = bf0[33] + bf0[38];
-  bf1[34] = bf0[34] + bf0[37];
-  bf1[35] = bf0[35] + bf0[36];
-  bf1[36] = -bf0[36] + bf0[35];
-  bf1[37] = -bf0[37] + bf0[34];
-  bf1[38] = -bf0[38] + bf0[33];
-  bf1[39] = -bf0[39] + bf0[32];
-  bf1[40] = -bf0[40] + bf0[47];
-  bf1[41] = -bf0[41] + bf0[46];
-  bf1[42] = -bf0[42] + bf0[45];
-  bf1[43] = -bf0[43] + bf0[44];
-  bf1[44] = bf0[44] + bf0[43];
-  bf1[45] = bf0[45] + bf0[42];
-  bf1[46] = bf0[46] + bf0[41];
-  bf1[47] = bf0[47] + bf0[40];
-  bf1[48] = bf0[48] + bf0[55];
-  bf1[49] = bf0[49] + bf0[54];
-  bf1[50] = bf0[50] + bf0[53];
-  bf1[51] = bf0[51] + bf0[52];
-  bf1[52] = -bf0[52] + bf0[51];
-  bf1[53] = -bf0[53] + bf0[50];
-  bf1[54] = -bf0[54] + bf0[49];
-  bf1[55] = -bf0[55] + bf0[48];
-  bf1[56] = -bf0[56] + bf0[63];
-  bf1[57] = -bf0[57] + bf0[62];
-  bf1[58] = -bf0[58] + bf0[61];
-  bf1[59] = -bf0[59] + bf0[60];
-  bf1[60] = bf0[60] + bf0[59];
-  bf1[61] = bf0[61] + bf0[58];
-  bf1[62] = bf0[62] + bf0[57];
-  bf1[63] = bf0[63] + bf0[56];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 6
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
-  bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit);
-  bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit);
-  bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit);
-  bf1[4] = bf0[4] + bf0[5];
-  bf1[5] = -bf0[5] + bf0[4];
-  bf1[6] = -bf0[6] + bf0[7];
-  bf1[7] = bf0[7] + bf0[6];
-  bf1[8] = bf0[8];
-  bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
-  bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
-  bf1[11] = bf0[11];
-  bf1[12] = bf0[12];
-  bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit);
-  bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit);
-  bf1[15] = bf0[15];
-  bf1[16] = bf0[16] + bf0[19];
-  bf1[17] = bf0[17] + bf0[18];
-  bf1[18] = -bf0[18] + bf0[17];
-  bf1[19] = -bf0[19] + bf0[16];
-  bf1[20] = -bf0[20] + bf0[23];
-  bf1[21] = -bf0[21] + bf0[22];
-  bf1[22] = bf0[22] + bf0[21];
-  bf1[23] = bf0[23] + bf0[20];
-  bf1[24] = bf0[24] + bf0[27];
-  bf1[25] = bf0[25] + bf0[26];
-  bf1[26] = -bf0[26] + bf0[25];
-  bf1[27] = -bf0[27] + bf0[24];
-  bf1[28] = -bf0[28] + bf0[31];
-  bf1[29] = -bf0[29] + bf0[30];
-  bf1[30] = bf0[30] + bf0[29];
-  bf1[31] = bf0[31] + bf0[28];
-  bf1[32] = bf0[32];
-  bf1[33] = bf0[33];
-  bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit);
-  bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit);
-  bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit);
-  bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit);
-  bf1[38] = bf0[38];
-  bf1[39] = bf0[39];
-  bf1[40] = bf0[40];
-  bf1[41] = bf0[41];
-  bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit);
-  bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit);
-  bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit);
-  bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit);
-  bf1[46] = bf0[46];
-  bf1[47] = bf0[47];
-  bf1[48] = bf0[48];
-  bf1[49] = bf0[49];
-  bf1[50] = half_btf(cospi[24], bf0[50], -cospi[40], bf0[45], cos_bit);
-  bf1[51] = half_btf(cospi[24], bf0[51], -cospi[40], bf0[44], cos_bit);
-  bf1[52] = half_btf(cospi[40], bf0[52], cospi[24], bf0[43], cos_bit);
-  bf1[53] = half_btf(cospi[40], bf0[53], cospi[24], bf0[42], cos_bit);
-  bf1[54] = bf0[54];
-  bf1[55] = bf0[55];
-  bf1[56] = bf0[56];
-  bf1[57] = bf0[57];
-  bf1[58] = half_btf(cospi[56], bf0[58], -cospi[8], bf0[37], cos_bit);
-  bf1[59] = half_btf(cospi[56], bf0[59], -cospi[8], bf0[36], cos_bit);
-  bf1[60] = half_btf(cospi[8], bf0[60], cospi[56], bf0[35], cos_bit);
-  bf1[61] = half_btf(cospi[8], bf0[61], cospi[56], bf0[34], cos_bit);
-  bf1[62] = bf0[62];
-  bf1[63] = bf0[63];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 7
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[1];
-  bf1[2] = bf0[2];
-  bf1[3] = bf0[3];
-  bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit);
-  bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit);
-  bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit);
-  bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit);
-  bf1[8] = bf0[8] + bf0[9];
-  bf1[9] = -bf0[9] + bf0[8];
-  bf1[10] = -bf0[10] + bf0[11];
-  bf1[11] = bf0[11] + bf0[10];
-  bf1[12] = bf0[12] + bf0[13];
-  bf1[13] = -bf0[13] + bf0[12];
-  bf1[14] = -bf0[14] + bf0[15];
-  bf1[15] = bf0[15] + bf0[14];
-  bf1[16] = bf0[16];
-  bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit);
-  bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit);
-  bf1[19] = bf0[19];
-  bf1[20] = bf0[20];
-  bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit);
-  bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit);
-  bf1[23] = bf0[23];
-  bf1[24] = bf0[24];
-  bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit);
-  bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit);
-  bf1[27] = bf0[27];
-  bf1[28] = bf0[28];
-  bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit);
-  bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit);
-  bf1[31] = bf0[31];
-  bf1[32] = bf0[32] + bf0[35];
-  bf1[33] = bf0[33] + bf0[34];
-  bf1[34] = -bf0[34] + bf0[33];
-  bf1[35] = -bf0[35] + bf0[32];
-  bf1[36] = -bf0[36] + bf0[39];
-  bf1[37] = -bf0[37] + bf0[38];
-  bf1[38] = bf0[38] + bf0[37];
-  bf1[39] = bf0[39] + bf0[36];
-  bf1[40] = bf0[40] + bf0[43];
-  bf1[41] = bf0[41] + bf0[42];
-  bf1[42] = -bf0[42] + bf0[41];
-  bf1[43] = -bf0[43] + bf0[40];
-  bf1[44] = -bf0[44] + bf0[47];
-  bf1[45] = -bf0[45] + bf0[46];
-  bf1[46] = bf0[46] + bf0[45];
-  bf1[47] = bf0[47] + bf0[44];
-  bf1[48] = bf0[48] + bf0[51];
-  bf1[49] = bf0[49] + bf0[50];
-  bf1[50] = -bf0[50] + bf0[49];
-  bf1[51] = -bf0[51] + bf0[48];
-  bf1[52] = -bf0[52] + bf0[55];
-  bf1[53] = -bf0[53] + bf0[54];
-  bf1[54] = bf0[54] + bf0[53];
-  bf1[55] = bf0[55] + bf0[52];
-  bf1[56] = bf0[56] + bf0[59];
-  bf1[57] = bf0[57] + bf0[58];
-  bf1[58] = -bf0[58] + bf0[57];
-  bf1[59] = -bf0[59] + bf0[56];
-  bf1[60] = -bf0[60] + bf0[63];
-  bf1[61] = -bf0[61] + bf0[62];
-  bf1[62] = bf0[62] + bf0[61];
-  bf1[63] = bf0[63] + bf0[60];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 8
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[1];
-  bf1[2] = bf0[2];
-  bf1[3] = bf0[3];
-  bf1[4] = bf0[4];
-  bf1[5] = bf0[5];
-  bf1[6] = bf0[6];
-  bf1[7] = bf0[7];
-  bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit);
-  bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit);
-  bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit);
-  bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit);
-  bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit);
-  bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit);
-  bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit);
-  bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit);
-  bf1[16] = bf0[16] + bf0[17];
-  bf1[17] = -bf0[17] + bf0[16];
-  bf1[18] = -bf0[18] + bf0[19];
-  bf1[19] = bf0[19] + bf0[18];
-  bf1[20] = bf0[20] + bf0[21];
-  bf1[21] = -bf0[21] + bf0[20];
-  bf1[22] = -bf0[22] + bf0[23];
-  bf1[23] = bf0[23] + bf0[22];
-  bf1[24] = bf0[24] + bf0[25];
-  bf1[25] = -bf0[25] + bf0[24];
-  bf1[26] = -bf0[26] + bf0[27];
-  bf1[27] = bf0[27] + bf0[26];
-  bf1[28] = bf0[28] + bf0[29];
-  bf1[29] = -bf0[29] + bf0[28];
-  bf1[30] = -bf0[30] + bf0[31];
-  bf1[31] = bf0[31] + bf0[30];
-  bf1[32] = bf0[32];
-  bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit);
-  bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit);
-  bf1[35] = bf0[35];
-  bf1[36] = bf0[36];
-  bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit);
-  bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit);
-  bf1[39] = bf0[39];
-  bf1[40] = bf0[40];
-  bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit);
-  bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit);
-  bf1[43] = bf0[43];
-  bf1[44] = bf0[44];
-  bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit);
-  bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit);
-  bf1[47] = bf0[47];
-  bf1[48] = bf0[48];
-  bf1[49] = half_btf(cospi[12], bf0[49], -cospi[52], bf0[46], cos_bit);
-  bf1[50] = half_btf(cospi[52], bf0[50], cospi[12], bf0[45], cos_bit);
-  bf1[51] = bf0[51];
-  bf1[52] = bf0[52];
-  bf1[53] = half_btf(cospi[44], bf0[53], -cospi[20], bf0[42], cos_bit);
-  bf1[54] = half_btf(cospi[20], bf0[54], cospi[44], bf0[41], cos_bit);
-  bf1[55] = bf0[55];
-  bf1[56] = bf0[56];
-  bf1[57] = half_btf(cospi[28], bf0[57], -cospi[36], bf0[38], cos_bit);
-  bf1[58] = half_btf(cospi[36], bf0[58], cospi[28], bf0[37], cos_bit);
-  bf1[59] = bf0[59];
-  bf1[60] = bf0[60];
-  bf1[61] = half_btf(cospi[60], bf0[61], -cospi[4], bf0[34], cos_bit);
-  bf1[62] = half_btf(cospi[4], bf0[62], cospi[60], bf0[33], cos_bit);
-  bf1[63] = bf0[63];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 9
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[1];
-  bf1[2] = bf0[2];
-  bf1[3] = bf0[3];
-  bf1[4] = bf0[4];
-  bf1[5] = bf0[5];
-  bf1[6] = bf0[6];
-  bf1[7] = bf0[7];
-  bf1[8] = bf0[8];
-  bf1[9] = bf0[9];
-  bf1[10] = bf0[10];
-  bf1[11] = bf0[11];
-  bf1[12] = bf0[12];
-  bf1[13] = bf0[13];
-  bf1[14] = bf0[14];
-  bf1[15] = bf0[15];
-  bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit);
-  bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit);
-  bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit);
-  bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit);
-  bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit);
-  bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit);
-  bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit);
-  bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit);
-  bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit);
-  bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit);
-  bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit);
-  bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit);
-  bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit);
-  bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit);
-  bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit);
-  bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit);
-  bf1[32] = bf0[32] + bf0[33];
-  bf1[33] = -bf0[33] + bf0[32];
-  bf1[34] = -bf0[34] + bf0[35];
-  bf1[35] = bf0[35] + bf0[34];
-  bf1[36] = bf0[36] + bf0[37];
-  bf1[37] = -bf0[37] + bf0[36];
-  bf1[38] = -bf0[38] + bf0[39];
-  bf1[39] = bf0[39] + bf0[38];
-  bf1[40] = bf0[40] + bf0[41];
-  bf1[41] = -bf0[41] + bf0[40];
-  bf1[42] = -bf0[42] + bf0[43];
-  bf1[43] = bf0[43] + bf0[42];
-  bf1[44] = bf0[44] + bf0[45];
-  bf1[45] = -bf0[45] + bf0[44];
-  bf1[46] = -bf0[46] + bf0[47];
-  bf1[47] = bf0[47] + bf0[46];
-  bf1[48] = bf0[48] + bf0[49];
-  bf1[49] = -bf0[49] + bf0[48];
-  bf1[50] = -bf0[50] + bf0[51];
-  bf1[51] = bf0[51] + bf0[50];
-  bf1[52] = bf0[52] + bf0[53];
-  bf1[53] = -bf0[53] + bf0[52];
-  bf1[54] = -bf0[54] + bf0[55];
-  bf1[55] = bf0[55] + bf0[54];
-  bf1[56] = bf0[56] + bf0[57];
-  bf1[57] = -bf0[57] + bf0[56];
-  bf1[58] = -bf0[58] + bf0[59];
-  bf1[59] = bf0[59] + bf0[58];
-  bf1[60] = bf0[60] + bf0[61];
-  bf1[61] = -bf0[61] + bf0[60];
-  bf1[62] = -bf0[62] + bf0[63];
-  bf1[63] = bf0[63] + bf0[62];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 10
-  stage++;
-  cospi = cospi_arr(cos_bit);
-  bf0 = output;
-  bf1 = step;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[1];
-  bf1[2] = bf0[2];
-  bf1[3] = bf0[3];
-  bf1[4] = bf0[4];
-  bf1[5] = bf0[5];
-  bf1[6] = bf0[6];
-  bf1[7] = bf0[7];
-  bf1[8] = bf0[8];
-  bf1[9] = bf0[9];
-  bf1[10] = bf0[10];
-  bf1[11] = bf0[11];
-  bf1[12] = bf0[12];
-  bf1[13] = bf0[13];
-  bf1[14] = bf0[14];
-  bf1[15] = bf0[15];
-  bf1[16] = bf0[16];
-  bf1[17] = bf0[17];
-  bf1[18] = bf0[18];
-  bf1[19] = bf0[19];
-  bf1[20] = bf0[20];
-  bf1[21] = bf0[21];
-  bf1[22] = bf0[22];
-  bf1[23] = bf0[23];
-  bf1[24] = bf0[24];
-  bf1[25] = bf0[25];
-  bf1[26] = bf0[26];
-  bf1[27] = bf0[27];
-  bf1[28] = bf0[28];
-  bf1[29] = bf0[29];
-  bf1[30] = bf0[30];
-  bf1[31] = bf0[31];
-  bf1[32] = half_btf(cospi[63], bf0[32], cospi[1], bf0[63], cos_bit);
-  bf1[33] = half_btf(cospi[31], bf0[33], cospi[33], bf0[62], cos_bit);
-  bf1[34] = half_btf(cospi[47], bf0[34], cospi[17], bf0[61], cos_bit);
-  bf1[35] = half_btf(cospi[15], bf0[35], cospi[49], bf0[60], cos_bit);
-  bf1[36] = half_btf(cospi[55], bf0[36], cospi[9], bf0[59], cos_bit);
-  bf1[37] = half_btf(cospi[23], bf0[37], cospi[41], bf0[58], cos_bit);
-  bf1[38] = half_btf(cospi[39], bf0[38], cospi[25], bf0[57], cos_bit);
-  bf1[39] = half_btf(cospi[7], bf0[39], cospi[57], bf0[56], cos_bit);
-  bf1[40] = half_btf(cospi[59], bf0[40], cospi[5], bf0[55], cos_bit);
-  bf1[41] = half_btf(cospi[27], bf0[41], cospi[37], bf0[54], cos_bit);
-  bf1[42] = half_btf(cospi[43], bf0[42], cospi[21], bf0[53], cos_bit);
-  bf1[43] = half_btf(cospi[11], bf0[43], cospi[53], bf0[52], cos_bit);
-  bf1[44] = half_btf(cospi[51], bf0[44], cospi[13], bf0[51], cos_bit);
-  bf1[45] = half_btf(cospi[19], bf0[45], cospi[45], bf0[50], cos_bit);
-  bf1[46] = half_btf(cospi[35], bf0[46], cospi[29], bf0[49], cos_bit);
-  bf1[47] = half_btf(cospi[3], bf0[47], cospi[61], bf0[48], cos_bit);
-  bf1[48] = half_btf(cospi[3], bf0[48], -cospi[61], bf0[47], cos_bit);
-  bf1[49] = half_btf(cospi[35], bf0[49], -cospi[29], bf0[46], cos_bit);
-  bf1[50] = half_btf(cospi[19], bf0[50], -cospi[45], bf0[45], cos_bit);
-  bf1[51] = half_btf(cospi[51], bf0[51], -cospi[13], bf0[44], cos_bit);
-  bf1[52] = half_btf(cospi[11], bf0[52], -cospi[53], bf0[43], cos_bit);
-  bf1[53] = half_btf(cospi[43], bf0[53], -cospi[21], bf0[42], cos_bit);
-  bf1[54] = half_btf(cospi[27], bf0[54], -cospi[37], bf0[41], cos_bit);
-  bf1[55] = half_btf(cospi[59], bf0[55], -cospi[5], bf0[40], cos_bit);
-  bf1[56] = half_btf(cospi[7], bf0[56], -cospi[57], bf0[39], cos_bit);
-  bf1[57] = half_btf(cospi[39], bf0[57], -cospi[25], bf0[38], cos_bit);
-  bf1[58] = half_btf(cospi[23], bf0[58], -cospi[41], bf0[37], cos_bit);
-  bf1[59] = half_btf(cospi[55], bf0[59], -cospi[9], bf0[36], cos_bit);
-  bf1[60] = half_btf(cospi[15], bf0[60], -cospi[49], bf0[35], cos_bit);
-  bf1[61] = half_btf(cospi[47], bf0[61], -cospi[17], bf0[34], cos_bit);
-  bf1[62] = half_btf(cospi[31], bf0[62], -cospi[33], bf0[33], cos_bit);
-  bf1[63] = half_btf(cospi[63], bf0[63], -cospi[1], bf0[32], cos_bit);
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-
-  // stage 11
-  stage++;
-  bf0 = step;
-  bf1 = output;
-  bf1[0] = bf0[0];
-  bf1[1] = bf0[32];
-  bf1[2] = bf0[16];
-  bf1[3] = bf0[48];
-  bf1[4] = bf0[8];
-  bf1[5] = bf0[40];
-  bf1[6] = bf0[24];
-  bf1[7] = bf0[56];
-  bf1[8] = bf0[4];
-  bf1[9] = bf0[36];
-  bf1[10] = bf0[20];
-  bf1[11] = bf0[52];
-  bf1[12] = bf0[12];
-  bf1[13] = bf0[44];
-  bf1[14] = bf0[28];
-  bf1[15] = bf0[60];
-  bf1[16] = bf0[2];
-  bf1[17] = bf0[34];
-  bf1[18] = bf0[18];
-  bf1[19] = bf0[50];
-  bf1[20] = bf0[10];
-  bf1[21] = bf0[42];
-  bf1[22] = bf0[26];
-  bf1[23] = bf0[58];
-  bf1[24] = bf0[6];
-  bf1[25] = bf0[38];
-  bf1[26] = bf0[22];
-  bf1[27] = bf0[54];
-  bf1[28] = bf0[14];
-  bf1[29] = bf0[46];
-  bf1[30] = bf0[30];
-  bf1[31] = bf0[62];
-  bf1[32] = bf0[1];
-  bf1[33] = bf0[33];
-  bf1[34] = bf0[17];
-  bf1[35] = bf0[49];
-  bf1[36] = bf0[9];
-  bf1[37] = bf0[41];
-  bf1[38] = bf0[25];
-  bf1[39] = bf0[57];
-  bf1[40] = bf0[5];
-  bf1[41] = bf0[37];
-  bf1[42] = bf0[21];
-  bf1[43] = bf0[53];
-  bf1[44] = bf0[13];
-  bf1[45] = bf0[45];
-  bf1[46] = bf0[29];
-  bf1[47] = bf0[61];
-  bf1[48] = bf0[3];
-  bf1[49] = bf0[35];
-  bf1[50] = bf0[19];
-  bf1[51] = bf0[51];
-  bf1[52] = bf0[11];
-  bf1[53] = bf0[43];
-  bf1[54] = bf0[27];
-  bf1[55] = bf0[59];
-  bf1[56] = bf0[7];
-  bf1[57] = bf0[39];
-  bf1[58] = bf0[23];
-  bf1[59] = bf0[55];
-  bf1[60] = bf0[15];
-  bf1[61] = bf0[47];
-  bf1[62] = bf0[31];
-  bf1[63] = bf0[63];
-  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
-}
diff --git a/third_party/aom/av1/encoder/av1_fwd_txfm1d.h b/third_party/aom/av1/encoder/av1_fwd_txfm1d.h
deleted file mode 100644
index 9dcf16552..000000000
--- a/third_party/aom/av1/encoder/av1_fwd_txfm1d.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_AV1_FWD_TXFM1D_H_
-#define AOM_AV1_ENCODER_AV1_FWD_TXFM1D_H_
-
-#include "av1/common/av1_txfm.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_fdct4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                   const int8_t *stage_range);
-void av1_fdct8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                   const int8_t *stage_range);
-void av1_fdct16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range);
-void av1_fdct32_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range);
-void av1_fdct64_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range);
-void av1_fadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range);
-void av1_fadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                    const int8_t *stage_range);
-void av1_fadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
-                     const int8_t *stage_range);
-void av1_fidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
-                      const int8_t *stage_range);
-void av1_fidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
-                      const int8_t *stage_range);
-void av1_fidentity16_c(const int32_t *input, int32_t *output, int8_t cos_bit,
-                       const int8_t *stage_range);
-void av1_fidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit,
-                       const int8_t *stage_range);
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // AOM_AV1_ENCODER_AV1_FWD_TXFM1D_H_
diff --git a/third_party/aom/av1/encoder/av1_fwd_txfm1d_cfg.h b/third_party/aom/av1/encoder/av1_fwd_txfm1d_cfg.h
deleted file mode 100644
index 98b6530db..000000000
--- a/third_party/aom/av1/encoder/av1_fwd_txfm1d_cfg.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_AV1_FWD_TXFM1D_CFG_H_
-#define AOM_AV1_ENCODER_AV1_FWD_TXFM1D_CFG_H_
-#include "av1/common/enums.h"
-#include "av1/encoder/av1_fwd_txfm1d.h"
-extern const int8_t *fwd_txfm_shift_ls[TX_SIZES_ALL];
-extern const int8_t fwd_cos_bit_col[5][5];
-extern const int8_t fwd_cos_bit_row[5][5];
-#endif  // AOM_AV1_ENCODER_AV1_FWD_TXFM1D_CFG_H_
diff --git a/third_party/aom/av1/encoder/av1_fwd_txfm2d.c b/third_party/aom/av1/encoder/av1_fwd_txfm2d.c
deleted file mode 100644
index f25a667cf..000000000
--- a/third_party/aom/av1/encoder/av1_fwd_txfm2d.c
+++ /dev/null
@@ -1,431 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/txfm_common.h"
-#include "av1/common/enums.h"
-#include "av1/common/av1_txfm.h"
-#include "av1/encoder/av1_fwd_txfm1d.h"
-#include "av1/encoder/av1_fwd_txfm1d_cfg.h"
-
-static INLINE TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
-  switch (txfm_type) {
-    case TXFM_TYPE_DCT4: return av1_fdct4_new;
-    case TXFM_TYPE_DCT8: return av1_fdct8_new;
-    case TXFM_TYPE_DCT16: return av1_fdct16_new;
-    case TXFM_TYPE_DCT32: return av1_fdct32_new;
-    case TXFM_TYPE_DCT64: return av1_fdct64_new;
-    case TXFM_TYPE_ADST4: return av1_fadst4_new;
-    case TXFM_TYPE_ADST8: return av1_fadst8_new;
-    case TXFM_TYPE_ADST16: return av1_fadst16_new;
-    case TXFM_TYPE_IDENTITY4: return av1_fidentity4_c;
-    case TXFM_TYPE_IDENTITY8: return av1_fidentity8_c;
-    case TXFM_TYPE_IDENTITY16: return av1_fidentity16_c;
-    case TXFM_TYPE_IDENTITY32: return av1_fidentity32_c;
-    default: assert(0); return NULL;
-  }
-}
-
-void av1_gen_fwd_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
-                             const TXFM_2D_FLIP_CFG *cfg, int bd) {
-  // Take the shift from the larger dimension in the rectangular case.
-  const int8_t *shift = cfg->shift;
-  // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
-  for (int i = 0; i < cfg->stage_num_col && i < MAX_TXFM_STAGE_NUM; ++i) {
-    stage_range_col[i] = cfg->stage_range_col[i] + shift[0] + bd + 1;
-  }
-
-  // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
-  for (int i = 0; i < cfg->stage_num_row && i < MAX_TXFM_STAGE_NUM; ++i) {
-    stage_range_row[i] = cfg->stage_range_row[i] + shift[0] + shift[1] + bd + 1;
-  }
-}
-
-static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
-                                const int stride, const TXFM_2D_FLIP_CFG *cfg,
-                                int32_t *buf, int bd) {
-  int c, r;
-  // Note when assigning txfm_size_col, we use the txfm_size from the
-  // row configuration and vice versa. This is intentionally done to
-  // accurately perform rectangular transforms. When the transform is
-  // rectangular, the number of columns will be the same as the
-  // txfm_size stored in the row cfg struct. It will make no difference
-  // for square transforms.
-  const int txfm_size_col = tx_size_wide[cfg->tx_size];
-  const int txfm_size_row = tx_size_high[cfg->tx_size];
-  // Take the shift from the larger dimension in the rectangular case.
-  const int8_t *shift = cfg->shift;
-  const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
-  int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
-  int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
-  assert(cfg->stage_num_col <= MAX_TXFM_STAGE_NUM);
-  assert(cfg->stage_num_row <= MAX_TXFM_STAGE_NUM);
-  av1_gen_fwd_stage_range(stage_range_col, stage_range_row, cfg, bd);
-
-  const int8_t cos_bit_col = cfg->cos_bit_col;
-  const int8_t cos_bit_row = cfg->cos_bit_row;
-  const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col);
-  const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->txfm_type_row);
-
-  // use output buffer as temp buffer
-  int32_t *temp_in = output;
-  int32_t *temp_out = output + txfm_size_row;
-
-  // Columns
-  for (c = 0; c < txfm_size_col; ++c) {
-    if (cfg->ud_flip == 0) {
-      for (r = 0; r < txfm_size_row; ++r) temp_in[r] = input[r * stride + c];
-    } else {
-      for (r = 0; r < txfm_size_row; ++r)
-        // flip upside down
-        temp_in[r] = input[(txfm_size_row - r - 1) * stride + c];
-    }
-    av1_round_shift_array(temp_in, txfm_size_row, -shift[0]);
-    txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
-    av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
-    if (cfg->lr_flip == 0) {
-      for (r = 0; r < txfm_size_row; ++r)
-        buf[r * txfm_size_col + c] = temp_out[r];
-    } else {
-      for (r = 0; r < txfm_size_row; ++r)
-        // flip from left to right
-        buf[r * txfm_size_col + (txfm_size_col - c - 1)] = temp_out[r];
-    }
-  }
-
-  // Rows
-  for (r = 0; r < txfm_size_row; ++r) {
-    txfm_func_row(buf + r * txfm_size_col, output + r * txfm_size_col,
-                  cos_bit_row, stage_range_row);
-    av1_round_shift_array(output + r * txfm_size_col, txfm_size_col, -shift[2]);
-    if (abs(rect_type) == 1) {
-      // Multiply everything by Sqrt2 if the transform is rectangular and the
-      // size difference is a factor of 2.
-      for (c = 0; c < txfm_size_col; ++c) {
-        output[r * txfm_size_col + c] = round_shift(
-            (int64_t)output[r * txfm_size_col + c] * NewSqrt2, NewSqrt2Bits);
-      }
-    }
-  }
-}
-
-void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride,
-                          TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 8]);
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_4X8, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride,
-                          TX_TYPE tx_type, int bd) {
-  int32_t txfm_buf[8 * 4];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_8X4, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride,
-                           TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[8 * 16]);
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_8X16, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride,
-                           TX_TYPE tx_type, int bd) {
-  int32_t txfm_buf[16 * 8];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_16X8, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride,
-                            TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[16 * 32]);
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_16X32, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride,
-                            TX_TYPE tx_type, int bd) {
-  int32_t txfm_buf[32 * 16];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_32X16, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_4x16_c(const int16_t *input, int32_t *output, int stride,
-                           TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 16]);
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_4X16, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_16x4_c(const int16_t *input, int32_t *output, int stride,
-                           TX_TYPE tx_type, int bd) {
-  int32_t txfm_buf[16 * 4];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_16X4, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_8x32_c(const int16_t *input, int32_t *output, int stride,
-                           TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[32 * 8]);
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_8X32, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_32x8_c(const int16_t *input, int32_t *output, int stride,
-                           TX_TYPE tx_type, int bd) {
-  int32_t txfm_buf[32 * 8];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_32X8, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride,
-                          TX_TYPE tx_type, int bd) {
-  int32_t txfm_buf[4 * 4];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_4X4, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride,
-                          TX_TYPE tx_type, int bd) {
-  int32_t txfm_buf[8 * 8];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_8X8, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride,
-                            TX_TYPE tx_type, int bd) {
-  int32_t txfm_buf[16 * 16];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_16X16, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride,
-                            TX_TYPE tx_type, int bd) {
-  int32_t txfm_buf[32 * 32];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_32X32, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-}
-
-void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
-                            TX_TYPE tx_type, int bd) {
-  int32_t txfm_buf[64 * 64];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_64X64, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-
-  // Zero out top-right 32x32 area.
-  for (int row = 0; row < 32; ++row) {
-    memset(output + row * 64 + 32, 0, 32 * sizeof(*output));
-  }
-  // Zero out the bottom 64x32 area.
-  memset(output + 32 * 64, 0, 32 * 64 * sizeof(*output));
-  // Re-pack non-zero coeffs in the first 32x32 indices.
-  for (int row = 1; row < 32; ++row) {
-    memcpy(output + row * 32, output + row * 64, 32 * sizeof(*output));
-  }
-}
-
-void av1_fwd_txfm2d_32x64_c(const int16_t *input, int32_t *output, int stride,
-                            TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[32 * 64]);
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_32X64, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-  // Zero out the bottom 32x32 area.
-  memset(output + 32 * 32, 0, 32 * 32 * sizeof(*output));
-  // Note: no repacking needed here.
-}
-
-void av1_fwd_txfm2d_64x32_c(const int16_t *input, int32_t *output, int stride,
-                            TX_TYPE tx_type, int bd) {
-  int32_t txfm_buf[64 * 32];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_64X32, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-
-  // Zero out right 32x32 area.
-  for (int row = 0; row < 32; ++row) {
-    memset(output + row * 64 + 32, 0, 32 * sizeof(*output));
-  }
-  // Re-pack non-zero coeffs in the first 32x32 indices.
-  for (int row = 1; row < 32; ++row) {
-    memcpy(output + row * 32, output + row * 64, 32 * sizeof(*output));
-  }
-}
-
-void av1_fwd_txfm2d_16x64_c(const int16_t *input, int32_t *output, int stride,
-                            TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(32, int32_t, txfm_buf[64 * 16]);
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_16X64, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-  // Zero out the bottom 16x32 area.
-  memset(output + 16 * 32, 0, 16 * 32 * sizeof(*output));
-  // Note: no repacking needed here.
-}
-
-void av1_fwd_txfm2d_64x16_c(const int16_t *input, int32_t *output, int stride,
-                            TX_TYPE tx_type, int bd) {
-  int32_t txfm_buf[64 * 16];
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_64X16, &cfg);
-  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
-  // Zero out right 32x16 area.
-  for (int row = 0; row < 16; ++row) {
-    memset(output + row * 64 + 32, 0, 32 * sizeof(*output));
-  }
-  // Re-pack non-zero coeffs in the first 32x16 indices.
-  for (int row = 1; row < 16; ++row) {
-    memcpy(output + row * 32, output + row * 64, 32 * sizeof(*output));
-  }
-}
-
-static const int8_t fwd_shift_4x4[3] = { 2, 0, 0 };
-static const int8_t fwd_shift_8x8[3] = { 2, -1, 0 };
-static const int8_t fwd_shift_16x16[3] = { 2, -2, 0 };
-static const int8_t fwd_shift_32x32[3] = { 2, -4, 0 };
-static const int8_t fwd_shift_64x64[3] = { 0, -2, -2 };
-static const int8_t fwd_shift_4x8[3] = { 2, -1, 0 };
-static const int8_t fwd_shift_8x4[3] = { 2, -1, 0 };
-static const int8_t fwd_shift_8x16[3] = { 2, -2, 0 };
-static const int8_t fwd_shift_16x8[3] = { 2, -2, 0 };
-static const int8_t fwd_shift_16x32[3] = { 2, -4, 0 };
-static const int8_t fwd_shift_32x16[3] = { 2, -4, 0 };
-static const int8_t fwd_shift_32x64[3] = { 0, -2, -2 };
-static const int8_t fwd_shift_64x32[3] = { 2, -4, -2 };
-static const int8_t fwd_shift_4x16[3] = { 2, -1, 0 };
-static const int8_t fwd_shift_16x4[3] = { 2, -1, 0 };
-static const int8_t fwd_shift_8x32[3] = { 2, -2, 0 };
-static const int8_t fwd_shift_32x8[3] = { 2, -2, 0 };
-static const int8_t fwd_shift_16x64[3] = { 0, -2, 0 };
-static const int8_t fwd_shift_64x16[3] = { 2, -4, 0 };
-
-const int8_t *fwd_txfm_shift_ls[TX_SIZES_ALL] = {
-  fwd_shift_4x4,   fwd_shift_8x8,   fwd_shift_16x16, fwd_shift_32x32,
-  fwd_shift_64x64, fwd_shift_4x8,   fwd_shift_8x4,   fwd_shift_8x16,
-  fwd_shift_16x8,  fwd_shift_16x32, fwd_shift_32x16, fwd_shift_32x64,
-  fwd_shift_64x32, fwd_shift_4x16,  fwd_shift_16x4,  fwd_shift_8x32,
-  fwd_shift_32x8,  fwd_shift_16x64, fwd_shift_64x16,
-};
-
-const int8_t fwd_cos_bit_col[MAX_TXWH_IDX /*txw_idx*/]
-                            [MAX_TXWH_IDX /*txh_idx*/] = {
-                              { 13, 13, 13, 0, 0 },
-                              { 13, 13, 13, 12, 0 },
-                              { 13, 13, 13, 12, 13 },
-                              { 0, 13, 13, 12, 13 },
-                              { 0, 0, 13, 12, 13 }
-                            };
-
-const int8_t fwd_cos_bit_row[MAX_TXWH_IDX /*txw_idx*/]
-                            [MAX_TXWH_IDX /*txh_idx*/] = {
-                              { 13, 13, 12, 0, 0 },
-                              { 13, 13, 13, 12, 0 },
-                              { 13, 13, 12, 13, 12 },
-                              { 0, 12, 13, 12, 11 },
-                              { 0, 0, 12, 11, 10 }
-                            };
-
-static const int8_t fdct4_range_mult2[4] = { 0, 2, 3, 3 };
-static const int8_t fdct8_range_mult2[6] = { 0, 2, 4, 5, 5, 5 };
-static const int8_t fdct16_range_mult2[8] = { 0, 2, 4, 6, 7, 7, 7, 7 };
-static const int8_t fdct32_range_mult2[10] = { 0, 2, 4, 6, 8, 9, 9, 9, 9, 9 };
-static const int8_t fdct64_range_mult2[12] = { 0,  2,  4,  6,  8,  10,
-                                               11, 11, 11, 11, 11, 11 };
-
-static const int8_t fadst4_range_mult2[7] = { 0, 2, 4, 3, 3, 3, 3 };
-static const int8_t fadst8_range_mult2[8] = { 0, 0, 1, 3, 3, 5, 5, 5 };
-static const int8_t fadst16_range_mult2[10] = { 0, 0, 1, 3, 3, 5, 5, 7, 7, 7 };
-
-static const int8_t max_fwd_range_mult2_col[5] = { 3, 5, 7, 9, 11 };
-
-static const int8_t fidtx4_range_mult2[1] = { 1 };
-static const int8_t fidtx8_range_mult2[1] = { 2 };
-static const int8_t fidtx16_range_mult2[1] = { 3 };
-static const int8_t fidtx32_range_mult2[1] = { 4 };
-
-#if 0
-const int8_t fwd_idtx_range_row[MAX_TXWH_IDX /*txw_idx*/]
-                               [MAX_TXWH_IDX /*txh_idx*/] = { { 2, 4, 5, 0, 0 },
-                                                              { 3, 4, 5, 6, 0 },
-                                                              { 4, 5, 6, 7, 8 },
-                                                              { 0, 5, 6, 7, 8 },
-                                                              { 0, 0, 7, 8,
-                                                                9 } };
-#endif
-
-const int8_t *fwd_txfm_range_mult2_list[TXFM_TYPES] = {
-  fdct4_range_mult2,  fdct8_range_mult2,   fdct16_range_mult2,
-  fdct32_range_mult2, fdct64_range_mult2,  fadst4_range_mult2,
-  fadst8_range_mult2, fadst16_range_mult2, fidtx4_range_mult2,
-  fidtx8_range_mult2, fidtx16_range_mult2, fidtx32_range_mult2
-};
-
-static INLINE void set_fwd_txfm_non_scale_range(TXFM_2D_FLIP_CFG *cfg) {
-  const int txh_idx = get_txh_idx(cfg->tx_size);
-  av1_zero(cfg->stage_range_col);
-  av1_zero(cfg->stage_range_row);
-
-  if (cfg->txfm_type_col != TXFM_TYPE_INVALID) {
-    int stage_num_col = cfg->stage_num_col;
-    const int8_t *range_mult2_col =
-        fwd_txfm_range_mult2_list[cfg->txfm_type_col];
-    for (int i = 0; i < stage_num_col; ++i)
-      cfg->stage_range_col[i] = (range_mult2_col[i] + 1) >> 1;
-  }
-
-  if (cfg->txfm_type_row != TXFM_TYPE_INVALID) {
-    int stage_num_row = cfg->stage_num_row;
-    const int8_t *range_mult2_row =
-        fwd_txfm_range_mult2_list[cfg->txfm_type_row];
-    for (int i = 0; i < stage_num_row; ++i)
-      cfg->stage_range_row[i] =
-          (max_fwd_range_mult2_col[txh_idx] + range_mult2_row[i] + 1) >> 1;
-  }
-}
-
-void av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
-                          TXFM_2D_FLIP_CFG *cfg) {
-  assert(cfg != NULL);
-  cfg->tx_size = tx_size;
-  set_flip_cfg(tx_type, cfg);
-  const TX_TYPE_1D tx_type_1d_col = vtx_tab[tx_type];
-  const TX_TYPE_1D tx_type_1d_row = htx_tab[tx_type];
-  const int txw_idx = tx_size_wide_log2[tx_size] - tx_size_wide_log2[0];
-  const int txh_idx = tx_size_high_log2[tx_size] - tx_size_high_log2[0];
-  cfg->shift = fwd_txfm_shift_ls[tx_size];
-  cfg->cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  cfg->cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  cfg->txfm_type_col = av1_txfm_type_ls[txh_idx][tx_type_1d_col];
-  cfg->txfm_type_row = av1_txfm_type_ls[txw_idx][tx_type_1d_row];
-  cfg->stage_num_col = av1_txfm_stage_num_list[cfg->txfm_type_col];
-  cfg->stage_num_row = av1_txfm_stage_num_list[cfg->txfm_type_row];
-  set_fwd_txfm_non_scale_range(cfg);
-}
diff --git a/third_party/aom/av1/encoder/av1_quantize.c b/third_party/aom/av1/encoder/av1_quantize.c
deleted file mode 100644
index a0a926005..000000000
--- a/third_party/aom/av1/encoder/av1_quantize.c
+++ /dev/null
@@ -1,738 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/quantize.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-
-#include "av1/common/idct.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/scan.h"
-#include "av1/common/seg_common.h"
-
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/rd.h"
-
-void av1_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr,
-                       tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) {
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-  *eob_ptr = 0;
-}
-
-static void quantize_fp_helper_c(
-    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
-    const int16_t *round_ptr, const int16_t *quant_ptr,
-    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
-    const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
-    const qm_val_t *iqm_ptr, int log_scale) {
-  int i, eob = -1;
-  // TODO(jingning) Decide the need of these arguments after the
-  // quantization process is completed.
-  (void)zbin_ptr;
-  (void)quant_shift_ptr;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (qm_ptr == NULL && iqm_ptr == NULL) {
-    const int rounding0 = ROUND_POWER_OF_TWO(round_ptr[0], log_scale);
-    {  // rc == 0
-      const int coeff = coeff_ptr[0];
-      const int coeff_sign = (coeff >> 31);
-      int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-      if ((abs_coeff << (1 + log_scale)) >= (int32_t)(dequant_ptr[0])) {
-        abs_coeff = clamp64(abs_coeff + rounding0, INT16_MIN, INT16_MAX);
-        const int tmp32 = (int)((abs_coeff * quant_ptr[0]) >> (16 - log_scale));
-        if (tmp32) {
-          qcoeff_ptr[0] = (tmp32 ^ coeff_sign) - coeff_sign;
-          const tran_low_t abs_dqcoeff = (tmp32 * dequant_ptr[0]) >> log_scale;
-          dqcoeff_ptr[0] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
-          eob = 0;
-        }
-      }
-    }
-    const int rounding1 = ROUND_POWER_OF_TWO(round_ptr[1], log_scale);
-    const int32_t thresh1 = (int32_t)(dequant_ptr[1]);
-    for (i = 1; i < n_coeffs; i++) {
-      const int coeff = coeff_ptr[i];
-      const int coeff_sign = (coeff >> 31);
-      int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-      if ((abs_coeff << (1 + log_scale)) >= thresh1) {
-        abs_coeff = clamp64(abs_coeff + rounding1, INT16_MIN, INT16_MAX);
-        const int tmp32 = (int)((abs_coeff * quant_ptr[1]) >> (16 - log_scale));
-        if (tmp32) {
-          qcoeff_ptr[i] = (tmp32 ^ coeff_sign) - coeff_sign;
-          const tran_low_t abs_dqcoeff = (tmp32 * dequant_ptr[1]) >> log_scale;
-          dqcoeff_ptr[i] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
-          eob = AOMMAX(iscan[i], eob);
-        }
-      }
-    }
-  } else {
-    // Quantization pass: All coefficients with index >= zero_flag are
-    // skippable. Note: zero_flag can be zero.
-    for (i = 0; i < n_coeffs; i++) {
-      const int rc = scan[i];
-      const int coeff = coeff_ptr[rc];
-      const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
-      const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
-      const int dequant =
-          (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
-          AOM_QM_BITS;
-      const int coeff_sign = (coeff >> 31);
-      int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-      int tmp32 = 0;
-      if (abs_coeff * wt >=
-          (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
-        abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
-        abs_coeff = clamp64(abs_coeff, INT16_MIN, INT16_MAX);
-        tmp32 = (int)((abs_coeff * wt * quant_ptr[rc != 0]) >>
-                      (16 - log_scale + AOM_QM_BITS));
-        qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
-        const tran_low_t abs_dqcoeff = (tmp32 * dequant) >> log_scale;
-        dqcoeff_ptr[rc] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
-      }
-
-      if (tmp32) eob = i;
-    }
-  }
-  *eob_ptr = eob + 1;
-}
-
-static void highbd_quantize_fp_helper_c(
-    const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr,
-    const int16_t *round_ptr, const int16_t *quant_ptr,
-    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
-    const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
-    const qm_val_t *iqm_ptr, int log_scale) {
-  int i;
-  int eob = -1;
-  const int shift = 16 - log_scale;
-  // TODO(jingning) Decide the need of these arguments after the
-  // quantization process is completed.
-  (void)zbin_ptr;
-  (void)quant_shift_ptr;
-  (void)iscan;
-
-  if (qm_ptr || iqm_ptr) {
-    // Quantization pass: All coefficients with index >= zero_flag are
-    // skippable. Note: zero_flag can be zero.
-    for (i = 0; i < count; i++) {
-      const int rc = scan[i];
-      const int coeff = coeff_ptr[rc];
-      const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
-      const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
-      const int dequant =
-          (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
-          AOM_QM_BITS;
-      const int coeff_sign = (coeff >> 31);
-      const int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-      int abs_qcoeff = 0;
-      if (abs_coeff * wt >=
-          (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
-        const int64_t tmp =
-            abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
-        abs_qcoeff =
-            (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
-        qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-        const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
-        dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
-        if (abs_qcoeff) eob = i;
-      } else {
-        qcoeff_ptr[rc] = 0;
-        dqcoeff_ptr[rc] = 0;
-      }
-    }
-  } else {
-    const int log_scaled_round_arr[2] = {
-      ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
-      ROUND_POWER_OF_TWO(round_ptr[1], log_scale),
-    };
-    for (i = 0; i < count; i++) {
-      const int rc = scan[i];
-      const int coeff = coeff_ptr[rc];
-      const int rc01 = (rc != 0);
-      const int coeff_sign = (coeff >> 31);
-      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-      const int log_scaled_round = log_scaled_round_arr[rc01];
-      if ((abs_coeff << (1 + log_scale)) >= dequant_ptr[rc01]) {
-        const int quant = quant_ptr[rc01];
-        const int dequant = dequant_ptr[rc01];
-        const int64_t tmp = (int64_t)abs_coeff + log_scaled_round;
-        const int abs_qcoeff = (int)((tmp * quant) >> shift);
-        qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-        const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
-        if (abs_qcoeff) eob = i;
-        dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
-      } else {
-        qcoeff_ptr[rc] = 0;
-        dqcoeff_ptr[rc] = 0;
-      }
-    }
-  }
-  *eob_ptr = eob + 1;
-}
-
-void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                       const int16_t *zbin_ptr, const int16_t *round_ptr,
-                       const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
-                       tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                       const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                       const int16_t *scan, const int16_t *iscan) {
-  quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
-                       quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
-                       eob_ptr, scan, iscan, NULL, NULL, 0);
-}
-
-void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                             const int16_t *zbin_ptr, const int16_t *round_ptr,
-                             const int16_t *quant_ptr,
-                             const int16_t *quant_shift_ptr,
-                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                             const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                             const int16_t *scan, const int16_t *iscan) {
-  quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
-                       quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
-                       eob_ptr, scan, iscan, NULL, NULL, 1);
-}
-
-void av1_quantize_fp_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                             const int16_t *zbin_ptr, const int16_t *round_ptr,
-                             const int16_t *quant_ptr,
-                             const int16_t *quant_shift_ptr,
-                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                             const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                             const int16_t *scan, const int16_t *iscan) {
-  quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
-                       quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
-                       eob_ptr, scan, iscan, NULL, NULL, 2);
-}
-
-void av1_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                            const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
-                            tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                            const SCAN_ORDER *sc, const QUANT_PARAM *qparam) {
-  const qm_val_t *qm_ptr = qparam->qmatrix;
-  const qm_val_t *iqm_ptr = qparam->iqmatrix;
-  if (qm_ptr != NULL && iqm_ptr != NULL) {
-    quantize_fp_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
-                         p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                         dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                         sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
-  } else {
-    switch (qparam->log_scale) {
-      case 0:
-        if (n_coeffs < 16) {
-          // TODO(jingning): Need SIMD implementation for smaller block size
-          // quantization.
-          quantize_fp_helper_c(
-              coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
-              p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
-              p->dequant_QTX, eob_ptr, sc->scan, sc->iscan, NULL, NULL, 0);
-        } else {
-          av1_quantize_fp(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
-                          p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                          dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                          sc->iscan);
-        }
-        break;
-      case 1:
-        av1_quantize_fp_32x32(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
-                              p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                              dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                              sc->iscan);
-        break;
-      case 2:
-        av1_quantize_fp_64x64(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
-                              p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                              dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                              sc->iscan);
-        break;
-      default: assert(0);
-    }
-  }
-}
-
-void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                           const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
-                           tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                           const SCAN_ORDER *sc, const QUANT_PARAM *qparam) {
-  const qm_val_t *qm_ptr = qparam->qmatrix;
-  const qm_val_t *iqm_ptr = qparam->iqmatrix;
-  if (qm_ptr != NULL && iqm_ptr != NULL) {
-    quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
-                        p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                        dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                        sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
-  } else {
-    switch (qparam->log_scale) {
-      case 0:
-        aom_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
-                       p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                       dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                       sc->iscan);
-        break;
-      case 1:
-        aom_quantize_b_32x32(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
-                             p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                             dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                             sc->iscan);
-        break;
-      case 2:
-        aom_quantize_b_64x64(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
-                             p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                             dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                             sc->iscan);
-        break;
-      default: assert(0);
-    }
-  }
-}
-
-static void quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
-                        int skip_block, const int16_t *round_ptr,
-                        const int16_t quant, tran_low_t *qcoeff_ptr,
-                        tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
-                        uint16_t *eob_ptr, const qm_val_t *qm_ptr,
-                        const qm_val_t *iqm_ptr, const int log_scale) {
-  const int rc = 0;
-  const int coeff = coeff_ptr[rc];
-  const int coeff_sign = (coeff >> 31);
-  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-  int64_t tmp;
-  int eob = -1;
-  int32_t tmp32;
-  int dequant;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    const int wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
-    const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
-    tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale),
-                INT16_MIN, INT16_MAX);
-    tmp32 = (int32_t)((tmp * wt * quant) >> (16 - log_scale + AOM_QM_BITS));
-    qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
-    dequant = (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
-    const tran_low_t abs_dqcoeff = (tmp32 * dequant) >> log_scale;
-    dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
-    if (tmp32) eob = 0;
-  }
-  *eob_ptr = eob + 1;
-}
-
-void av1_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                            const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
-                            tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                            const SCAN_ORDER *sc, const QUANT_PARAM *qparam) {
-  // obsolete skip_block
-  const int skip_block = 0;
-  (void)sc;
-  assert(qparam->log_scale >= 0 && qparam->log_scale < (3));
-  const qm_val_t *qm_ptr = qparam->qmatrix;
-  const qm_val_t *iqm_ptr = qparam->iqmatrix;
-  quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round_QTX,
-              p->quant_fp_QTX[0], qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX[0],
-              eob_ptr, qm_ptr, iqm_ptr, qparam->log_scale);
-}
-
-void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr,
-                                   intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
-                                   tran_low_t *qcoeff_ptr,
-                                   tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                                   const SCAN_ORDER *sc,
-                                   const QUANT_PARAM *qparam) {
-  const qm_val_t *qm_ptr = qparam->qmatrix;
-  const qm_val_t *iqm_ptr = qparam->iqmatrix;
-  if (qm_ptr != NULL && iqm_ptr != NULL) {
-    highbd_quantize_fp_helper_c(
-        coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, p->quant_fp_QTX,
-        p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
-        sc->scan, sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
-  } else {
-    if (n_coeffs < 16) {
-      // TODO(jingning): Need SIMD implementation for smaller block size
-      // quantization.
-      av1_highbd_quantize_fp_c(
-          coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, p->quant_fp_QTX,
-          p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
-          sc->scan, sc->iscan, qparam->log_scale);
-      return;
-    }
-    av1_highbd_quantize_fp(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX,
-                           p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                           dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                           sc->iscan, qparam->log_scale);
-  }
-}
-
-void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
-                                  intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
-                                  tran_low_t *qcoeff_ptr,
-                                  tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                                  const SCAN_ORDER *sc,
-                                  const QUANT_PARAM *qparam) {
-  const qm_val_t *qm_ptr = qparam->qmatrix;
-  const qm_val_t *iqm_ptr = qparam->iqmatrix;
-  if (qm_ptr != NULL && iqm_ptr != NULL) {
-    highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
-                               p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                               dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                               sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
-  } else {
-    switch (qparam->log_scale) {
-      case 0:
-        if (LIKELY(n_coeffs >= 8)) {
-          aom_highbd_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
-                                p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
-                                dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
-                                sc->iscan);
-        } else {
-          // TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size
-          // quantization
-          aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, p->zbin_QTX,
-                                  p->round_QTX, p->quant_QTX,
-                                  p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr,
-                                  p->dequant_QTX, eob_ptr, sc->scan, sc->iscan);
-        }
-        break;
-      case 1:
-        aom_highbd_quantize_b_32x32(
-            coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
-            p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
-            eob_ptr, sc->scan, sc->iscan);
-        break;
-      case 2:
-        aom_highbd_quantize_b_64x64(
-            coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
-            p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
-            eob_ptr, sc->scan, sc->iscan);
-        break;
-      default: assert(0);
-    }
-  }
-}
-
-static INLINE void highbd_quantize_dc(
-    const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
-    const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr,
-    const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, const int log_scale) {
-  int eob = -1;
-
-  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
-  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
-  if (!skip_block) {
-    const qm_val_t wt = qm_ptr != NULL ? qm_ptr[0] : (1 << AOM_QM_BITS);
-    const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[0] : (1 << AOM_QM_BITS);
-    const int coeff = coeff_ptr[0];
-    const int coeff_sign = (coeff >> 31);
-    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-    const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], log_scale);
-    const int64_t tmpw = tmp * wt;
-    const int abs_qcoeff =
-        (int)((tmpw * quant) >> (16 - log_scale + AOM_QM_BITS));
-    qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
-    const int dequant =
-        (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
-
-    const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale;
-    dqcoeff_ptr[0] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
-    if (abs_qcoeff) eob = 0;
-  }
-  *eob_ptr = eob + 1;
-}
-
-void av1_highbd_quantize_dc_facade(const tran_low_t *coeff_ptr,
-                                   intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
-                                   tran_low_t *qcoeff_ptr,
-                                   tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                                   const SCAN_ORDER *sc,
-                                   const QUANT_PARAM *qparam) {
-  // obsolete skip_block
-  const int skip_block = 0;
-  const qm_val_t *qm_ptr = qparam->qmatrix;
-  const qm_val_t *iqm_ptr = qparam->iqmatrix;
-  (void)sc;
-
-  highbd_quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round_QTX,
-                     p->quant_fp_QTX[0], qcoeff_ptr, dqcoeff_ptr,
-                     p->dequant_QTX[0], eob_ptr, qm_ptr, iqm_ptr,
-                     qparam->log_scale);
-}
-
-void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
-                              const int16_t *zbin_ptr, const int16_t *round_ptr,
-                              const int16_t *quant_ptr,
-                              const int16_t *quant_shift_ptr,
-                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                              const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                              const int16_t *scan, const int16_t *iscan,
-                              int log_scale) {
-  highbd_quantize_fp_helper_c(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr,
-                              quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
-                              dequant_ptr, eob_ptr, scan, iscan, NULL, NULL,
-                              log_scale);
-}
-
-static void invert_quant(int16_t *quant, int16_t *shift, int d) {
-  uint32_t t;
-  int l, m;
-  t = d;
-  for (l = 0; t > 1; l++) t >>= 1;
-  m = 1 + (1 << (16 + l)) / d;
-  *quant = (int16_t)(m - (1 << 16));
-  *shift = 1 << (16 - l);
-}
-
-static int get_qzbin_factor(int q, aom_bit_depth_t bit_depth) {
-  const int quant = av1_dc_quant_Q3(q, 0, bit_depth);
-  switch (bit_depth) {
-    case AOM_BITS_8: return q == 0 ? 64 : (quant < 148 ? 84 : 80);
-    case AOM_BITS_10: return q == 0 ? 64 : (quant < 592 ? 84 : 80);
-    case AOM_BITS_12: return q == 0 ? 64 : (quant < 2368 ? 84 : 80);
-    default:
-      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
-      return -1;
-  }
-}
-
-void av1_build_quantizer(aom_bit_depth_t bit_depth, int y_dc_delta_q,
-                         int u_dc_delta_q, int u_ac_delta_q, int v_dc_delta_q,
-                         int v_ac_delta_q, QUANTS *const quants,
-                         Dequants *const deq) {
-  int i, q, quant_Q3, quant_QTX;
-
-  for (q = 0; q < QINDEX_RANGE; q++) {
-    const int qzbin_factor = get_qzbin_factor(q, bit_depth);
-    const int qrounding_factor = q == 0 ? 64 : 48;
-
-    for (i = 0; i < 2; ++i) {
-      int qrounding_factor_fp = 64;
-      // y quantizer setup with original coeff shift of Q3
-      quant_Q3 = i == 0 ? av1_dc_quant_Q3(q, y_dc_delta_q, bit_depth)
-                        : av1_ac_quant_Q3(q, 0, bit_depth);
-      // y quantizer with TX scale
-      quant_QTX = i == 0 ? av1_dc_quant_QTX(q, y_dc_delta_q, bit_depth)
-                         : av1_ac_quant_QTX(q, 0, bit_depth);
-      invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i],
-                   quant_QTX);
-      quants->y_quant_fp[q][i] = (1 << 16) / quant_QTX;
-      quants->y_round_fp[q][i] = (qrounding_factor_fp * quant_QTX) >> 7;
-      quants->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant_QTX, 7);
-      quants->y_round[q][i] = (qrounding_factor * quant_QTX) >> 7;
-      deq->y_dequant_QTX[q][i] = quant_QTX;
-      deq->y_dequant_Q3[q][i] = quant_Q3;
-
-      // u quantizer setup with original coeff shift of Q3
-      quant_Q3 = i == 0 ? av1_dc_quant_Q3(q, u_dc_delta_q, bit_depth)
-                        : av1_ac_quant_Q3(q, u_ac_delta_q, bit_depth);
-      // u quantizer with TX scale
-      quant_QTX = i == 0 ? av1_dc_quant_QTX(q, u_dc_delta_q, bit_depth)
-                         : av1_ac_quant_QTX(q, u_ac_delta_q, bit_depth);
-      invert_quant(&quants->u_quant[q][i], &quants->u_quant_shift[q][i],
-                   quant_QTX);
-      quants->u_quant_fp[q][i] = (1 << 16) / quant_QTX;
-      quants->u_round_fp[q][i] = (qrounding_factor_fp * quant_QTX) >> 7;
-      quants->u_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant_QTX, 7);
-      quants->u_round[q][i] = (qrounding_factor * quant_QTX) >> 7;
-      deq->u_dequant_QTX[q][i] = quant_QTX;
-      deq->u_dequant_Q3[q][i] = quant_Q3;
-
-      // v quantizer setup with original coeff shift of Q3
-      quant_Q3 = i == 0 ? av1_dc_quant_Q3(q, v_dc_delta_q, bit_depth)
-                        : av1_ac_quant_Q3(q, v_ac_delta_q, bit_depth);
-      // v quantizer with TX scale
-      quant_QTX = i == 0 ? av1_dc_quant_QTX(q, v_dc_delta_q, bit_depth)
-                         : av1_ac_quant_QTX(q, v_ac_delta_q, bit_depth);
-      invert_quant(&quants->v_quant[q][i], &quants->v_quant_shift[q][i],
-                   quant_QTX);
-      quants->v_quant_fp[q][i] = (1 << 16) / quant_QTX;
-      quants->v_round_fp[q][i] = (qrounding_factor_fp * quant_QTX) >> 7;
-      quants->v_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant_QTX, 7);
-      quants->v_round[q][i] = (qrounding_factor * quant_QTX) >> 7;
-      deq->v_dequant_QTX[q][i] = quant_QTX;
-      deq->v_dequant_Q3[q][i] = quant_Q3;
-    }
-
-    for (i = 2; i < 8; i++) {  // 8: SIMD width
-      quants->y_quant[q][i] = quants->y_quant[q][1];
-      quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1];
-      quants->y_round_fp[q][i] = quants->y_round_fp[q][1];
-      quants->y_quant_shift[q][i] = quants->y_quant_shift[q][1];
-      quants->y_zbin[q][i] = quants->y_zbin[q][1];
-      quants->y_round[q][i] = quants->y_round[q][1];
-      deq->y_dequant_QTX[q][i] = deq->y_dequant_QTX[q][1];
-      deq->y_dequant_Q3[q][i] = deq->y_dequant_Q3[q][1];
-
-      quants->u_quant[q][i] = quants->u_quant[q][1];
-      quants->u_quant_fp[q][i] = quants->u_quant_fp[q][1];
-      quants->u_round_fp[q][i] = quants->u_round_fp[q][1];
-      quants->u_quant_shift[q][i] = quants->u_quant_shift[q][1];
-      quants->u_zbin[q][i] = quants->u_zbin[q][1];
-      quants->u_round[q][i] = quants->u_round[q][1];
-      deq->u_dequant_QTX[q][i] = deq->u_dequant_QTX[q][1];
-      deq->u_dequant_Q3[q][i] = deq->u_dequant_Q3[q][1];
-      quants->v_quant[q][i] = quants->u_quant[q][1];
-      quants->v_quant_fp[q][i] = quants->v_quant_fp[q][1];
-      quants->v_round_fp[q][i] = quants->v_round_fp[q][1];
-      quants->v_quant_shift[q][i] = quants->v_quant_shift[q][1];
-      quants->v_zbin[q][i] = quants->v_zbin[q][1];
-      quants->v_round[q][i] = quants->v_round[q][1];
-      deq->v_dequant_QTX[q][i] = deq->v_dequant_QTX[q][1];
-      deq->v_dequant_Q3[q][i] = deq->v_dequant_Q3[q][1];
-    }
-  }
-}
-
-void av1_init_quantizer(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  QUANTS *const quants = &cpi->quants;
-  Dequants *const dequants = &cpi->dequants;
-  av1_build_quantizer(cm->seq_params.bit_depth, cm->y_dc_delta_q,
-                      cm->u_dc_delta_q, cm->u_ac_delta_q, cm->v_dc_delta_q,
-                      cm->v_ac_delta_q, quants, dequants);
-}
-
-void av1_init_plane_quantizers(const AV1_COMP *cpi, MACROBLOCK *x,
-                               int segment_id) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const QUANTS *const quants = &cpi->quants;
-
-  int current_qindex = AOMMAX(
-      0, AOMMIN(QINDEX_RANGE - 1, cpi->oxcf.deltaq_mode != NO_DELTA_Q
-                                      ? cm->base_qindex + xd->delta_qindex
-                                      : cm->base_qindex));
-  const int qindex = av1_get_qindex(&cm->seg, segment_id, current_qindex);
-  const int rdmult = av1_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
-  int qmlevel = (xd->lossless[segment_id] || cm->using_qmatrix == 0)
-                    ? NUM_QM_LEVELS - 1
-                    : cm->qm_y;
-
-  // Y
-  x->plane[0].quant_QTX = quants->y_quant[qindex];
-  x->plane[0].quant_fp_QTX = quants->y_quant_fp[qindex];
-  x->plane[0].round_fp_QTX = quants->y_round_fp[qindex];
-  x->plane[0].quant_shift_QTX = quants->y_quant_shift[qindex];
-  x->plane[0].zbin_QTX = quants->y_zbin[qindex];
-  x->plane[0].round_QTX = quants->y_round[qindex];
-  x->plane[0].dequant_QTX = cpi->dequants.y_dequant_QTX[qindex];
-  memcpy(&xd->plane[0].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][0],
-         sizeof(cm->gqmatrix[qmlevel][0]));
-  memcpy(&xd->plane[0].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][0],
-         sizeof(cm->giqmatrix[qmlevel][0]));
-  xd->plane[0].dequant_Q3 = cpi->dequants.y_dequant_Q3[qindex];
-
-  // U
-  qmlevel = (xd->lossless[segment_id] || cm->using_qmatrix == 0)
-                ? NUM_QM_LEVELS - 1
-                : cm->qm_u;
-  {
-    x->plane[1].quant_QTX = quants->u_quant[qindex];
-    x->plane[1].quant_fp_QTX = quants->u_quant_fp[qindex];
-    x->plane[1].round_fp_QTX = quants->u_round_fp[qindex];
-    x->plane[1].quant_shift_QTX = quants->u_quant_shift[qindex];
-    x->plane[1].zbin_QTX = quants->u_zbin[qindex];
-    x->plane[1].round_QTX = quants->u_round[qindex];
-    x->plane[1].dequant_QTX = cpi->dequants.u_dequant_QTX[qindex];
-    memcpy(&xd->plane[1].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][1],
-           sizeof(cm->gqmatrix[qmlevel][1]));
-    memcpy(&xd->plane[1].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][1],
-           sizeof(cm->giqmatrix[qmlevel][1]));
-    x->plane[1].dequant_QTX = cpi->dequants.u_dequant_QTX[qindex];
-    xd->plane[1].dequant_Q3 = cpi->dequants.u_dequant_Q3[qindex];
-  }
-  // V
-  qmlevel = (xd->lossless[segment_id] || cm->using_qmatrix == 0)
-                ? NUM_QM_LEVELS - 1
-                : cm->qm_v;
-  {
-    x->plane[2].quant_QTX = quants->v_quant[qindex];
-    x->plane[2].quant_fp_QTX = quants->v_quant_fp[qindex];
-    x->plane[2].round_fp_QTX = quants->v_round_fp[qindex];
-    x->plane[2].quant_shift_QTX = quants->v_quant_shift[qindex];
-    x->plane[2].zbin_QTX = quants->v_zbin[qindex];
-    x->plane[2].round_QTX = quants->v_round[qindex];
-    x->plane[2].dequant_QTX = cpi->dequants.v_dequant_QTX[qindex];
-    memcpy(&xd->plane[2].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][2],
-           sizeof(cm->gqmatrix[qmlevel][2]));
-    memcpy(&xd->plane[2].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][2],
-           sizeof(cm->giqmatrix[qmlevel][2]));
-    x->plane[2].dequant_QTX = cpi->dequants.v_dequant_QTX[qindex];
-    xd->plane[2].dequant_Q3 = cpi->dequants.v_dequant_Q3[qindex];
-  }
-  x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
-  x->qindex = qindex;
-
-  set_error_per_bit(x, rdmult);
-
-  av1_initialize_me_consts(cpi, x, qindex);
-}
-
-void av1_frame_init_quantizer(AV1_COMP *cpi) {
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id);
-}
-
-void av1_set_quantizer(AV1_COMMON *cm, int q) {
-  // quantizer has to be reinitialized with av1_init_quantizer() if any
-  // delta_q changes.
-  cm->base_qindex = AOMMAX(cm->delta_q_present_flag, q);
-  cm->y_dc_delta_q = 0;
-  cm->u_dc_delta_q = 0;
-  cm->u_ac_delta_q = 0;
-  cm->v_dc_delta_q = 0;
-  cm->v_ac_delta_q = 0;
-  cm->qm_y = aom_get_qmlevel(cm->base_qindex, cm->min_qmlevel, cm->max_qmlevel);
-  cm->qm_u = aom_get_qmlevel(cm->base_qindex + cm->u_ac_delta_q,
-                             cm->min_qmlevel, cm->max_qmlevel);
-
-  if (!cm->seq_params.separate_uv_delta_q)
-    cm->qm_v = cm->qm_u;
-  else
-    cm->qm_v = aom_get_qmlevel(cm->base_qindex + cm->v_ac_delta_q,
-                               cm->min_qmlevel, cm->max_qmlevel);
-}
-
-// Table that converts 0-63 Q-range values passed in outside to the Qindex
-// range used internally.
-static const int quantizer_to_qindex[] = {
-  0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
-  52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
-  104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
-  156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
-  208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
-};
-
-int av1_quantizer_to_qindex(int quantizer) {
-  return quantizer_to_qindex[quantizer];
-}
-
-int av1_qindex_to_quantizer(int qindex) {
-  int quantizer;
-
-  for (quantizer = 0; quantizer < 64; ++quantizer)
-    if (quantizer_to_qindex[quantizer] >= qindex) return quantizer;
-
-  return 63;
-}
diff --git a/third_party/aom/av1/encoder/av1_quantize.h b/third_party/aom/av1/encoder/av1_quantize.h
deleted file mode 100644
index 35af9a67a..000000000
--- a/third_party/aom/av1/encoder/av1_quantize.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_AV1_QUANTIZE_H_
-#define AOM_AV1_ENCODER_AV1_QUANTIZE_H_
-
-#include "config/aom_config.h"
-
-#include "av1/common/quant_common.h"
-#include "av1/common/scan.h"
-#include "av1/encoder/block.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct QUANT_PARAM {
-  int log_scale;
-  TX_SIZE tx_size;
-  const qm_val_t *qmatrix;
-  const qm_val_t *iqmatrix;
-} QUANT_PARAM;
-
-typedef void (*AV1_QUANT_FACADE)(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                                 const MACROBLOCK_PLANE *p,
-                                 tran_low_t *qcoeff_ptr,
-                                 tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                                 const SCAN_ORDER *sc,
-                                 const QUANT_PARAM *qparam);
-
-// The QUANTS structure is used only for internal quantizer setup in
-// av1_quantize.c.
-// All of its fields use the same coefficient shift/scaling at TX.
-typedef struct {
-  // 0: dc 1: ac 2-8: ac repeated to SIMD width
-  DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
-  DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]);
-  DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]);
-  DECLARE_ALIGNED(16, int16_t, y_round[QINDEX_RANGE][8]);
-
-  // TODO(jingning): in progress of re-working the quantization. will decide
-  // if we want to deprecate the current use of y_quant.
-  DECLARE_ALIGNED(16, int16_t, y_quant_fp[QINDEX_RANGE][8]);
-  DECLARE_ALIGNED(16, int16_t, u_quant_fp[QINDEX_RANGE][8]);
-  DECLARE_ALIGNED(16, int16_t, v_quant_fp[QINDEX_RANGE][8]);
-  DECLARE_ALIGNED(16, int16_t, y_round_fp[QINDEX_RANGE][8]);
-  DECLARE_ALIGNED(16, int16_t, u_round_fp[QINDEX_RANGE][8]);
-  DECLARE_ALIGNED(16, int16_t, v_round_fp[QINDEX_RANGE][8]);
-
-  DECLARE_ALIGNED(16, int16_t, u_quant[QINDEX_RANGE][8]);
-  DECLARE_ALIGNED(16, int16_t, v_quant[QINDEX_RANGE][8]);
-  DECLARE_ALIGNED(16, int16_t, u_quant_shift[QINDEX_RANGE][8]);
-  DECLARE_ALIGNED(16, int16_t, v_quant_shift[QINDEX_RANGE][8]);
-  DECLARE_ALIGNED(16, int16_t, u_zbin[QINDEX_RANGE][8]);
-  DECLARE_ALIGNED(16, int16_t, v_zbin[QINDEX_RANGE][8]);
-  DECLARE_ALIGNED(16, int16_t, u_round[QINDEX_RANGE][8]);
-  DECLARE_ALIGNED(16, int16_t, v_round[QINDEX_RANGE][8]);
-} QUANTS;
-
-// The Dequants structure is used only for internal quantizer setup in
-// av1_quantize.c.
-// Fields are sufffixed according to whether or not they're expressed in
-// the same coefficient shift/precision as TX or a fixed Q3 format.
-typedef struct {
-  DECLARE_ALIGNED(16, int16_t,
-                  y_dequant_QTX[QINDEX_RANGE][8]);  // 8: SIMD width
-  DECLARE_ALIGNED(16, int16_t,
-                  u_dequant_QTX[QINDEX_RANGE][8]);  // 8: SIMD width
-  DECLARE_ALIGNED(16, int16_t,
-                  v_dequant_QTX[QINDEX_RANGE][8]);              // 8: SIMD width
-  DECLARE_ALIGNED(16, int16_t, y_dequant_Q3[QINDEX_RANGE][8]);  // 8: SIMD width
-  DECLARE_ALIGNED(16, int16_t, u_dequant_Q3[QINDEX_RANGE][8]);  // 8: SIMD width
-  DECLARE_ALIGNED(16, int16_t, v_dequant_Q3[QINDEX_RANGE][8]);  // 8: SIMD width
-} Dequants;
-
-struct AV1_COMP;
-struct AV1Common;
-
-void av1_frame_init_quantizer(struct AV1_COMP *cpi);
-
-void av1_init_plane_quantizers(const struct AV1_COMP *cpi, MACROBLOCK *x,
-                               int segment_id);
-
-void av1_build_quantizer(aom_bit_depth_t bit_depth, int y_dc_delta_q,
-                         int u_dc_delta_q, int u_ac_delta_q, int v_dc_delta_q,
-                         int v_ac_delta_q, QUANTS *const quants,
-                         Dequants *const deq);
-
-void av1_init_quantizer(struct AV1_COMP *cpi);
-
-void av1_set_quantizer(struct AV1Common *cm, int q);
-
-int av1_quantizer_to_qindex(int quantizer);
-
-int av1_qindex_to_quantizer(int qindex);
-
-void av1_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr,
-                       tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr);
-
-void av1_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                            const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
-                            tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                            const SCAN_ORDER *sc, const QUANT_PARAM *qparam);
-
-void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                           const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
-                           tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                           const SCAN_ORDER *sc, const QUANT_PARAM *qparam);
-
-void av1_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                            const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
-                            tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                            const SCAN_ORDER *sc, const QUANT_PARAM *qparam);
-
-void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr,
-                                   intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
-                                   tran_low_t *qcoeff_ptr,
-                                   tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                                   const SCAN_ORDER *sc,
-                                   const QUANT_PARAM *qparam);
-
-void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
-                                  intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
-                                  tran_low_t *qcoeff_ptr,
-                                  tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                                  const SCAN_ORDER *sc,
-                                  const QUANT_PARAM *qparam);
-
-void av1_highbd_quantize_dc_facade(const tran_low_t *coeff_ptr,
-                                   intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
-                                   tran_low_t *qcoeff_ptr,
-                                   tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                                   const SCAN_ORDER *sc,
-                                   const QUANT_PARAM *qparam);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_AV1_QUANTIZE_H_
diff --git a/third_party/aom/av1/encoder/bitstream.c b/third_party/aom/av1/encoder/bitstream.c
deleted file mode 100644
index 2c4acdb02..000000000
--- a/third_party/aom/av1/encoder/bitstream.c
+++ /dev/null
@@ -1,3999 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <limits.h>
-#include <stdio.h>
-
-#include "aom/aom_encoder.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/binary_codes_writer.h"
-#include "aom_dsp/bitwriter_buffer.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/bitops.h"
-#include "aom_ports/mem_ops.h"
-#include "aom_ports/system_state.h"
-#if CONFIG_BITSTREAM_DEBUG
-#include "aom_util/debug_util.h"
-#endif  // CONFIG_BITSTREAM_DEBUG
-
-#include "av1/common/cdef.h"
-#include "av1/common/cfl.h"
-#include "av1/common/entropy.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/entropymv.h"
-#include "av1/common/mvref_common.h"
-#include "av1/common/pred_common.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/seg_common.h"
-#include "av1/common/tile_common.h"
-
-#include "av1/encoder/bitstream.h"
-#include "av1/encoder/cost.h"
-#include "av1/encoder/encodemv.h"
-#include "av1/encoder/encodetxb.h"
-#include "av1/encoder/mcomp.h"
-#include "av1/encoder/palette.h"
-#include "av1/encoder/segmentation.h"
-#include "av1/encoder/tokenize.h"
-
-#define ENC_MISMATCH_DEBUG 0
-
-static INLINE void write_uniform(aom_writer *w, int n, int v) {
-  const int l = get_unsigned_bits(n);
-  const int m = (1 << l) - n;
-  if (l == 0) return;
-  if (v < m) {
-    aom_write_literal(w, v, l - 1);
-  } else {
-    aom_write_literal(w, m + ((v - m) >> 1), l - 1);
-    aom_write_literal(w, (v - m) & 1, 1);
-  }
-}
-
-static void loop_restoration_write_sb_coeffs(const AV1_COMMON *const cm,
-                                             MACROBLOCKD *xd,
-                                             const RestorationUnitInfo *rui,
-                                             aom_writer *const w, int plane,
-                                             FRAME_COUNTS *counts);
-
-static void write_intra_y_mode_kf(FRAME_CONTEXT *frame_ctx,
-                                  const MB_MODE_INFO *mi,
-                                  const MB_MODE_INFO *above_mi,
-                                  const MB_MODE_INFO *left_mi,
-                                  PREDICTION_MODE mode, aom_writer *w) {
-  assert(!is_intrabc_block(mi));
-  (void)mi;
-  aom_write_symbol(w, mode, get_y_mode_cdf(frame_ctx, above_mi, left_mi),
-                   INTRA_MODES);
-}
-
-static void write_inter_mode(aom_writer *w, PREDICTION_MODE mode,
-                             FRAME_CONTEXT *ec_ctx, const int16_t mode_ctx) {
-  const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK;
-
-  aom_write_symbol(w, mode != NEWMV, ec_ctx->newmv_cdf[newmv_ctx], 2);
-
-  if (mode != NEWMV) {
-    const int16_t zeromv_ctx =
-        (mode_ctx >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
-    aom_write_symbol(w, mode != GLOBALMV, ec_ctx->zeromv_cdf[zeromv_ctx], 2);
-
-    if (mode != GLOBALMV) {
-      int16_t refmv_ctx = (mode_ctx >> REFMV_OFFSET) & REFMV_CTX_MASK;
-      aom_write_symbol(w, mode != NEARESTMV, ec_ctx->refmv_cdf[refmv_ctx], 2);
-    }
-  }
-}
-
-static void write_drl_idx(FRAME_CONTEXT *ec_ctx, const MB_MODE_INFO *mbmi,
-                          const MB_MODE_INFO_EXT *mbmi_ext, aom_writer *w) {
-  uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
-
-  assert(mbmi->ref_mv_idx < 3);
-
-  const int new_mv = mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV;
-  if (new_mv) {
-    int idx;
-    for (idx = 0; idx < 2; ++idx) {
-      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
-        uint8_t drl_ctx =
-            av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
-
-        aom_write_symbol(w, mbmi->ref_mv_idx != idx, ec_ctx->drl_cdf[drl_ctx],
-                         2);
-        if (mbmi->ref_mv_idx == idx) return;
-      }
-    }
-    return;
-  }
-
-  if (have_nearmv_in_inter_mode(mbmi->mode)) {
-    int idx;
-    // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
-    for (idx = 1; idx < 3; ++idx) {
-      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
-        uint8_t drl_ctx =
-            av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
-        aom_write_symbol(w, mbmi->ref_mv_idx != (idx - 1),
-                         ec_ctx->drl_cdf[drl_ctx], 2);
-        if (mbmi->ref_mv_idx == (idx - 1)) return;
-      }
-    }
-    return;
-  }
-}
-
-static void write_inter_compound_mode(MACROBLOCKD *xd, aom_writer *w,
-                                      PREDICTION_MODE mode,
-                                      const int16_t mode_ctx) {
-  assert(is_inter_compound_mode(mode));
-  aom_write_symbol(w, INTER_COMPOUND_OFFSET(mode),
-                   xd->tile_ctx->inter_compound_mode_cdf[mode_ctx],
-                   INTER_COMPOUND_MODES);
-}
-
-static void write_tx_size_vartx(MACROBLOCKD *xd, const MB_MODE_INFO *mbmi,
-                                TX_SIZE tx_size, int depth, int blk_row,
-                                int blk_col, aom_writer *w) {
-  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-  const int max_blocks_high = max_block_high(xd, mbmi->sb_type, 0);
-  const int max_blocks_wide = max_block_wide(xd, mbmi->sb_type, 0);
-
-  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
-
-  if (depth == MAX_VARTX_DEPTH) {
-    txfm_partition_update(xd->above_txfm_context + blk_col,
-                          xd->left_txfm_context + blk_row, tx_size, tx_size);
-    return;
-  }
-
-  const int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
-                                         xd->left_txfm_context + blk_row,
-                                         mbmi->sb_type, tx_size);
-  const int txb_size_index =
-      av1_get_txb_size_index(mbmi->sb_type, blk_row, blk_col);
-  const int write_txfm_partition =
-      tx_size == mbmi->inter_tx_size[txb_size_index];
-  if (write_txfm_partition) {
-    aom_write_symbol(w, 0, ec_ctx->txfm_partition_cdf[ctx], 2);
-
-    txfm_partition_update(xd->above_txfm_context + blk_col,
-                          xd->left_txfm_context + blk_row, tx_size, tx_size);
-    // TODO(yuec): set correct txfm partition update for qttx
-  } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
-    const int bsw = tx_size_wide_unit[sub_txs];
-    const int bsh = tx_size_high_unit[sub_txs];
-
-    aom_write_symbol(w, 1, ec_ctx->txfm_partition_cdf[ctx], 2);
-
-    if (sub_txs == TX_4X4) {
-      txfm_partition_update(xd->above_txfm_context + blk_col,
-                            xd->left_txfm_context + blk_row, sub_txs, tx_size);
-      return;
-    }
-
-    assert(bsw > 0 && bsh > 0);
-    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh)
-      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
-        int offsetr = blk_row + row;
-        int offsetc = blk_col + col;
-        write_tx_size_vartx(xd, mbmi, sub_txs, depth + 1, offsetr, offsetc, w);
-      }
-  }
-}
-
-static void write_selected_tx_size(const MACROBLOCKD *xd, aom_writer *w) {
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const BLOCK_SIZE bsize = mbmi->sb_type;
-  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-  if (block_signals_txsize(bsize)) {
-    const TX_SIZE tx_size = mbmi->tx_size;
-    const int tx_size_ctx = get_tx_size_context(xd);
-    const int depth = tx_size_to_depth(tx_size, bsize);
-    const int max_depths = bsize_to_max_depth(bsize);
-    const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize);
-
-    assert(depth >= 0 && depth <= max_depths);
-    assert(!is_inter_block(mbmi));
-    assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(xd, mbmi)));
-
-    aom_write_symbol(w, depth, ec_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx],
-                     max_depths + 1);
-  }
-}
-
-static int write_skip(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                      int segment_id, const MB_MODE_INFO *mi, aom_writer *w) {
-  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
-    return 1;
-  } else {
-    const int skip = mi->skip;
-    const int ctx = av1_get_skip_context(xd);
-    FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-    aom_write_symbol(w, skip, ec_ctx->skip_cdfs[ctx], 2);
-    return skip;
-  }
-}
-
-static int write_skip_mode(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                           int segment_id, const MB_MODE_INFO *mi,
-                           aom_writer *w) {
-  if (!cm->skip_mode_flag) return 0;
-  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
-    return 0;
-  }
-  const int skip_mode = mi->skip_mode;
-  if (!is_comp_ref_allowed(mi->sb_type)) {
-    assert(!skip_mode);
-    return 0;
-  }
-  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME) ||
-      segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) {
-    // These features imply single-reference mode, while skip mode implies
-    // compound reference. Hence, the two are mutually exclusive.
-    // In other words, skip_mode is implicitly 0 here.
-    assert(!skip_mode);
-    return 0;
-  }
-  const int ctx = av1_get_skip_mode_context(xd);
-  aom_write_symbol(w, skip_mode, xd->tile_ctx->skip_mode_cdfs[ctx], 2);
-  return skip_mode;
-}
-
-static void write_is_inter(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                           int segment_id, aom_writer *w, const int is_inter) {
-  if (!segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
-    if (segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) {
-      assert(is_inter);
-      return;
-    }
-    const int ctx = av1_get_intra_inter_context(xd);
-    FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-    aom_write_symbol(w, is_inter, ec_ctx->intra_inter_cdf[ctx], 2);
-  }
-}
-
-static void write_motion_mode(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                              const MB_MODE_INFO *mbmi, aom_writer *w) {
-  MOTION_MODE last_motion_mode_allowed =
-      cm->switchable_motion_mode
-          ? motion_mode_allowed(cm->global_motion, xd, mbmi,
-                                cm->allow_warped_motion)
-          : SIMPLE_TRANSLATION;
-  assert(mbmi->motion_mode <= last_motion_mode_allowed);
-  switch (last_motion_mode_allowed) {
-    case SIMPLE_TRANSLATION: break;
-    case OBMC_CAUSAL:
-      aom_write_symbol(w, mbmi->motion_mode == OBMC_CAUSAL,
-                       xd->tile_ctx->obmc_cdf[mbmi->sb_type], 2);
-      break;
-    default:
-      aom_write_symbol(w, mbmi->motion_mode,
-                       xd->tile_ctx->motion_mode_cdf[mbmi->sb_type],
-                       MOTION_MODES);
-  }
-}
-
-static void write_delta_qindex(const MACROBLOCKD *xd, int delta_qindex,
-                               aom_writer *w) {
-  int sign = delta_qindex < 0;
-  int abs = sign ? -delta_qindex : delta_qindex;
-  int rem_bits, thr;
-  int smallval = abs < DELTA_Q_SMALL ? 1 : 0;
-  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-
-  aom_write_symbol(w, AOMMIN(abs, DELTA_Q_SMALL), ec_ctx->delta_q_cdf,
-                   DELTA_Q_PROBS + 1);
-
-  if (!smallval) {
-    rem_bits = get_msb(abs - 1);
-    thr = (1 << rem_bits) + 1;
-    aom_write_literal(w, rem_bits - 1, 3);
-    aom_write_literal(w, abs - thr, rem_bits);
-  }
-  if (abs > 0) {
-    aom_write_bit(w, sign);
-  }
-}
-
-static void write_delta_lflevel(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                                int lf_id, int delta_lflevel, aom_writer *w) {
-  int sign = delta_lflevel < 0;
-  int abs = sign ? -delta_lflevel : delta_lflevel;
-  int rem_bits, thr;
-  int smallval = abs < DELTA_LF_SMALL ? 1 : 0;
-  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-
-  if (cm->delta_lf_multi) {
-    assert(lf_id >= 0 && lf_id < (av1_num_planes(cm) > 1 ? FRAME_LF_COUNT
-                                                         : FRAME_LF_COUNT - 2));
-    aom_write_symbol(w, AOMMIN(abs, DELTA_LF_SMALL),
-                     ec_ctx->delta_lf_multi_cdf[lf_id], DELTA_LF_PROBS + 1);
-  } else {
-    aom_write_symbol(w, AOMMIN(abs, DELTA_LF_SMALL), ec_ctx->delta_lf_cdf,
-                     DELTA_LF_PROBS + 1);
-  }
-
-  if (!smallval) {
-    rem_bits = get_msb(abs - 1);
-    thr = (1 << rem_bits) + 1;
-    aom_write_literal(w, rem_bits - 1, 3);
-    aom_write_literal(w, abs - thr, rem_bits);
-  }
-  if (abs > 0) {
-    aom_write_bit(w, sign);
-  }
-}
-
-static void pack_map_tokens(aom_writer *w, const TOKENEXTRA **tp, int n,
-                            int num) {
-  const TOKENEXTRA *p = *tp;
-  write_uniform(w, n, p->token);  // The first color index.
-  ++p;
-  --num;
-  for (int i = 0; i < num; ++i) {
-    aom_write_symbol(w, p->token, p->color_map_cdf, n);
-    ++p;
-  }
-  *tp = p;
-}
-
-static void pack_txb_tokens(aom_writer *w, AV1_COMMON *cm, MACROBLOCK *const x,
-                            const TOKENEXTRA **tp,
-                            const TOKENEXTRA *const tok_end, MACROBLOCKD *xd,
-                            MB_MODE_INFO *mbmi, int plane,
-                            BLOCK_SIZE plane_bsize, aom_bit_depth_t bit_depth,
-                            int block, int blk_row, int blk_col,
-                            TX_SIZE tx_size, TOKEN_STATS *token_stats) {
-  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
-  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
-
-  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
-
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const TX_SIZE plane_tx_size =
-      plane ? av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x,
-                                    pd->subsampling_y)
-            : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row,
-                                                         blk_col)];
-
-  if (tx_size == plane_tx_size || plane) {
-    tran_low_t *tcoeff = BLOCK_OFFSET(x->mbmi_ext->tcoeff[plane], block);
-    const uint16_t eob = x->mbmi_ext->eobs[plane][block];
-    TXB_CTX txb_ctx = { x->mbmi_ext->txb_skip_ctx[plane][block],
-                        x->mbmi_ext->dc_sign_ctx[plane][block] };
-    av1_write_coeffs_txb(cm, xd, w, blk_row, blk_col, plane, tx_size, tcoeff,
-                         eob, &txb_ctx);
-#if CONFIG_RD_DEBUG
-    TOKEN_STATS tmp_token_stats;
-    init_token_stats(&tmp_token_stats);
-    token_stats->txb_coeff_cost_map[blk_row][blk_col] = tmp_token_stats.cost;
-    token_stats->cost += tmp_token_stats.cost;
-#endif
-  } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
-    const int bsw = tx_size_wide_unit[sub_txs];
-    const int bsh = tx_size_high_unit[sub_txs];
-    const int step = bsh * bsw;
-
-    assert(bsw > 0 && bsh > 0);
-
-    for (int r = 0; r < tx_size_high_unit[tx_size]; r += bsh) {
-      for (int c = 0; c < tx_size_wide_unit[tx_size]; c += bsw) {
-        const int offsetr = blk_row + r;
-        const int offsetc = blk_col + c;
-        if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
-        pack_txb_tokens(w, cm, x, tp, tok_end, xd, mbmi, plane, plane_bsize,
-                        bit_depth, block, offsetr, offsetc, sub_txs,
-                        token_stats);
-        block += step;
-      }
-    }
-  }
-}
-
-static INLINE void set_spatial_segment_id(const AV1_COMMON *const cm,
-                                          uint8_t *segment_ids,
-                                          BLOCK_SIZE bsize, int mi_row,
-                                          int mi_col, int segment_id) {
-  const int mi_offset = mi_row * cm->mi_cols + mi_col;
-  const int bw = mi_size_wide[bsize];
-  const int bh = mi_size_high[bsize];
-  const int xmis = AOMMIN(cm->mi_cols - mi_col, bw);
-  const int ymis = AOMMIN(cm->mi_rows - mi_row, bh);
-  int x, y;
-
-  for (y = 0; y < ymis; ++y)
-    for (x = 0; x < xmis; ++x)
-      segment_ids[mi_offset + y * cm->mi_cols + x] = segment_id;
-}
-
-int av1_neg_interleave(int x, int ref, int max) {
-  assert(x < max);
-  const int diff = x - ref;
-  if (!ref) return x;
-  if (ref >= (max - 1)) return -x + max - 1;
-  if (2 * ref < max) {
-    if (abs(diff) <= ref) {
-      if (diff > 0)
-        return (diff << 1) - 1;
-      else
-        return ((-diff) << 1);
-    }
-    return x;
-  } else {
-    if (abs(diff) < (max - ref)) {
-      if (diff > 0)
-        return (diff << 1) - 1;
-      else
-        return ((-diff) << 1);
-    }
-    return (max - x) - 1;
-  }
-}
-
-static void write_segment_id(AV1_COMP *cpi, const MB_MODE_INFO *const mbmi,
-                             aom_writer *w, const struct segmentation *seg,
-                             struct segmentation_probs *segp, int mi_row,
-                             int mi_col, int skip) {
-  if (!seg->enabled || !seg->update_map) return;
-
-  AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-  int cdf_num;
-  const int pred = av1_get_spatial_seg_pred(cm, xd, mi_row, mi_col, &cdf_num);
-
-  if (skip) {
-    // Still need to transmit tx size for intra blocks even if skip is
-    // true. Changing segment_id may make the tx size become invalid, e.g
-    // changing from lossless to lossy.
-    assert(is_inter_block(mbmi) || !cpi->has_lossless_segment);
-
-    set_spatial_segment_id(cm, cm->current_frame_seg_map, mbmi->sb_type, mi_row,
-                           mi_col, pred);
-    set_spatial_segment_id(cm, cpi->segmentation_map, mbmi->sb_type, mi_row,
-                           mi_col, pred);
-    /* mbmi is read only but we need to update segment_id */
-    ((MB_MODE_INFO *)mbmi)->segment_id = pred;
-    return;
-  }
-
-  const int coded_id =
-      av1_neg_interleave(mbmi->segment_id, pred, seg->last_active_segid + 1);
-  aom_cdf_prob *pred_cdf = segp->spatial_pred_seg_cdf[cdf_num];
-  aom_write_symbol(w, coded_id, pred_cdf, MAX_SEGMENTS);
-  set_spatial_segment_id(cm, cm->current_frame_seg_map, mbmi->sb_type, mi_row,
-                         mi_col, mbmi->segment_id);
-}
-
-#define WRITE_REF_BIT(bname, pname) \
-  aom_write_symbol(w, bname, av1_get_pred_cdf_##pname(xd), 2)
-
-// This function encodes the reference frame
-static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                             aom_writer *w) {
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int is_compound = has_second_ref(mbmi);
-  const int segment_id = mbmi->segment_id;
-
-  // If segment level coding of this signal is disabled...
-  // or the segment allows multiple reference frame options
-  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
-    assert(!is_compound);
-    assert(mbmi->ref_frame[0] ==
-           get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME));
-  } else if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP) ||
-             segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) {
-    assert(!is_compound);
-    assert(mbmi->ref_frame[0] == LAST_FRAME);
-  } else {
-    // does the feature use compound prediction or not
-    // (if not specified at the frame/segment level)
-    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
-      if (is_comp_ref_allowed(mbmi->sb_type))
-        aom_write_symbol(w, is_compound, av1_get_reference_mode_cdf(xd), 2);
-    } else {
-      assert((!is_compound) == (cm->reference_mode == SINGLE_REFERENCE));
-    }
-
-    if (is_compound) {
-      const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi)
-                                                    ? UNIDIR_COMP_REFERENCE
-                                                    : BIDIR_COMP_REFERENCE;
-      aom_write_symbol(w, comp_ref_type, av1_get_comp_reference_type_cdf(xd),
-                       2);
-
-      if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
-        const int bit = mbmi->ref_frame[0] == BWDREF_FRAME;
-        WRITE_REF_BIT(bit, uni_comp_ref_p);
-
-        if (!bit) {
-          assert(mbmi->ref_frame[0] == LAST_FRAME);
-          const int bit1 = mbmi->ref_frame[1] == LAST3_FRAME ||
-                           mbmi->ref_frame[1] == GOLDEN_FRAME;
-          WRITE_REF_BIT(bit1, uni_comp_ref_p1);
-          if (bit1) {
-            const int bit2 = mbmi->ref_frame[1] == GOLDEN_FRAME;
-            WRITE_REF_BIT(bit2, uni_comp_ref_p2);
-          }
-        } else {
-          assert(mbmi->ref_frame[1] == ALTREF_FRAME);
-        }
-
-        return;
-      }
-
-      assert(comp_ref_type == BIDIR_COMP_REFERENCE);
-
-      const int bit = (mbmi->ref_frame[0] == GOLDEN_FRAME ||
-                       mbmi->ref_frame[0] == LAST3_FRAME);
-      WRITE_REF_BIT(bit, comp_ref_p);
-
-      if (!bit) {
-        const int bit1 = mbmi->ref_frame[0] == LAST2_FRAME;
-        WRITE_REF_BIT(bit1, comp_ref_p1);
-      } else {
-        const int bit2 = mbmi->ref_frame[0] == GOLDEN_FRAME;
-        WRITE_REF_BIT(bit2, comp_ref_p2);
-      }
-
-      const int bit_bwd = mbmi->ref_frame[1] == ALTREF_FRAME;
-      WRITE_REF_BIT(bit_bwd, comp_bwdref_p);
-
-      if (!bit_bwd) {
-        WRITE_REF_BIT(mbmi->ref_frame[1] == ALTREF2_FRAME, comp_bwdref_p1);
-      }
-
-    } else {
-      const int bit0 = (mbmi->ref_frame[0] <= ALTREF_FRAME &&
-                        mbmi->ref_frame[0] >= BWDREF_FRAME);
-      WRITE_REF_BIT(bit0, single_ref_p1);
-
-      if (bit0) {
-        const int bit1 = mbmi->ref_frame[0] == ALTREF_FRAME;
-        WRITE_REF_BIT(bit1, single_ref_p2);
-
-        if (!bit1) {
-          WRITE_REF_BIT(mbmi->ref_frame[0] == ALTREF2_FRAME, single_ref_p6);
-        }
-      } else {
-        const int bit2 = (mbmi->ref_frame[0] == LAST3_FRAME ||
-                          mbmi->ref_frame[0] == GOLDEN_FRAME);
-        WRITE_REF_BIT(bit2, single_ref_p3);
-
-        if (!bit2) {
-          const int bit3 = mbmi->ref_frame[0] != LAST_FRAME;
-          WRITE_REF_BIT(bit3, single_ref_p4);
-        } else {
-          const int bit4 = mbmi->ref_frame[0] != LAST3_FRAME;
-          WRITE_REF_BIT(bit4, single_ref_p5);
-        }
-      }
-    }
-  }
-}
-
-static void write_filter_intra_mode_info(const AV1_COMMON *cm,
-                                         const MACROBLOCKD *xd,
-                                         const MB_MODE_INFO *const mbmi,
-                                         aom_writer *w) {
-  if (av1_filter_intra_allowed(cm, mbmi)) {
-    aom_write_symbol(w, mbmi->filter_intra_mode_info.use_filter_intra,
-                     xd->tile_ctx->filter_intra_cdfs[mbmi->sb_type], 2);
-    if (mbmi->filter_intra_mode_info.use_filter_intra) {
-      const FILTER_INTRA_MODE mode =
-          mbmi->filter_intra_mode_info.filter_intra_mode;
-      aom_write_symbol(w, mode, xd->tile_ctx->filter_intra_mode_cdf,
-                       FILTER_INTRA_MODES);
-    }
-  }
-}
-
-static void write_angle_delta(aom_writer *w, int angle_delta,
-                              aom_cdf_prob *cdf) {
-  aom_write_symbol(w, angle_delta + MAX_ANGLE_DELTA, cdf,
-                   2 * MAX_ANGLE_DELTA + 1);
-}
-
-static void write_mb_interp_filter(AV1_COMP *cpi, const MACROBLOCKD *xd,
-                                   aom_writer *w) {
-  AV1_COMMON *const cm = &cpi->common;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-
-  if (!av1_is_interp_needed(xd)) {
-    assert(mbmi->interp_filters ==
-           av1_broadcast_interp_filter(
-               av1_unswitchable_filter(cm->interp_filter)));
-    return;
-  }
-  if (cm->interp_filter == SWITCHABLE) {
-    int dir;
-    for (dir = 0; dir < 2; ++dir) {
-      const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
-      InterpFilter filter =
-          av1_extract_interp_filter(mbmi->interp_filters, dir);
-      aom_write_symbol(w, filter, ec_ctx->switchable_interp_cdf[ctx],
-                       SWITCHABLE_FILTERS);
-      ++cpi->interp_filter_selected[0][filter];
-      if (cm->seq_params.enable_dual_filter == 0) return;
-    }
-  }
-}
-
-// Transmit color values with delta encoding. Write the first value as
-// literal, and the deltas between each value and the previous one. "min_val" is
-// the smallest possible value of the deltas.
-static void delta_encode_palette_colors(const int *colors, int num,
-                                        int bit_depth, int min_val,
-                                        aom_writer *w) {
-  if (num <= 0) return;
-  assert(colors[0] < (1 << bit_depth));
-  aom_write_literal(w, colors[0], bit_depth);
-  if (num == 1) return;
-  int max_delta = 0;
-  int deltas[PALETTE_MAX_SIZE];
-  memset(deltas, 0, sizeof(deltas));
-  for (int i = 1; i < num; ++i) {
-    assert(colors[i] < (1 << bit_depth));
-    const int delta = colors[i] - colors[i - 1];
-    deltas[i - 1] = delta;
-    assert(delta >= min_val);
-    if (delta > max_delta) max_delta = delta;
-  }
-  const int min_bits = bit_depth - 3;
-  int bits = AOMMAX(av1_ceil_log2(max_delta + 1 - min_val), min_bits);
-  assert(bits <= bit_depth);
-  int range = (1 << bit_depth) - colors[0] - min_val;
-  aom_write_literal(w, bits - min_bits, 2);
-  for (int i = 0; i < num - 1; ++i) {
-    aom_write_literal(w, deltas[i] - min_val, bits);
-    range -= deltas[i];
-    bits = AOMMIN(bits, av1_ceil_log2(range));
-  }
-}
-
-// Transmit luma palette color values. First signal if each color in the color
-// cache is used. Those colors that are not in the cache are transmitted with
-// delta encoding.
-static void write_palette_colors_y(const MACROBLOCKD *const xd,
-                                   const PALETTE_MODE_INFO *const pmi,
-                                   int bit_depth, aom_writer *w) {
-  const int n = pmi->palette_size[0];
-  uint16_t color_cache[2 * PALETTE_MAX_SIZE];
-  const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
-  int out_cache_colors[PALETTE_MAX_SIZE];
-  uint8_t cache_color_found[2 * PALETTE_MAX_SIZE];
-  const int n_out_cache =
-      av1_index_color_cache(color_cache, n_cache, pmi->palette_colors, n,
-                            cache_color_found, out_cache_colors);
-  int n_in_cache = 0;
-  for (int i = 0; i < n_cache && n_in_cache < n; ++i) {
-    const int found = cache_color_found[i];
-    aom_write_bit(w, found);
-    n_in_cache += found;
-  }
-  assert(n_in_cache + n_out_cache == n);
-  delta_encode_palette_colors(out_cache_colors, n_out_cache, bit_depth, 1, w);
-}
-
-// Write chroma palette color values. U channel is handled similarly to the luma
-// channel. For v channel, either use delta encoding or transmit raw values
-// directly, whichever costs less.
-static void write_palette_colors_uv(const MACROBLOCKD *const xd,
-                                    const PALETTE_MODE_INFO *const pmi,
-                                    int bit_depth, aom_writer *w) {
-  const int n = pmi->palette_size[1];
-  const uint16_t *colors_u = pmi->palette_colors + PALETTE_MAX_SIZE;
-  const uint16_t *colors_v = pmi->palette_colors + 2 * PALETTE_MAX_SIZE;
-  // U channel colors.
-  uint16_t color_cache[2 * PALETTE_MAX_SIZE];
-  const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
-  int out_cache_colors[PALETTE_MAX_SIZE];
-  uint8_t cache_color_found[2 * PALETTE_MAX_SIZE];
-  const int n_out_cache = av1_index_color_cache(
-      color_cache, n_cache, colors_u, n, cache_color_found, out_cache_colors);
-  int n_in_cache = 0;
-  for (int i = 0; i < n_cache && n_in_cache < n; ++i) {
-    const int found = cache_color_found[i];
-    aom_write_bit(w, found);
-    n_in_cache += found;
-  }
-  delta_encode_palette_colors(out_cache_colors, n_out_cache, bit_depth, 0, w);
-
-  // V channel colors. Don't use color cache as the colors are not sorted.
-  const int max_val = 1 << bit_depth;
-  int zero_count = 0, min_bits_v = 0;
-  int bits_v =
-      av1_get_palette_delta_bits_v(pmi, bit_depth, &zero_count, &min_bits_v);
-  const int rate_using_delta =
-      2 + bit_depth + (bits_v + 1) * (n - 1) - zero_count;
-  const int rate_using_raw = bit_depth * n;
-  if (rate_using_delta < rate_using_raw) {  // delta encoding
-    assert(colors_v[0] < (1 << bit_depth));
-    aom_write_bit(w, 1);
-    aom_write_literal(w, bits_v - min_bits_v, 2);
-    aom_write_literal(w, colors_v[0], bit_depth);
-    for (int i = 1; i < n; ++i) {
-      assert(colors_v[i] < (1 << bit_depth));
-      if (colors_v[i] == colors_v[i - 1]) {  // No need to signal sign bit.
-        aom_write_literal(w, 0, bits_v);
-        continue;
-      }
-      const int delta = abs((int)colors_v[i] - colors_v[i - 1]);
-      const int sign_bit = colors_v[i] < colors_v[i - 1];
-      if (delta <= max_val - delta) {
-        aom_write_literal(w, delta, bits_v);
-        aom_write_bit(w, sign_bit);
-      } else {
-        aom_write_literal(w, max_val - delta, bits_v);
-        aom_write_bit(w, !sign_bit);
-      }
-    }
-  } else {  // Transmit raw values.
-    aom_write_bit(w, 0);
-    for (int i = 0; i < n; ++i) {
-      assert(colors_v[i] < (1 << bit_depth));
-      aom_write_literal(w, colors_v[i], bit_depth);
-    }
-  }
-}
-
-static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                                    const MB_MODE_INFO *const mbmi, int mi_row,
-                                    int mi_col, aom_writer *w) {
-  const int num_planes = av1_num_planes(cm);
-  const BLOCK_SIZE bsize = mbmi->sb_type;
-  assert(av1_allow_palette(cm->allow_screen_content_tools, bsize));
-  const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-  const int bsize_ctx = av1_get_palette_bsize_ctx(bsize);
-
-  if (mbmi->mode == DC_PRED) {
-    const int n = pmi->palette_size[0];
-    const int palette_y_mode_ctx = av1_get_palette_mode_ctx(xd);
-    aom_write_symbol(
-        w, n > 0,
-        xd->tile_ctx->palette_y_mode_cdf[bsize_ctx][palette_y_mode_ctx], 2);
-    if (n > 0) {
-      aom_write_symbol(w, n - PALETTE_MIN_SIZE,
-                       xd->tile_ctx->palette_y_size_cdf[bsize_ctx],
-                       PALETTE_SIZES);
-      write_palette_colors_y(xd, pmi, cm->seq_params.bit_depth, w);
-    }
-  }
-
-  const int uv_dc_pred =
-      num_planes > 1 && mbmi->uv_mode == UV_DC_PRED &&
-      is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
-                          xd->plane[1].subsampling_y);
-  if (uv_dc_pred) {
-    const int n = pmi->palette_size[1];
-    const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0);
-    aom_write_symbol(w, n > 0,
-                     xd->tile_ctx->palette_uv_mode_cdf[palette_uv_mode_ctx], 2);
-    if (n > 0) {
-      aom_write_symbol(w, n - PALETTE_MIN_SIZE,
-                       xd->tile_ctx->palette_uv_size_cdf[bsize_ctx],
-                       PALETTE_SIZES);
-      write_palette_colors_uv(xd, pmi, cm->seq_params.bit_depth, w);
-    }
-  }
-}
-
-void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
-                       int blk_row, int blk_col, int plane, TX_SIZE tx_size,
-                       aom_writer *w) {
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  const int is_inter = is_inter_block(mbmi);
-  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-
-  // Only y plane's tx_type is transmitted
-  if (plane > 0) return;
-  PLANE_TYPE plane_type = get_plane_type(plane);
-  TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, tx_size,
-                                    cm->reduced_tx_set_used);
-
-  const TX_SIZE square_tx_size = txsize_sqr_map[tx_size];
-  if (get_ext_tx_types(tx_size, is_inter, cm->reduced_tx_set_used) > 1 &&
-      ((!cm->seg.enabled && cm->base_qindex > 0) ||
-       (cm->seg.enabled && xd->qindex[mbmi->segment_id] > 0)) &&
-      !mbmi->skip &&
-      !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
-    const TxSetType tx_set_type =
-        av1_get_ext_tx_set_type(tx_size, is_inter, cm->reduced_tx_set_used);
-    const int eset = get_ext_tx_set(tx_size, is_inter, cm->reduced_tx_set_used);
-    // eset == 0 should correspond to a set with only DCT_DCT and there
-    // is no need to send the tx_type
-    assert(eset > 0);
-    assert(av1_ext_tx_used[tx_set_type][tx_type]);
-    if (is_inter) {
-      aom_write_symbol(w, av1_ext_tx_ind[tx_set_type][tx_type],
-                       ec_ctx->inter_ext_tx_cdf[eset][square_tx_size],
-                       av1_num_ext_tx_set[tx_set_type]);
-    } else {
-      PREDICTION_MODE intra_dir;
-      if (mbmi->filter_intra_mode_info.use_filter_intra)
-        intra_dir =
-            fimode_to_intradir[mbmi->filter_intra_mode_info.filter_intra_mode];
-      else
-        intra_dir = mbmi->mode;
-      aom_write_symbol(
-          w, av1_ext_tx_ind[tx_set_type][tx_type],
-          ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][intra_dir],
-          av1_num_ext_tx_set[tx_set_type]);
-    }
-  }
-}
-
-static void write_intra_y_mode_nonkf(FRAME_CONTEXT *frame_ctx, BLOCK_SIZE bsize,
-                                     PREDICTION_MODE mode, aom_writer *w) {
-  aom_write_symbol(w, mode, frame_ctx->y_mode_cdf[size_group_lookup[bsize]],
-                   INTRA_MODES);
-}
-
-static void write_intra_uv_mode(FRAME_CONTEXT *frame_ctx,
-                                UV_PREDICTION_MODE uv_mode,
-                                PREDICTION_MODE y_mode,
-                                CFL_ALLOWED_TYPE cfl_allowed, aom_writer *w) {
-  aom_write_symbol(w, uv_mode, frame_ctx->uv_mode_cdf[cfl_allowed][y_mode],
-                   UV_INTRA_MODES - !cfl_allowed);
-}
-
-static void write_cfl_alphas(FRAME_CONTEXT *const ec_ctx, int idx,
-                             int joint_sign, aom_writer *w) {
-  aom_write_symbol(w, joint_sign, ec_ctx->cfl_sign_cdf, CFL_JOINT_SIGNS);
-  // Magnitudes are only signaled for nonzero codes.
-  if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) {
-    aom_cdf_prob *cdf_u = ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
-    aom_write_symbol(w, CFL_IDX_U(idx), cdf_u, CFL_ALPHABET_SIZE);
-  }
-  if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) {
-    aom_cdf_prob *cdf_v = ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
-    aom_write_symbol(w, CFL_IDX_V(idx), cdf_v, CFL_ALPHABET_SIZE);
-  }
-}
-
-static void write_cdef(AV1_COMMON *cm, MACROBLOCKD *const xd, aom_writer *w,
-                       int skip, int mi_col, int mi_row) {
-  if (cm->coded_lossless || cm->allow_intrabc) {
-    // Initialize to indicate no CDEF for safety.
-    cm->cdef_bits = 0;
-    cm->cdef_strengths[0] = 0;
-    cm->nb_cdef_strengths = 1;
-    cm->cdef_uv_strengths[0] = 0;
-    return;
-  }
-
-  const int m = ~((1 << (6 - MI_SIZE_LOG2)) - 1);
-  const MB_MODE_INFO *mbmi =
-      cm->mi_grid_visible[(mi_row & m) * cm->mi_stride + (mi_col & m)];
-  // Initialise when at top left part of the superblock
-  if (!(mi_row & (cm->seq_params.mib_size - 1)) &&
-      !(mi_col & (cm->seq_params.mib_size - 1))) {  // Top left?
-    xd->cdef_preset[0] = xd->cdef_preset[1] = xd->cdef_preset[2] =
-        xd->cdef_preset[3] = -1;
-  }
-
-  // Emit CDEF param at first non-skip coding block
-  const int mask = 1 << (6 - MI_SIZE_LOG2);
-  const int index = cm->seq_params.sb_size == BLOCK_128X128
-                        ? !!(mi_col & mask) + 2 * !!(mi_row & mask)
-                        : 0;
-  if (xd->cdef_preset[index] == -1 && !skip) {
-    aom_write_literal(w, mbmi->cdef_strength, cm->cdef_bits);
-    xd->cdef_preset[index] = mbmi->cdef_strength;
-  }
-}
-
-static void write_inter_segment_id(AV1_COMP *cpi, aom_writer *w,
-                                   const struct segmentation *const seg,
-                                   struct segmentation_probs *const segp,
-                                   int mi_row, int mi_col, int skip,
-                                   int preskip) {
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  AV1_COMMON *const cm = &cpi->common;
-
-  if (seg->update_map) {
-    if (preskip) {
-      if (!seg->segid_preskip) return;
-    } else {
-      if (seg->segid_preskip) return;
-      if (skip) {
-        write_segment_id(cpi, mbmi, w, seg, segp, mi_row, mi_col, 1);
-        if (seg->temporal_update) ((MB_MODE_INFO *)mbmi)->seg_id_predicted = 0;
-        return;
-      }
-    }
-    if (seg->temporal_update) {
-      const int pred_flag = mbmi->seg_id_predicted;
-      aom_cdf_prob *pred_cdf = av1_get_pred_cdf_seg_id(segp, xd);
-      aom_write_symbol(w, pred_flag, pred_cdf, 2);
-      if (!pred_flag) {
-        write_segment_id(cpi, mbmi, w, seg, segp, mi_row, mi_col, 0);
-      }
-      if (pred_flag) {
-        set_spatial_segment_id(cm, cm->current_frame_seg_map, mbmi->sb_type,
-                               mi_row, mi_col, mbmi->segment_id);
-      }
-    } else {
-      write_segment_id(cpi, mbmi, w, seg, segp, mi_row, mi_col, 0);
-    }
-  }
-}
-
-// If delta q is present, writes delta_q index.
-// Also writes delta_q loop filter levels, if present.
-static void write_delta_q_params(AV1_COMP *cpi, const int mi_row,
-                                 const int mi_col, int skip, aom_writer *w) {
-  AV1_COMMON *const cm = &cpi->common;
-  if (cm->delta_q_present_flag) {
-    MACROBLOCK *const x = &cpi->td.mb;
-    MACROBLOCKD *const xd = &x->e_mbd;
-    const MB_MODE_INFO *const mbmi = xd->mi[0];
-    const BLOCK_SIZE bsize = mbmi->sb_type;
-    const int super_block_upper_left =
-        ((mi_row & (cm->seq_params.mib_size - 1)) == 0) &&
-        ((mi_col & (cm->seq_params.mib_size - 1)) == 0);
-
-    if ((bsize != cm->seq_params.sb_size || skip == 0) &&
-        super_block_upper_left) {
-      assert(mbmi->current_qindex > 0);
-      const int reduced_delta_qindex =
-          (mbmi->current_qindex - xd->current_qindex) / cm->delta_q_res;
-      write_delta_qindex(xd, reduced_delta_qindex, w);
-      xd->current_qindex = mbmi->current_qindex;
-      if (cm->delta_lf_present_flag) {
-        if (cm->delta_lf_multi) {
-          const int frame_lf_count =
-              av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
-          for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
-            int reduced_delta_lflevel =
-                (mbmi->delta_lf[lf_id] - xd->delta_lf[lf_id]) /
-                cm->delta_lf_res;
-            write_delta_lflevel(cm, xd, lf_id, reduced_delta_lflevel, w);
-            xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id];
-          }
-        } else {
-          int reduced_delta_lflevel =
-              (mbmi->delta_lf_from_base - xd->delta_lf_from_base) /
-              cm->delta_lf_res;
-          write_delta_lflevel(cm, xd, -1, reduced_delta_lflevel, w);
-          xd->delta_lf_from_base = mbmi->delta_lf_from_base;
-        }
-      }
-    }
-  }
-}
-
-static void write_intra_prediction_modes(AV1_COMP *cpi, const int mi_row,
-                                         const int mi_col, int is_keyframe,
-                                         aom_writer *w) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const PREDICTION_MODE mode = mbmi->mode;
-  const BLOCK_SIZE bsize = mbmi->sb_type;
-
-  // Y mode.
-  if (is_keyframe) {
-    const MB_MODE_INFO *const above_mi = xd->above_mbmi;
-    const MB_MODE_INFO *const left_mi = xd->left_mbmi;
-    write_intra_y_mode_kf(ec_ctx, mbmi, above_mi, left_mi, mode, w);
-  } else {
-    write_intra_y_mode_nonkf(ec_ctx, bsize, mode, w);
-  }
-
-  // Y angle delta.
-  const int use_angle_delta = av1_use_angle_delta(bsize);
-  if (use_angle_delta && av1_is_directional_mode(mode)) {
-    write_angle_delta(w, mbmi->angle_delta[PLANE_TYPE_Y],
-                      ec_ctx->angle_delta_cdf[mode - V_PRED]);
-  }
-
-  // UV mode and UV angle delta.
-  if (!cm->seq_params.monochrome &&
-      is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
-                          xd->plane[1].subsampling_y)) {
-    const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
-    write_intra_uv_mode(ec_ctx, uv_mode, mode, is_cfl_allowed(xd), w);
-    if (uv_mode == UV_CFL_PRED)
-      write_cfl_alphas(ec_ctx, mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, w);
-    if (use_angle_delta && av1_is_directional_mode(get_uv_mode(uv_mode))) {
-      write_angle_delta(w, mbmi->angle_delta[PLANE_TYPE_UV],
-                        ec_ctx->angle_delta_cdf[uv_mode - V_PRED]);
-    }
-  }
-
-  // Palette.
-  if (av1_allow_palette(cm->allow_screen_content_tools, bsize)) {
-    write_palette_mode_info(cm, xd, mbmi, mi_row, mi_col, w);
-  }
-
-  // Filter intra.
-  write_filter_intra_mode_info(cm, xd, mbmi, w);
-}
-
-static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
-                                const int mi_col, aom_writer *w) {
-  AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-  const struct segmentation *const seg = &cm->seg;
-  struct segmentation_probs *const segp = &ec_ctx->seg;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
-  const PREDICTION_MODE mode = mbmi->mode;
-  const int segment_id = mbmi->segment_id;
-  const BLOCK_SIZE bsize = mbmi->sb_type;
-  const int allow_hp = cm->allow_high_precision_mv;
-  const int is_inter = is_inter_block(mbmi);
-  const int is_compound = has_second_ref(mbmi);
-  int ref;
-
-  write_inter_segment_id(cpi, w, seg, segp, mi_row, mi_col, 0, 1);
-
-  write_skip_mode(cm, xd, segment_id, mbmi, w);
-
-  assert(IMPLIES(mbmi->skip_mode, mbmi->skip));
-  const int skip =
-      mbmi->skip_mode ? 1 : write_skip(cm, xd, segment_id, mbmi, w);
-
-  write_inter_segment_id(cpi, w, seg, segp, mi_row, mi_col, skip, 0);
-
-  write_cdef(cm, xd, w, skip, mi_col, mi_row);
-
-  write_delta_q_params(cpi, mi_row, mi_col, skip, w);
-
-  if (!mbmi->skip_mode) write_is_inter(cm, xd, mbmi->segment_id, w, is_inter);
-
-  if (mbmi->skip_mode) return;
-
-  if (!is_inter) {
-    write_intra_prediction_modes(cpi, mi_row, mi_col, 0, w);
-  } else {
-    int16_t mode_ctx;
-
-    av1_collect_neighbors_ref_counts(xd);
-
-    write_ref_frames(cm, xd, w);
-
-    mode_ctx =
-        av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
-
-    // If segment skip is not enabled code the mode.
-    if (!segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
-      if (is_inter_compound_mode(mode))
-        write_inter_compound_mode(xd, w, mode, mode_ctx);
-      else if (is_inter_singleref_mode(mode))
-        write_inter_mode(w, mode, ec_ctx, mode_ctx);
-
-      if (mode == NEWMV || mode == NEW_NEWMV || have_nearmv_in_inter_mode(mode))
-        write_drl_idx(ec_ctx, mbmi, mbmi_ext, w);
-      else
-        assert(mbmi->ref_mv_idx == 0);
-    }
-
-    if (mode == NEWMV || mode == NEW_NEWMV) {
-      for (ref = 0; ref < 1 + is_compound; ++ref) {
-        nmv_context *nmvc = &ec_ctx->nmvc;
-        const int_mv ref_mv = av1_get_ref_mv(x, ref);
-        av1_encode_mv(cpi, w, &mbmi->mv[ref].as_mv, &ref_mv.as_mv, nmvc,
-                      allow_hp);
-      }
-    } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
-      nmv_context *nmvc = &ec_ctx->nmvc;
-      const int_mv ref_mv = av1_get_ref_mv(x, 1);
-      av1_encode_mv(cpi, w, &mbmi->mv[1].as_mv, &ref_mv.as_mv, nmvc, allow_hp);
-    } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
-      nmv_context *nmvc = &ec_ctx->nmvc;
-      const int_mv ref_mv = av1_get_ref_mv(x, 0);
-      av1_encode_mv(cpi, w, &mbmi->mv[0].as_mv, &ref_mv.as_mv, nmvc, allow_hp);
-    }
-
-    if (cpi->common.reference_mode != COMPOUND_REFERENCE &&
-        cpi->common.seq_params.enable_interintra_compound &&
-        is_interintra_allowed(mbmi)) {
-      const int interintra = mbmi->ref_frame[1] == INTRA_FRAME;
-      const int bsize_group = size_group_lookup[bsize];
-      aom_write_symbol(w, interintra, ec_ctx->interintra_cdf[bsize_group], 2);
-      if (interintra) {
-        aom_write_symbol(w, mbmi->interintra_mode,
-                         ec_ctx->interintra_mode_cdf[bsize_group],
-                         INTERINTRA_MODES);
-        if (is_interintra_wedge_used(bsize)) {
-          aom_write_symbol(w, mbmi->use_wedge_interintra,
-                           ec_ctx->wedge_interintra_cdf[bsize], 2);
-          if (mbmi->use_wedge_interintra) {
-            aom_write_symbol(w, mbmi->interintra_wedge_index,
-                             ec_ctx->wedge_idx_cdf[bsize], 16);
-            assert(mbmi->interintra_wedge_sign == 0);
-          }
-        }
-      }
-    }
-
-    if (mbmi->ref_frame[1] != INTRA_FRAME) write_motion_mode(cm, xd, mbmi, w);
-
-    // First write idx to indicate current compound inter prediction mode group
-    // Group A (0): jnt_comp, compound_average
-    // Group B (1): interintra, compound_diffwtd, wedge
-    if (has_second_ref(mbmi)) {
-      const int masked_compound_used = is_any_masked_compound_used(bsize) &&
-                                       cm->seq_params.enable_masked_compound;
-
-      if (masked_compound_used) {
-        const int ctx_comp_group_idx = get_comp_group_idx_context(xd);
-        aom_write_symbol(w, mbmi->comp_group_idx,
-                         ec_ctx->comp_group_idx_cdf[ctx_comp_group_idx], 2);
-      } else {
-        assert(mbmi->comp_group_idx == 0);
-      }
-
-      if (mbmi->comp_group_idx == 0) {
-        if (mbmi->compound_idx)
-          assert(mbmi->interinter_comp.type == COMPOUND_AVERAGE);
-
-        if (cm->seq_params.enable_jnt_comp) {
-          const int comp_index_ctx = get_comp_index_context(cm, xd);
-          aom_write_symbol(w, mbmi->compound_idx,
-                           ec_ctx->compound_index_cdf[comp_index_ctx], 2);
-        } else {
-          assert(mbmi->compound_idx == 1);
-        }
-      } else {
-        assert(cpi->common.reference_mode != SINGLE_REFERENCE &&
-               is_inter_compound_mode(mbmi->mode) &&
-               mbmi->motion_mode == SIMPLE_TRANSLATION);
-        assert(masked_compound_used);
-        // compound_diffwtd, wedge
-        assert(mbmi->interinter_comp.type == COMPOUND_WEDGE ||
-               mbmi->interinter_comp.type == COMPOUND_DIFFWTD);
-
-        if (is_interinter_compound_used(COMPOUND_WEDGE, bsize))
-          aom_write_symbol(w, mbmi->interinter_comp.type - 1,
-                           ec_ctx->compound_type_cdf[bsize],
-                           COMPOUND_TYPES - 1);
-
-        if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
-          assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
-          aom_write_symbol(w, mbmi->interinter_comp.wedge_index,
-                           ec_ctx->wedge_idx_cdf[bsize], 16);
-          aom_write_bit(w, mbmi->interinter_comp.wedge_sign);
-        } else {
-          assert(mbmi->interinter_comp.type == COMPOUND_DIFFWTD);
-          aom_write_literal(w, mbmi->interinter_comp.mask_type,
-                            MAX_DIFFWTD_MASK_BITS);
-        }
-      }
-    }
-
-    write_mb_interp_filter(cpi, xd, w);
-  }
-}
-
-static void write_intrabc_info(MACROBLOCKD *xd,
-                               const MB_MODE_INFO_EXT *mbmi_ext,
-                               aom_writer *w) {
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  int use_intrabc = is_intrabc_block(mbmi);
-  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-  aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf, 2);
-  if (use_intrabc) {
-    assert(mbmi->mode == DC_PRED);
-    assert(mbmi->uv_mode == UV_DC_PRED);
-    assert(mbmi->motion_mode == SIMPLE_TRANSLATION);
-    int_mv dv_ref = mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv;
-    av1_encode_dv(w, &mbmi->mv[0].as_mv, &dv_ref.as_mv, &ec_ctx->ndvc);
-  }
-}
-
-static void write_mb_modes_kf(AV1_COMP *cpi, MACROBLOCKD *xd,
-                              const MB_MODE_INFO_EXT *mbmi_ext,
-                              const int mi_row, const int mi_col,
-                              aom_writer *w) {
-  AV1_COMMON *const cm = &cpi->common;
-  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-  const struct segmentation *const seg = &cm->seg;
-  struct segmentation_probs *const segp = &ec_ctx->seg;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-
-  if (seg->segid_preskip && seg->update_map)
-    write_segment_id(cpi, mbmi, w, seg, segp, mi_row, mi_col, 0);
-
-  const int skip = write_skip(cm, xd, mbmi->segment_id, mbmi, w);
-
-  if (!seg->segid_preskip && seg->update_map)
-    write_segment_id(cpi, mbmi, w, seg, segp, mi_row, mi_col, skip);
-
-  write_cdef(cm, xd, w, skip, mi_col, mi_row);
-
-  write_delta_q_params(cpi, mi_row, mi_col, skip, w);
-
-  if (av1_allow_intrabc(cm)) {
-    write_intrabc_info(xd, mbmi_ext, w);
-    if (is_intrabc_block(mbmi)) return;
-  }
-
-  write_intra_prediction_modes(cpi, mi_row, mi_col, 1, w);
-}
-
-#if CONFIG_RD_DEBUG
-static void dump_mode_info(MODE_INFO *mi) {
-  printf("\nmi->mi_row == %d\n", mi->mi_row);
-  printf("&& mi->mi_col == %d\n", mi->mi_col);
-  printf("&& mi->sb_type == %d\n", mi->sb_type);
-  printf("&& mi->tx_size == %d\n", mi->tx_size);
-  printf("&& mi->mode == %d\n", mi->mode);
-}
-static int rd_token_stats_mismatch(RD_STATS *rd_stats, TOKEN_STATS *token_stats,
-                                   int plane) {
-  if (rd_stats->txb_coeff_cost[plane] != token_stats->cost) {
-    int r, c;
-    printf("\nplane %d rd_stats->txb_coeff_cost %d token_stats->cost %d\n",
-           plane, rd_stats->txb_coeff_cost[plane], token_stats->cost);
-    printf("rd txb_coeff_cost_map\n");
-    for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r) {
-      for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) {
-        printf("%d ", rd_stats->txb_coeff_cost_map[plane][r][c]);
-      }
-      printf("\n");
-    }
-
-    printf("pack txb_coeff_cost_map\n");
-    for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r) {
-      for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) {
-        printf("%d ", token_stats->txb_coeff_cost_map[r][c]);
-      }
-      printf("\n");
-    }
-    return 1;
-  }
-  return 0;
-}
-#endif
-
-#if ENC_MISMATCH_DEBUG
-static void enc_dump_logs(AV1_COMP *cpi, int mi_row, int mi_col) {
-  AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-  xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
-  const MB_MODE_INFO *const *mbmi = xd->mi[0];
-  if (is_inter_block(mbmi)) {
-#define FRAME_TO_CHECK 11
-    if (cm->current_video_frame == FRAME_TO_CHECK && cm->show_frame == 1) {
-      const BLOCK_SIZE bsize = mbmi->sb_type;
-
-      int_mv mv[2];
-      int is_comp_ref = has_second_ref(mbmi);
-      int ref;
-
-      for (ref = 0; ref < 1 + is_comp_ref; ++ref)
-        mv[ref].as_mv = mbmi->mv[ref].as_mv;
-
-      if (!is_comp_ref) {
-        mv[1].as_int = 0;
-      }
-
-      MACROBLOCK *const x = &cpi->td.mb;
-      const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
-      const int16_t mode_ctx =
-          is_comp_ref ? mbmi_ext->compound_mode_context[mbmi->ref_frame[0]]
-                      : av1_mode_context_analyzer(mbmi_ext->mode_context,
-                                                  mbmi->ref_frame);
-
-      const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK;
-      int16_t zeromv_ctx = -1;
-      int16_t refmv_ctx = -1;
-
-      if (mbmi->mode != NEWMV) {
-        zeromv_ctx = (mode_ctx >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
-        if (mbmi->mode != GLOBALMV)
-          refmv_ctx = (mode_ctx >> REFMV_OFFSET) & REFMV_CTX_MASK;
-      }
-
-      printf(
-          "=== ENCODER ===: "
-          "Frame=%d, (mi_row,mi_col)=(%d,%d), skip_mode=%d, mode=%d, bsize=%d, "
-          "show_frame=%d, mv[0]=(%d,%d), mv[1]=(%d,%d), ref[0]=%d, "
-          "ref[1]=%d, motion_mode=%d, mode_ctx=%d, "
-          "newmv_ctx=%d, zeromv_ctx=%d, refmv_ctx=%d, tx_size=%d\n",
-          cm->current_video_frame, mi_row, mi_col, mbmi->skip_mode, mbmi->mode,
-          bsize, cm->show_frame, mv[0].as_mv.row, mv[0].as_mv.col,
-          mv[1].as_mv.row, mv[1].as_mv.col, mbmi->ref_frame[0],
-          mbmi->ref_frame[1], mbmi->motion_mode, mode_ctx, newmv_ctx,
-          zeromv_ctx, refmv_ctx, mbmi->tx_size);
-    }
-  }
-}
-#endif  // ENC_MISMATCH_DEBUG
-
-static void write_mbmi_b(AV1_COMP *cpi, const TileInfo *const tile,
-                         aom_writer *w, int mi_row, int mi_col) {
-  AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-  int bh, bw;
-  xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
-  MB_MODE_INFO *m = xd->mi[0];
-
-  assert(m->sb_type <= cm->seq_params.sb_size ||
-         (m->sb_type >= BLOCK_SIZES && m->sb_type < BLOCK_SIZES_ALL));
-
-  bh = mi_size_high[m->sb_type];
-  bw = mi_size_wide[m->sb_type];
-
-  cpi->td.mb.mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
-
-  set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
-
-  xd->above_txfm_context = cm->above_txfm_context[tile->tile_row] + mi_col;
-  xd->left_txfm_context =
-      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
-
-  if (frame_is_intra_only(cm)) {
-    write_mb_modes_kf(cpi, xd, cpi->td.mb.mbmi_ext, mi_row, mi_col, w);
-  } else {
-    // has_subpel_mv_component needs the ref frame buffers set up to look
-    // up if they are scaled. has_subpel_mv_component is in turn needed by
-    // write_switchable_interp_filter, which is called by pack_inter_mode_mvs.
-    set_ref_ptrs(cm, xd, m->ref_frame[0], m->ref_frame[1]);
-
-#if ENC_MISMATCH_DEBUG
-    enc_dump_logs(cpi, mi_row, mi_col);
-#endif  // ENC_MISMATCH_DEBUG
-
-    pack_inter_mode_mvs(cpi, mi_row, mi_col, w);
-  }
-}
-
-static void write_inter_txb_coeff(AV1_COMMON *const cm, MACROBLOCK *const x,
-                                  MB_MODE_INFO *const mbmi, aom_writer *w,
-                                  const TOKENEXTRA **tok,
-                                  const TOKENEXTRA *const tok_end,
-                                  TOKEN_STATS *token_stats, const int row,
-                                  const int col, int *block, const int plane) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const BLOCK_SIZE bsize = mbmi->sb_type;
-  const BLOCK_SIZE bsizec =
-      scale_chroma_bsize(bsize, pd->subsampling_x, pd->subsampling_y);
-
-  const BLOCK_SIZE plane_bsize =
-      get_plane_block_size(bsizec, pd->subsampling_x, pd->subsampling_y);
-
-  const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, plane);
-  const int step =
-      tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
-  const int bkw = tx_size_wide_unit[max_tx_size];
-  const int bkh = tx_size_high_unit[max_tx_size];
-
-  const BLOCK_SIZE max_unit_bsize =
-      get_plane_block_size(BLOCK_64X64, pd->subsampling_x, pd->subsampling_y);
-  int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
-  int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
-
-  int blk_row, blk_col;
-
-  const int num_4x4_w = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-  const int num_4x4_h = block_size_high[plane_bsize] >> tx_size_high_log2[0];
-
-  const int unit_height =
-      AOMMIN(mu_blocks_high + (row >> pd->subsampling_y), num_4x4_h);
-  const int unit_width =
-      AOMMIN(mu_blocks_wide + (col >> pd->subsampling_x), num_4x4_w);
-  for (blk_row = row >> pd->subsampling_y; blk_row < unit_height;
-       blk_row += bkh) {
-    for (blk_col = col >> pd->subsampling_x; blk_col < unit_width;
-         blk_col += bkw) {
-      pack_txb_tokens(w, cm, x, tok, tok_end, xd, mbmi, plane, plane_bsize,
-                      cm->seq_params.bit_depth, *block, blk_row, blk_col,
-                      max_tx_size, token_stats);
-      *block += step;
-    }
-  }
-}
-
-static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
-                           aom_writer *w, const TOKENEXTRA **tok,
-                           const TOKENEXTRA *const tok_end, int mi_row,
-                           int mi_col) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-  const int mi_offset = mi_row * cm->mi_stride + mi_col;
-  MB_MODE_INFO *const mbmi = *(cm->mi_grid_visible + mi_offset);
-  int plane;
-  int bh, bw;
-  MACROBLOCK *const x = &cpi->td.mb;
-  (void)tok;
-  (void)tok_end;
-  xd->mi = cm->mi_grid_visible + mi_offset;
-
-  assert(mbmi->sb_type <= cm->seq_params.sb_size ||
-         (mbmi->sb_type >= BLOCK_SIZES && mbmi->sb_type < BLOCK_SIZES_ALL));
-
-  bh = mi_size_high[mbmi->sb_type];
-  bw = mi_size_wide[mbmi->sb_type];
-  cpi->td.mb.mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
-
-  set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
-
-  if (!mbmi->skip) {
-    if (!is_inter_block(mbmi))
-      av1_write_coeffs_mb(cm, x, mi_row, mi_col, w, mbmi->sb_type);
-
-    if (is_inter_block(mbmi)) {
-      int block[MAX_MB_PLANE] = { 0 };
-      const BLOCK_SIZE plane_bsize = mbmi->sb_type;
-      assert(plane_bsize == get_plane_block_size(mbmi->sb_type,
-                                                 xd->plane[0].subsampling_x,
-                                                 xd->plane[0].subsampling_y));
-      const int num_4x4_w =
-          block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-      const int num_4x4_h =
-          block_size_high[plane_bsize] >> tx_size_high_log2[0];
-      int row, col;
-      TOKEN_STATS token_stats;
-      init_token_stats(&token_stats);
-
-      const BLOCK_SIZE max_unit_bsize = BLOCK_64X64;
-      assert(max_unit_bsize ==
-             get_plane_block_size(BLOCK_64X64, xd->plane[0].subsampling_x,
-                                  xd->plane[0].subsampling_y));
-      int mu_blocks_wide =
-          block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
-      int mu_blocks_high =
-          block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
-
-      mu_blocks_wide = AOMMIN(num_4x4_w, mu_blocks_wide);
-      mu_blocks_high = AOMMIN(num_4x4_h, mu_blocks_high);
-
-      for (row = 0; row < num_4x4_h; row += mu_blocks_high) {
-        for (col = 0; col < num_4x4_w; col += mu_blocks_wide) {
-          for (plane = 0; plane < num_planes && is_inter_block(mbmi); ++plane) {
-            const struct macroblockd_plane *const pd = &xd->plane[plane];
-            if (!is_chroma_reference(mi_row, mi_col, mbmi->sb_type,
-                                     pd->subsampling_x, pd->subsampling_y)) {
-              continue;
-            }
-            write_inter_txb_coeff(cm, x, mbmi, w, tok, tok_end, &token_stats,
-                                  row, col, &block[plane], plane);
-          }
-        }
-#if CONFIG_RD_DEBUG
-        if (mbmi->sb_type >= BLOCK_8X8 &&
-            rd_token_stats_mismatch(&mbmi->rd_stats, &token_stats, plane)) {
-          dump_mode_info(m);
-          assert(0);
-        }
-#endif  // CONFIG_RD_DEBUG
-      }
-    }
-  }
-}
-
-static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile,
-                          aom_writer *w, const TOKENEXTRA **tok,
-                          const TOKENEXTRA *const tok_end, int mi_row,
-                          int mi_col) {
-  write_mbmi_b(cpi, tile, w, mi_row, mi_col);
-
-  AV1_COMMON *cm = &cpi->common;
-  MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  for (int plane = 0; plane < AOMMIN(2, av1_num_planes(cm)); ++plane) {
-    const uint8_t palette_size_plane =
-        mbmi->palette_mode_info.palette_size[plane];
-    assert(!mbmi->skip_mode || !palette_size_plane);
-    if (palette_size_plane > 0) {
-      assert(mbmi->use_intrabc == 0);
-      assert(av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type));
-      int rows, cols;
-      av1_get_block_dimensions(mbmi->sb_type, plane, xd, NULL, NULL, &rows,
-                               &cols);
-      assert(*tok < tok_end);
-      pack_map_tokens(w, tok, palette_size_plane, rows * cols);
-    }
-  }
-
-  BLOCK_SIZE bsize = mbmi->sb_type;
-  int is_inter_tx = is_inter_block(mbmi) || is_intrabc_block(mbmi);
-  int skip = mbmi->skip;
-  int segment_id = mbmi->segment_id;
-  if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(bsize) &&
-      !(is_inter_tx && skip) && !xd->lossless[segment_id]) {
-    if (is_inter_tx) {  // This implies skip flag is 0.
-      const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, bsize, 0);
-      const int txbh = tx_size_high_unit[max_tx_size];
-      const int txbw = tx_size_wide_unit[max_tx_size];
-      const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
-      const int height = block_size_high[bsize] >> tx_size_high_log2[0];
-      int idx, idy;
-      for (idy = 0; idy < height; idy += txbh)
-        for (idx = 0; idx < width; idx += txbw)
-          write_tx_size_vartx(xd, mbmi, max_tx_size, 0, idy, idx, w);
-    } else {
-      write_selected_tx_size(xd, w);
-      set_txfm_ctxs(mbmi->tx_size, xd->n4_w, xd->n4_h, 0, xd);
-    }
-  } else {
-    set_txfm_ctxs(mbmi->tx_size, xd->n4_w, xd->n4_h,
-                  skip && is_inter_block(mbmi), xd);
-  }
-
-  write_tokens_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
-}
-
-static void write_partition(const AV1_COMMON *const cm,
-                            const MACROBLOCKD *const xd, int hbs, int mi_row,
-                            int mi_col, PARTITION_TYPE p, BLOCK_SIZE bsize,
-                            aom_writer *w) {
-  const int is_partition_point = bsize >= BLOCK_8X8;
-
-  if (!is_partition_point) return;
-
-  const int has_rows = (mi_row + hbs) < cm->mi_rows;
-  const int has_cols = (mi_col + hbs) < cm->mi_cols;
-  const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
-  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-
-  if (!has_rows && !has_cols) {
-    assert(p == PARTITION_SPLIT);
-    return;
-  }
-
-  if (has_rows && has_cols) {
-    aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx],
-                     partition_cdf_length(bsize));
-  } else if (!has_rows && has_cols) {
-    assert(p == PARTITION_SPLIT || p == PARTITION_HORZ);
-    assert(bsize > BLOCK_8X8);
-    aom_cdf_prob cdf[2];
-    partition_gather_vert_alike(cdf, ec_ctx->partition_cdf[ctx], bsize);
-    aom_write_cdf(w, p == PARTITION_SPLIT, cdf, 2);
-  } else {
-    assert(has_rows && !has_cols);
-    assert(p == PARTITION_SPLIT || p == PARTITION_VERT);
-    assert(bsize > BLOCK_8X8);
-    aom_cdf_prob cdf[2];
-    partition_gather_horz_alike(cdf, ec_ctx->partition_cdf[ctx], bsize);
-    aom_write_cdf(w, p == PARTITION_SPLIT, cdf, 2);
-  }
-}
-
-static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
-                           aom_writer *const w, const TOKENEXTRA **tok,
-                           const TOKENEXTRA *const tok_end, int mi_row,
-                           int mi_col, BLOCK_SIZE bsize) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-  const int hbs = mi_size_wide[bsize] / 2;
-  const int quarter_step = mi_size_wide[bsize] / 4;
-  int i;
-  const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize);
-  const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
-
-  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
-
-  const int num_planes = av1_num_planes(cm);
-  for (int plane = 0; plane < num_planes; ++plane) {
-    int rcol0, rcol1, rrow0, rrow1;
-    if (av1_loop_restoration_corners_in_sb(cm, plane, mi_row, mi_col, bsize,
-                                           &rcol0, &rcol1, &rrow0, &rrow1)) {
-      const int rstride = cm->rst_info[plane].horz_units_per_tile;
-      for (int rrow = rrow0; rrow < rrow1; ++rrow) {
-        for (int rcol = rcol0; rcol < rcol1; ++rcol) {
-          const int runit_idx = rcol + rrow * rstride;
-          const RestorationUnitInfo *rui =
-              &cm->rst_info[plane].unit_info[runit_idx];
-          loop_restoration_write_sb_coeffs(cm, xd, rui, w, plane,
-                                           cpi->td.counts);
-        }
-      }
-    }
-  }
-
-  write_partition(cm, xd, hbs, mi_row, mi_col, partition, bsize, w);
-  switch (partition) {
-    case PARTITION_NONE:
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
-      break;
-    case PARTITION_HORZ:
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
-      if (mi_row + hbs < cm->mi_rows)
-        write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col);
-      break;
-    case PARTITION_VERT:
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
-      if (mi_col + hbs < cm->mi_cols)
-        write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs);
-      break;
-    case PARTITION_SPLIT:
-      write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, subsize);
-      write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs, subsize);
-      write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col, subsize);
-      write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col + hbs,
-                     subsize);
-      break;
-    case PARTITION_HORZ_A:
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs);
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col);
-      break;
-    case PARTITION_HORZ_B:
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col);
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col + hbs);
-      break;
-    case PARTITION_VERT_A:
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col);
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs);
-      break;
-    case PARTITION_VERT_B:
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs);
-      write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col + hbs);
-      break;
-    case PARTITION_HORZ_4:
-      for (i = 0; i < 4; ++i) {
-        int this_mi_row = mi_row + i * quarter_step;
-        if (i > 0 && this_mi_row >= cm->mi_rows) break;
-
-        write_modes_b(cpi, tile, w, tok, tok_end, this_mi_row, mi_col);
-      }
-      break;
-    case PARTITION_VERT_4:
-      for (i = 0; i < 4; ++i) {
-        int this_mi_col = mi_col + i * quarter_step;
-        if (i > 0 && this_mi_col >= cm->mi_cols) break;
-
-        write_modes_b(cpi, tile, w, tok, tok_end, mi_row, this_mi_col);
-      }
-      break;
-    default: assert(0);
-  }
-
-  // update partition context
-  update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
-}
-
-static void write_modes(AV1_COMP *const cpi, const TileInfo *const tile,
-                        aom_writer *const w, int tile_row, int tile_col) {
-  AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-  const int mi_row_start = tile->mi_row_start;
-  const int mi_row_end = tile->mi_row_end;
-  const int mi_col_start = tile->mi_col_start;
-  const int mi_col_end = tile->mi_col_end;
-  int mi_row, mi_col, sb_row_in_tile;
-
-  av1_zero_above_context(cm, xd, mi_col_start, mi_col_end, tile->tile_row);
-  av1_init_above_context(cm, xd, tile->tile_row);
-
-  if (cpi->common.delta_q_present_flag) {
-    xd->current_qindex = cpi->common.base_qindex;
-    if (cpi->common.delta_lf_present_flag) {
-      av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
-    }
-  }
-
-  for (mi_row = mi_row_start; mi_row < mi_row_end;
-       mi_row += cm->seq_params.mib_size) {
-    sb_row_in_tile =
-        (mi_row - tile->mi_row_start) >> cm->seq_params.mib_size_log2;
-    const TOKENEXTRA *tok =
-        cpi->tplist[tile_row][tile_col][sb_row_in_tile].start;
-    const TOKENEXTRA *tok_end =
-        tok + cpi->tplist[tile_row][tile_col][sb_row_in_tile].count;
-
-    av1_zero_left_context(xd);
-
-    for (mi_col = mi_col_start; mi_col < mi_col_end;
-         mi_col += cm->seq_params.mib_size) {
-      write_modes_sb(cpi, tile, w, &tok, tok_end, mi_row, mi_col,
-                     cm->seq_params.sb_size);
-    }
-    assert(tok == cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop);
-  }
-}
-
-static void encode_restoration_mode(AV1_COMMON *cm,
-                                    struct aom_write_bit_buffer *wb) {
-  assert(!cm->all_lossless);
-  if (!cm->seq_params.enable_restoration) return;
-  if (cm->allow_intrabc) return;
-  const int num_planes = av1_num_planes(cm);
-  int all_none = 1, chroma_none = 1;
-  for (int p = 0; p < num_planes; ++p) {
-    RestorationInfo *rsi = &cm->rst_info[p];
-    if (rsi->frame_restoration_type != RESTORE_NONE) {
-      all_none = 0;
-      chroma_none &= p == 0;
-    }
-    switch (rsi->frame_restoration_type) {
-      case RESTORE_NONE:
-        aom_wb_write_bit(wb, 0);
-        aom_wb_write_bit(wb, 0);
-        break;
-      case RESTORE_WIENER:
-        aom_wb_write_bit(wb, 1);
-        aom_wb_write_bit(wb, 0);
-        break;
-      case RESTORE_SGRPROJ:
-        aom_wb_write_bit(wb, 1);
-        aom_wb_write_bit(wb, 1);
-        break;
-      case RESTORE_SWITCHABLE:
-        aom_wb_write_bit(wb, 0);
-        aom_wb_write_bit(wb, 1);
-        break;
-      default: assert(0);
-    }
-  }
-  if (!all_none) {
-    assert(cm->seq_params.sb_size == BLOCK_64X64 ||
-           cm->seq_params.sb_size == BLOCK_128X128);
-    const int sb_size = cm->seq_params.sb_size == BLOCK_128X128 ? 128 : 64;
-
-    RestorationInfo *rsi = &cm->rst_info[0];
-
-    assert(rsi->restoration_unit_size >= sb_size);
-    assert(RESTORATION_UNITSIZE_MAX == 256);
-
-    if (sb_size == 64) {
-      aom_wb_write_bit(wb, rsi->restoration_unit_size > 64);
-    }
-    if (rsi->restoration_unit_size > 64) {
-      aom_wb_write_bit(wb, rsi->restoration_unit_size > 128);
-    }
-  }
-
-  if (num_planes > 1) {
-    int s = AOMMIN(cm->seq_params.subsampling_x, cm->seq_params.subsampling_y);
-    if (s && !chroma_none) {
-      aom_wb_write_bit(wb, cm->rst_info[1].restoration_unit_size !=
-                               cm->rst_info[0].restoration_unit_size);
-      assert(cm->rst_info[1].restoration_unit_size ==
-                 cm->rst_info[0].restoration_unit_size ||
-             cm->rst_info[1].restoration_unit_size ==
-                 (cm->rst_info[0].restoration_unit_size >> s));
-      assert(cm->rst_info[2].restoration_unit_size ==
-             cm->rst_info[1].restoration_unit_size);
-    } else if (!s) {
-      assert(cm->rst_info[1].restoration_unit_size ==
-             cm->rst_info[0].restoration_unit_size);
-      assert(cm->rst_info[2].restoration_unit_size ==
-             cm->rst_info[1].restoration_unit_size);
-    }
-  }
-}
-
-static void write_wiener_filter(int wiener_win, const WienerInfo *wiener_info,
-                                WienerInfo *ref_wiener_info, aom_writer *wb) {
-  if (wiener_win == WIENER_WIN)
-    aom_write_primitive_refsubexpfin(
-        wb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
-        WIENER_FILT_TAP0_SUBEXP_K,
-        ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV,
-        wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV);
-  else
-    assert(wiener_info->vfilter[0] == 0 &&
-           wiener_info->vfilter[WIENER_WIN - 1] == 0);
-  aom_write_primitive_refsubexpfin(
-      wb, WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
-      WIENER_FILT_TAP1_SUBEXP_K,
-      ref_wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV,
-      wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV);
-  aom_write_primitive_refsubexpfin(
-      wb, WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1,
-      WIENER_FILT_TAP2_SUBEXP_K,
-      ref_wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV,
-      wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV);
-  if (wiener_win == WIENER_WIN)
-    aom_write_primitive_refsubexpfin(
-        wb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
-        WIENER_FILT_TAP0_SUBEXP_K,
-        ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV,
-        wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV);
-  else
-    assert(wiener_info->hfilter[0] == 0 &&
-           wiener_info->hfilter[WIENER_WIN - 1] == 0);
-  aom_write_primitive_refsubexpfin(
-      wb, WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
-      WIENER_FILT_TAP1_SUBEXP_K,
-      ref_wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV,
-      wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV);
-  aom_write_primitive_refsubexpfin(
-      wb, WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1,
-      WIENER_FILT_TAP2_SUBEXP_K,
-      ref_wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV,
-      wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV);
-  memcpy(ref_wiener_info, wiener_info, sizeof(*wiener_info));
-}
-
-static void write_sgrproj_filter(const SgrprojInfo *sgrproj_info,
-                                 SgrprojInfo *ref_sgrproj_info,
-                                 aom_writer *wb) {
-  aom_write_literal(wb, sgrproj_info->ep, SGRPROJ_PARAMS_BITS);
-  const sgr_params_type *params = &sgr_params[sgrproj_info->ep];
-
-  if (params->r[0] == 0) {
-    assert(sgrproj_info->xqd[0] == 0);
-    aom_write_primitive_refsubexpfin(
-        wb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
-        ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1,
-        sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1);
-  } else if (params->r[1] == 0) {
-    aom_write_primitive_refsubexpfin(
-        wb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
-        ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0,
-        sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0);
-  } else {
-    aom_write_primitive_refsubexpfin(
-        wb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
-        ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0,
-        sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0);
-    aom_write_primitive_refsubexpfin(
-        wb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
-        ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1,
-        sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1);
-  }
-
-  memcpy(ref_sgrproj_info, sgrproj_info, sizeof(*sgrproj_info));
-}
-
-static void loop_restoration_write_sb_coeffs(const AV1_COMMON *const cm,
-                                             MACROBLOCKD *xd,
-                                             const RestorationUnitInfo *rui,
-                                             aom_writer *const w, int plane,
-                                             FRAME_COUNTS *counts) {
-  const RestorationInfo *rsi = cm->rst_info + plane;
-  RestorationType frame_rtype = rsi->frame_restoration_type;
-  if (frame_rtype == RESTORE_NONE) return;
-
-  (void)counts;
-  assert(!cm->all_lossless);
-
-  const int wiener_win = (plane > 0) ? WIENER_WIN_CHROMA : WIENER_WIN;
-  WienerInfo *wiener_info = xd->wiener_info + plane;
-  SgrprojInfo *sgrproj_info = xd->sgrproj_info + plane;
-  RestorationType unit_rtype = rui->restoration_type;
-
-  if (frame_rtype == RESTORE_SWITCHABLE) {
-    aom_write_symbol(w, unit_rtype, xd->tile_ctx->switchable_restore_cdf,
-                     RESTORE_SWITCHABLE_TYPES);
-#if CONFIG_ENTROPY_STATS
-    ++counts->switchable_restore[unit_rtype];
-#endif
-    switch (unit_rtype) {
-      case RESTORE_WIENER:
-        write_wiener_filter(wiener_win, &rui->wiener_info, wiener_info, w);
-        break;
-      case RESTORE_SGRPROJ:
-        write_sgrproj_filter(&rui->sgrproj_info, sgrproj_info, w);
-        break;
-      default: assert(unit_rtype == RESTORE_NONE); break;
-    }
-  } else if (frame_rtype == RESTORE_WIENER) {
-    aom_write_symbol(w, unit_rtype != RESTORE_NONE,
-                     xd->tile_ctx->wiener_restore_cdf, 2);
-#if CONFIG_ENTROPY_STATS
-    ++counts->wiener_restore[unit_rtype != RESTORE_NONE];
-#endif
-    if (unit_rtype != RESTORE_NONE) {
-      write_wiener_filter(wiener_win, &rui->wiener_info, wiener_info, w);
-    }
-  } else if (frame_rtype == RESTORE_SGRPROJ) {
-    aom_write_symbol(w, unit_rtype != RESTORE_NONE,
-                     xd->tile_ctx->sgrproj_restore_cdf, 2);
-#if CONFIG_ENTROPY_STATS
-    ++counts->sgrproj_restore[unit_rtype != RESTORE_NONE];
-#endif
-    if (unit_rtype != RESTORE_NONE) {
-      write_sgrproj_filter(&rui->sgrproj_info, sgrproj_info, w);
-    }
-  }
-}
-
-static void encode_loopfilter(AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
-  assert(!cm->coded_lossless);
-  if (cm->allow_intrabc) return;
-  const int num_planes = av1_num_planes(cm);
-  int i;
-  struct loopfilter *lf = &cm->lf;
-
-  // Encode the loop filter level and type
-  aom_wb_write_literal(wb, lf->filter_level[0], 6);
-  aom_wb_write_literal(wb, lf->filter_level[1], 6);
-  if (num_planes > 1) {
-    if (lf->filter_level[0] || lf->filter_level[1]) {
-      aom_wb_write_literal(wb, lf->filter_level_u, 6);
-      aom_wb_write_literal(wb, lf->filter_level_v, 6);
-    }
-  }
-  aom_wb_write_literal(wb, lf->sharpness_level, 3);
-
-  // Write out loop filter deltas applied at the MB level based on mode or
-  // ref frame (if they are enabled).
-  aom_wb_write_bit(wb, lf->mode_ref_delta_enabled);
-
-  if (lf->mode_ref_delta_enabled) {
-    aom_wb_write_bit(wb, lf->mode_ref_delta_update);
-
-    if (lf->mode_ref_delta_update) {
-      const int prime_idx = cm->primary_ref_frame;
-      const int buf_idx =
-          prime_idx == PRIMARY_REF_NONE ? -1 : cm->frame_refs[prime_idx].idx;
-      int8_t last_ref_deltas[REF_FRAMES];
-      if (prime_idx == PRIMARY_REF_NONE || buf_idx < 0) {
-        av1_set_default_ref_deltas(last_ref_deltas);
-      } else {
-        memcpy(last_ref_deltas, cm->buffer_pool->frame_bufs[buf_idx].ref_deltas,
-               REF_FRAMES);
-      }
-      for (i = 0; i < REF_FRAMES; i++) {
-        const int delta = lf->ref_deltas[i];
-        const int changed = delta != last_ref_deltas[i];
-        aom_wb_write_bit(wb, changed);
-        if (changed) aom_wb_write_inv_signed_literal(wb, delta, 6);
-      }
-
-      int8_t last_mode_deltas[MAX_MODE_LF_DELTAS];
-      if (prime_idx == PRIMARY_REF_NONE || buf_idx < 0) {
-        av1_set_default_mode_deltas(last_mode_deltas);
-      } else {
-        memcpy(last_mode_deltas,
-               cm->buffer_pool->frame_bufs[buf_idx].mode_deltas,
-               MAX_MODE_LF_DELTAS);
-      }
-      for (i = 0; i < MAX_MODE_LF_DELTAS; i++) {
-        const int delta = lf->mode_deltas[i];
-        const int changed = delta != last_mode_deltas[i];
-        aom_wb_write_bit(wb, changed);
-        if (changed) aom_wb_write_inv_signed_literal(wb, delta, 6);
-      }
-    }
-  }
-}
-
-static void encode_cdef(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
-  assert(!cm->coded_lossless);
-  if (!cm->seq_params.enable_cdef) return;
-  if (cm->allow_intrabc) return;
-  const int num_planes = av1_num_planes(cm);
-  int i;
-  aom_wb_write_literal(wb, cm->cdef_pri_damping - 3, 2);
-  assert(cm->cdef_pri_damping == cm->cdef_sec_damping);
-  aom_wb_write_literal(wb, cm->cdef_bits, 2);
-  for (i = 0; i < cm->nb_cdef_strengths; i++) {
-    aom_wb_write_literal(wb, cm->cdef_strengths[i], CDEF_STRENGTH_BITS);
-    if (num_planes > 1)
-      aom_wb_write_literal(wb, cm->cdef_uv_strengths[i], CDEF_STRENGTH_BITS);
-  }
-}
-
-static void write_delta_q(struct aom_write_bit_buffer *wb, int delta_q) {
-  if (delta_q != 0) {
-    aom_wb_write_bit(wb, 1);
-    aom_wb_write_inv_signed_literal(wb, delta_q, 6);
-  } else {
-    aom_wb_write_bit(wb, 0);
-  }
-}
-
-static void encode_quantization(const AV1_COMMON *const cm,
-                                struct aom_write_bit_buffer *wb) {
-  const int num_planes = av1_num_planes(cm);
-
-  aom_wb_write_literal(wb, cm->base_qindex, QINDEX_BITS);
-  write_delta_q(wb, cm->y_dc_delta_q);
-  if (num_planes > 1) {
-    int diff_uv_delta = (cm->u_dc_delta_q != cm->v_dc_delta_q) ||
-                        (cm->u_ac_delta_q != cm->v_ac_delta_q);
-    if (cm->seq_params.separate_uv_delta_q) aom_wb_write_bit(wb, diff_uv_delta);
-    write_delta_q(wb, cm->u_dc_delta_q);
-    write_delta_q(wb, cm->u_ac_delta_q);
-    if (diff_uv_delta) {
-      write_delta_q(wb, cm->v_dc_delta_q);
-      write_delta_q(wb, cm->v_ac_delta_q);
-    }
-  }
-  aom_wb_write_bit(wb, cm->using_qmatrix);
-  if (cm->using_qmatrix) {
-    aom_wb_write_literal(wb, cm->qm_y, QM_LEVEL_BITS);
-    aom_wb_write_literal(wb, cm->qm_u, QM_LEVEL_BITS);
-    if (!cm->seq_params.separate_uv_delta_q)
-      assert(cm->qm_u == cm->qm_v);
-    else
-      aom_wb_write_literal(wb, cm->qm_v, QM_LEVEL_BITS);
-  }
-}
-
-static void encode_segmentation(AV1_COMMON *cm, MACROBLOCKD *xd,
-                                struct aom_write_bit_buffer *wb) {
-  int i, j;
-  struct segmentation *seg = &cm->seg;
-
-  aom_wb_write_bit(wb, seg->enabled);
-  if (!seg->enabled) return;
-
-  // Write update flags
-  if (cm->primary_ref_frame == PRIMARY_REF_NONE) {
-    assert(seg->update_map == 1);
-    seg->temporal_update = 0;
-    assert(seg->update_data == 1);
-  } else {
-    aom_wb_write_bit(wb, seg->update_map);
-    if (seg->update_map) {
-      // Select the coding strategy (temporal or spatial)
-      av1_choose_segmap_coding_method(cm, xd);
-      aom_wb_write_bit(wb, seg->temporal_update);
-    }
-    aom_wb_write_bit(wb, seg->update_data);
-  }
-
-  // Segmentation data
-  if (seg->update_data) {
-    for (i = 0; i < MAX_SEGMENTS; i++) {
-      for (j = 0; j < SEG_LVL_MAX; j++) {
-        const int active = segfeature_active(seg, i, j);
-        aom_wb_write_bit(wb, active);
-        if (active) {
-          const int data_max = av1_seg_feature_data_max(j);
-          const int data_min = -data_max;
-          const int ubits = get_unsigned_bits(data_max);
-          const int data = clamp(get_segdata(seg, i, j), data_min, data_max);
-
-          if (av1_is_segfeature_signed(j)) {
-            aom_wb_write_inv_signed_literal(wb, data, ubits);
-          } else {
-            aom_wb_write_literal(wb, data, ubits);
-          }
-        }
-      }
-    }
-  }
-}
-
-static void write_tx_mode(AV1_COMMON *cm, TX_MODE *mode,
-                          struct aom_write_bit_buffer *wb) {
-  if (cm->coded_lossless) {
-    *mode = ONLY_4X4;
-    return;
-  }
-  aom_wb_write_bit(wb, *mode == TX_MODE_SELECT);
-}
-
-static void write_frame_interp_filter(InterpFilter filter,
-                                      struct aom_write_bit_buffer *wb) {
-  aom_wb_write_bit(wb, filter == SWITCHABLE);
-  if (filter != SWITCHABLE)
-    aom_wb_write_literal(wb, filter, LOG_SWITCHABLE_FILTERS);
-}
-
-static void fix_interp_filter(AV1_COMMON *cm, FRAME_COUNTS *counts) {
-  if (cm->interp_filter == SWITCHABLE) {
-    // Check to see if only one of the filters is actually used
-    int count[SWITCHABLE_FILTERS];
-    int i, j, c = 0;
-    for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
-      count[i] = 0;
-      for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j)
-        count[i] += counts->switchable_interp[j][i];
-      c += (count[i] > 0);
-    }
-    if (c == 1) {
-      // Only one filter is used. So set the filter at frame level
-      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
-        if (count[i]) {
-          if (i == EIGHTTAP_REGULAR) cm->interp_filter = i;
-          break;
-        }
-      }
-    }
-  }
-}
-
-// Same function as write_uniform but writing to uncompresses header wb
-static void wb_write_uniform(struct aom_write_bit_buffer *wb, int n, int v) {
-  const int l = get_unsigned_bits(n);
-  const int m = (1 << l) - n;
-  if (l == 0) return;
-  if (v < m) {
-    aom_wb_write_literal(wb, v, l - 1);
-  } else {
-    aom_wb_write_literal(wb, m + ((v - m) >> 1), l - 1);
-    aom_wb_write_literal(wb, (v - m) & 1, 1);
-  }
-}
-
-static void write_tile_info_max_tile(const AV1_COMMON *const cm,
-                                     struct aom_write_bit_buffer *wb) {
-  int width_mi = ALIGN_POWER_OF_TWO(cm->mi_cols, cm->seq_params.mib_size_log2);
-  int height_mi = ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2);
-  int width_sb = width_mi >> cm->seq_params.mib_size_log2;
-  int height_sb = height_mi >> cm->seq_params.mib_size_log2;
-  int size_sb, i;
-
-  aom_wb_write_bit(wb, cm->uniform_tile_spacing_flag);
-
-  if (cm->uniform_tile_spacing_flag) {
-    // Uniform spaced tiles with power-of-two number of rows and columns
-    // tile columns
-    int ones = cm->log2_tile_cols - cm->min_log2_tile_cols;
-    while (ones--) {
-      aom_wb_write_bit(wb, 1);
-    }
-    if (cm->log2_tile_cols < cm->max_log2_tile_cols) {
-      aom_wb_write_bit(wb, 0);
-    }
-
-    // rows
-    ones = cm->log2_tile_rows - cm->min_log2_tile_rows;
-    while (ones--) {
-      aom_wb_write_bit(wb, 1);
-    }
-    if (cm->log2_tile_rows < cm->max_log2_tile_rows) {
-      aom_wb_write_bit(wb, 0);
-    }
-  } else {
-    // Explicit tiles with configurable tile widths and heights
-    // columns
-    for (i = 0; i < cm->tile_cols; i++) {
-      size_sb = cm->tile_col_start_sb[i + 1] - cm->tile_col_start_sb[i];
-      wb_write_uniform(wb, AOMMIN(width_sb, cm->max_tile_width_sb),
-                       size_sb - 1);
-      width_sb -= size_sb;
-    }
-    assert(width_sb == 0);
-
-    // rows
-    for (i = 0; i < cm->tile_rows; i++) {
-      size_sb = cm->tile_row_start_sb[i + 1] - cm->tile_row_start_sb[i];
-      wb_write_uniform(wb, AOMMIN(height_sb, cm->max_tile_height_sb),
-                       size_sb - 1);
-      height_sb -= size_sb;
-    }
-    assert(height_sb == 0);
-  }
-}
-
-static void write_tile_info(const AV1_COMMON *const cm,
-                            struct aom_write_bit_buffer *saved_wb,
-                            struct aom_write_bit_buffer *wb) {
-  write_tile_info_max_tile(cm, wb);
-
-  *saved_wb = *wb;
-  if (cm->tile_rows * cm->tile_cols > 1) {
-    // tile id used for cdf update
-    aom_wb_write_literal(wb, 0, cm->log2_tile_cols + cm->log2_tile_rows);
-    // Number of bytes in tile size - 1
-    aom_wb_write_literal(wb, 3, 2);
-  }
-}
-
-static void write_ext_tile_info(const AV1_COMMON *const cm,
-                                struct aom_write_bit_buffer *saved_wb,
-                                struct aom_write_bit_buffer *wb) {
-  // This information is stored as a separate byte.
-  int mod = wb->bit_offset % CHAR_BIT;
-  if (mod > 0) aom_wb_write_literal(wb, 0, CHAR_BIT - mod);
-  assert(aom_wb_is_byte_aligned(wb));
-
-  *saved_wb = *wb;
-  if (cm->tile_rows * cm->tile_cols > 1) {
-    // Note that the last item in the uncompressed header is the data
-    // describing tile configuration.
-    // Number of bytes in tile column size - 1
-    aom_wb_write_literal(wb, 0, 2);
-    // Number of bytes in tile size - 1
-    aom_wb_write_literal(wb, 0, 2);
-  }
-}
-
-static int get_refresh_mask(AV1_COMP *cpi) {
-  if ((cpi->common.frame_type == KEY_FRAME && cpi->common.show_frame) ||
-      frame_is_sframe(&cpi->common))
-    return 0xFF;
-
-  int refresh_mask = 0;
-
-  // NOTE(zoeliu): When LAST_FRAME is to get refreshed, the decoder will be
-  // notified to get LAST3_FRAME refreshed and then the virtual indexes for all
-  // the 3 LAST reference frames will be updated accordingly, i.e.:
-  // (1) The original virtual index for LAST3_FRAME will become the new virtual
-  //     index for LAST_FRAME; and
-  // (2) The original virtual indexes for LAST_FRAME and LAST2_FRAME will be
-  //     shifted and become the new virtual indexes for LAST2_FRAME and
-  //     LAST3_FRAME.
-  refresh_mask |=
-      (cpi->refresh_last_frame << cpi->ref_fb_idx[LAST_REF_FRAMES - 1]);
-#if USE_SYMM_MULTI_LAYER
-  refresh_mask |=
-      (cpi->new_bwdref_update_rule == 1)
-          ? (cpi->refresh_bwd_ref_frame << cpi->ref_fb_idx[EXTREF_FRAME - 1])
-          : (cpi->refresh_bwd_ref_frame << cpi->ref_fb_idx[BWDREF_FRAME - 1]);
-#else
-  refresh_mask |=
-      (cpi->refresh_bwd_ref_frame << cpi->ref_fb_idx[BWDREF_FRAME - 1]);
-#endif
-  refresh_mask |=
-      (cpi->refresh_alt2_ref_frame << cpi->ref_fb_idx[ALTREF2_FRAME - 1]);
-
-  if (av1_preserve_existing_gf(cpi)) {
-    // We have decided to preserve the previously existing golden frame as our
-    // new ARF frame. However, in the short term we leave it in the GF slot and,
-    // if we're updating the GF with the current decoded frame, we save it
-    // instead to the ARF slot.
-    // Later, in the function av1_encoder.c:av1_update_reference_frames() we
-    // will swap gld_fb_idx and alt_fb_idx to achieve our objective. We do it
-    // there so that it can be done outside of the recode loop.
-    // Note: This is highly specific to the use of ARF as a forward reference,
-    // and this needs to be generalized as other uses are implemented
-    // (like RTC/temporal scalability).
-
-    if (cpi->preserve_arf_as_gld) {
-      return refresh_mask;
-    } else {
-      return refresh_mask |
-             (cpi->refresh_golden_frame << cpi->ref_fb_idx[ALTREF_FRAME - 1]);
-    }
-  } else {
-    const int arf_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1];
-    return refresh_mask |
-           (cpi->refresh_golden_frame << cpi->ref_fb_idx[GOLDEN_FRAME - 1]) |
-           (cpi->refresh_alt_ref_frame << arf_idx);
-  }
-}
-
-static INLINE int find_identical_tile(
-    const int tile_row, const int tile_col,
-    TileBufferEnc (*const tile_buffers)[MAX_TILE_COLS]) {
-  const MV32 candidate_offset[1] = { { 1, 0 } };
-  const uint8_t *const cur_tile_data =
-      tile_buffers[tile_row][tile_col].data + 4;
-  const size_t cur_tile_size = tile_buffers[tile_row][tile_col].size;
-
-  int i;
-
-  if (tile_row == 0) return 0;
-
-  // (TODO: yunqingwang) For now, only above tile is checked and used.
-  // More candidates such as left tile can be added later.
-  for (i = 0; i < 1; i++) {
-    int row_offset = candidate_offset[0].row;
-    int col_offset = candidate_offset[0].col;
-    int row = tile_row - row_offset;
-    int col = tile_col - col_offset;
-    uint8_t tile_hdr;
-    const uint8_t *tile_data;
-    TileBufferEnc *candidate;
-
-    if (row < 0 || col < 0) continue;
-
-    tile_hdr = *(tile_buffers[row][col].data);
-
-    // Read out tcm bit
-    if ((tile_hdr >> 7) == 1) {
-      // The candidate is a copy tile itself
-      row_offset += tile_hdr & 0x7f;
-      row = tile_row - row_offset;
-    }
-
-    candidate = &tile_buffers[row][col];
-
-    if (row_offset >= 128 || candidate->size != cur_tile_size) continue;
-
-    tile_data = candidate->data + 4;
-
-    if (memcmp(tile_data, cur_tile_data, cur_tile_size) != 0) continue;
-
-    // Identical tile found
-    assert(row_offset > 0);
-    return row_offset;
-  }
-
-  // No identical tile found
-  return 0;
-}
-
-static void write_render_size(const AV1_COMMON *cm,
-                              struct aom_write_bit_buffer *wb) {
-  const int scaling_active = av1_resize_scaled(cm);
-  aom_wb_write_bit(wb, scaling_active);
-  if (scaling_active) {
-    aom_wb_write_literal(wb, cm->render_width - 1, 16);
-    aom_wb_write_literal(wb, cm->render_height - 1, 16);
-  }
-}
-
-static void write_superres_scale(const AV1_COMMON *const cm,
-                                 struct aom_write_bit_buffer *wb) {
-  const SequenceHeader *const seq_params = &cm->seq_params;
-  if (!seq_params->enable_superres) {
-    assert(cm->superres_scale_denominator == SCALE_NUMERATOR);
-    return;
-  }
-
-  // First bit is whether to to scale or not
-  if (cm->superres_scale_denominator == SCALE_NUMERATOR) {
-    aom_wb_write_bit(wb, 0);  // no scaling
-  } else {
-    aom_wb_write_bit(wb, 1);  // scaling, write scale factor
-    assert(cm->superres_scale_denominator >= SUPERRES_SCALE_DENOMINATOR_MIN);
-    assert(cm->superres_scale_denominator <
-           SUPERRES_SCALE_DENOMINATOR_MIN + (1 << SUPERRES_SCALE_BITS));
-    aom_wb_write_literal(
-        wb, cm->superres_scale_denominator - SUPERRES_SCALE_DENOMINATOR_MIN,
-        SUPERRES_SCALE_BITS);
-  }
-}
-
-static void write_frame_size(const AV1_COMMON *cm, int frame_size_override,
-                             struct aom_write_bit_buffer *wb) {
-  const int coded_width = cm->superres_upscaled_width - 1;
-  const int coded_height = cm->superres_upscaled_height - 1;
-
-  if (frame_size_override) {
-    const SequenceHeader *seq_params = &cm->seq_params;
-    int num_bits_width = seq_params->num_bits_width;
-    int num_bits_height = seq_params->num_bits_height;
-    aom_wb_write_literal(wb, coded_width, num_bits_width);
-    aom_wb_write_literal(wb, coded_height, num_bits_height);
-  }
-
-  write_superres_scale(cm, wb);
-  write_render_size(cm, wb);
-}
-
-static void write_frame_size_with_refs(AV1_COMP *cpi,
-                                       struct aom_write_bit_buffer *wb) {
-  AV1_COMMON *const cm = &cpi->common;
-  int found = 0;
-
-  MV_REFERENCE_FRAME ref_frame;
-  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-    YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, ref_frame);
-
-    if (cfg != NULL) {
-      found = cm->superres_upscaled_width == cfg->y_crop_width &&
-              cm->superres_upscaled_height == cfg->y_crop_height;
-      found &= cm->render_width == cfg->render_width &&
-               cm->render_height == cfg->render_height;
-    }
-    aom_wb_write_bit(wb, found);
-    if (found) {
-      write_superres_scale(cm, wb);
-      break;
-    }
-  }
-
-  if (!found) {
-    int frame_size_override = 1;  // Always equal to 1 in this function
-    write_frame_size(cm, frame_size_override, wb);
-  }
-}
-
-static void write_profile(BITSTREAM_PROFILE profile,
-                          struct aom_write_bit_buffer *wb) {
-  assert(profile >= PROFILE_0 && profile < MAX_PROFILES);
-  aom_wb_write_literal(wb, profile, PROFILE_BITS);
-}
-
-static void write_bitdepth(const SequenceHeader *const seq_params,
-                           struct aom_write_bit_buffer *wb) {
-  // Profile 0/1: [0] for 8 bit, [1]  10-bit
-  // Profile   2: [0] for 8 bit, [10] 10-bit, [11] - 12-bit
-  aom_wb_write_bit(wb, seq_params->bit_depth == AOM_BITS_8 ? 0 : 1);
-  if (seq_params->profile == PROFILE_2 && seq_params->bit_depth != AOM_BITS_8) {
-    aom_wb_write_bit(wb, seq_params->bit_depth == AOM_BITS_10 ? 0 : 1);
-  }
-}
-
-static void write_color_config(const SequenceHeader *const seq_params,
-                               struct aom_write_bit_buffer *wb) {
-  write_bitdepth(seq_params, wb);
-  const int is_monochrome = seq_params->monochrome;
-  // monochrome bit
-  if (seq_params->profile != PROFILE_1)
-    aom_wb_write_bit(wb, is_monochrome);
-  else
-    assert(!is_monochrome);
-  if (seq_params->color_primaries == AOM_CICP_CP_UNSPECIFIED &&
-      seq_params->transfer_characteristics == AOM_CICP_TC_UNSPECIFIED &&
-      seq_params->matrix_coefficients == AOM_CICP_MC_UNSPECIFIED) {
-    aom_wb_write_bit(wb, 0);  // No color description present
-  } else {
-    aom_wb_write_bit(wb, 1);  // Color description present
-    aom_wb_write_literal(wb, seq_params->color_primaries, 8);
-    aom_wb_write_literal(wb, seq_params->transfer_characteristics, 8);
-    aom_wb_write_literal(wb, seq_params->matrix_coefficients, 8);
-  }
-  if (is_monochrome) {
-    // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
-    aom_wb_write_bit(wb, seq_params->color_range);
-    return;
-  }
-  if (seq_params->color_primaries == AOM_CICP_CP_BT_709 &&
-      seq_params->transfer_characteristics == AOM_CICP_TC_SRGB &&
-      seq_params->matrix_coefficients ==
-          AOM_CICP_MC_IDENTITY) {  // it would be better to remove this
-                                   // dependency too
-    assert(seq_params->subsampling_x == 0 && seq_params->subsampling_y == 0);
-    assert(seq_params->profile == PROFILE_1 ||
-           (seq_params->profile == PROFILE_2 &&
-            seq_params->bit_depth == AOM_BITS_12));
-  } else {
-    // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
-    aom_wb_write_bit(wb, seq_params->color_range);
-    if (seq_params->profile == PROFILE_0) {
-      // 420 only
-      assert(seq_params->subsampling_x == 1 && seq_params->subsampling_y == 1);
-    } else if (seq_params->profile == PROFILE_1) {
-      // 444 only
-      assert(seq_params->subsampling_x == 0 && seq_params->subsampling_y == 0);
-    } else if (seq_params->profile == PROFILE_2) {
-      if (seq_params->bit_depth == AOM_BITS_12) {
-        // 420, 444 or 422
-        aom_wb_write_bit(wb, seq_params->subsampling_x);
-        if (seq_params->subsampling_x == 0) {
-          assert(seq_params->subsampling_y == 0 &&
-                 "4:4:0 subsampling not allowed in AV1");
-        } else {
-          aom_wb_write_bit(wb, seq_params->subsampling_y);
-        }
-      } else {
-        // 422 only
-        assert(seq_params->subsampling_x == 1 &&
-               seq_params->subsampling_y == 0);
-      }
-    }
-    if (seq_params->matrix_coefficients == AOM_CICP_MC_IDENTITY) {
-      assert(seq_params->subsampling_x == 0 && seq_params->subsampling_y == 0);
-    }
-    if (seq_params->subsampling_x == 1 && seq_params->subsampling_y == 1) {
-      aom_wb_write_literal(wb, seq_params->chroma_sample_position, 2);
-    }
-  }
-  aom_wb_write_bit(wb, seq_params->separate_uv_delta_q);
-}
-
-static void write_timing_info_header(AV1_COMMON *const cm,
-                                     struct aom_write_bit_buffer *wb) {
-  aom_wb_write_unsigned_literal(wb, cm->timing_info.num_units_in_display_tick,
-                                32);  // Number of units in tick
-  aom_wb_write_unsigned_literal(wb, cm->timing_info.time_scale,
-                                32);  // Time scale
-  aom_wb_write_bit(
-      wb,
-      cm->timing_info.equal_picture_interval);  // Equal picture interval bit
-  if (cm->timing_info.equal_picture_interval) {
-    aom_wb_write_uvlc(
-        wb,
-        cm->timing_info.num_ticks_per_picture - 1);  // ticks per picture
-  }
-}
-
-static void write_decoder_model_info(AV1_COMMON *const cm,
-                                     struct aom_write_bit_buffer *wb) {
-  aom_wb_write_literal(
-      wb, cm->buffer_model.encoder_decoder_buffer_delay_length - 1, 5);
-  aom_wb_write_unsigned_literal(wb, cm->buffer_model.num_units_in_decoding_tick,
-                                32);  // Number of units in decoding tick
-  aom_wb_write_literal(wb, cm->buffer_model.buffer_removal_time_length - 1, 5);
-  aom_wb_write_literal(wb, cm->buffer_model.frame_presentation_time_length - 1,
-                       5);
-}
-
-static void write_dec_model_op_parameters(AV1_COMMON *const cm,
-                                          struct aom_write_bit_buffer *wb,
-                                          int op_num) {
-  if (op_num > MAX_NUM_OPERATING_POINTS)
-    aom_internal_error(
-        &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
-        "Encoder does not support %d decoder model operating points", op_num);
-
-  //  aom_wb_write_bit(wb, cm->op_params[op_num].has_parameters);
-  //  if (!cm->op_params[op_num].has_parameters) return;
-
-  aom_wb_write_unsigned_literal(
-      wb, cm->op_params[op_num].decoder_buffer_delay,
-      cm->buffer_model.encoder_decoder_buffer_delay_length);
-
-  aom_wb_write_unsigned_literal(
-      wb, cm->op_params[op_num].encoder_buffer_delay,
-      cm->buffer_model.encoder_decoder_buffer_delay_length);
-
-  aom_wb_write_bit(wb, cm->op_params[op_num].low_delay_mode_flag);
-
-  cm->op_frame_timing[op_num].buffer_removal_time =
-      0;  // reset the decoded frame counter
-}
-
-static void write_tu_pts_info(AV1_COMMON *const cm,
-                              struct aom_write_bit_buffer *wb) {
-  aom_wb_write_unsigned_literal(
-      wb, cm->frame_presentation_time,
-      cm->buffer_model.frame_presentation_time_length);
-}
-
-static void write_film_grain_params(AV1_COMP *cpi,
-                                    struct aom_write_bit_buffer *wb) {
-  AV1_COMMON *const cm = &cpi->common;
-  aom_film_grain_t *pars = &cm->film_grain_params;
-
-  cm->cur_frame->film_grain_params = *pars;
-
-  aom_wb_write_bit(wb, pars->apply_grain);
-  if (!pars->apply_grain) return;
-
-  aom_wb_write_literal(wb, pars->random_seed, 16);
-
-  pars->random_seed += 3381;  // Changing random seed for film grain
-  if (!pars->random_seed)     // Random seed should not be zero
-    pars->random_seed += 7391;
-  if (cm->frame_type == INTER_FRAME)
-    aom_wb_write_bit(wb, pars->update_parameters);
-  else
-    pars->update_parameters = 1;
-  if (!pars->update_parameters) {
-    RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
-    int ref_frame, ref_idx, buf_idx;
-    for (ref_frame = LAST_FRAME; ref_frame < REF_FRAMES; ref_frame++) {
-      ref_idx = get_ref_frame_map_idx(cpi, ref_frame);
-      assert(ref_idx != INVALID_IDX);
-      buf_idx = cm->ref_frame_map[ref_idx];
-      if (frame_bufs[buf_idx].film_grain_params_present &&
-          memcmp(pars, &frame_bufs[buf_idx].film_grain_params, sizeof(*pars))) {
-        break;
-      }
-    }
-    assert(ref_frame < REF_FRAMES);
-    aom_wb_write_literal(wb, ref_idx, 3);
-    return;
-  }
-
-  // Scaling functions parameters
-  aom_wb_write_literal(wb, pars->num_y_points, 4);  // max 14
-  for (int i = 0; i < pars->num_y_points; i++) {
-    aom_wb_write_literal(wb, pars->scaling_points_y[i][0], 8);
-    aom_wb_write_literal(wb, pars->scaling_points_y[i][1], 8);
-  }
-
-  if (!cm->seq_params.monochrome)
-    aom_wb_write_bit(wb, pars->chroma_scaling_from_luma);
-  else
-    pars->chroma_scaling_from_luma = 0;  // for monochrome override to 0
-
-  if (cm->seq_params.monochrome || pars->chroma_scaling_from_luma ||
-      ((cm->seq_params.subsampling_x == 1) &&
-       (cm->seq_params.subsampling_y == 1) && (pars->num_y_points == 0))) {
-    pars->num_cb_points = 0;
-    pars->num_cr_points = 0;
-  } else {
-    aom_wb_write_literal(wb, pars->num_cb_points, 4);  // max 10
-    for (int i = 0; i < pars->num_cb_points; i++) {
-      aom_wb_write_literal(wb, pars->scaling_points_cb[i][0], 8);
-      aom_wb_write_literal(wb, pars->scaling_points_cb[i][1], 8);
-    }
-
-    aom_wb_write_literal(wb, pars->num_cr_points, 4);  // max 10
-    for (int i = 0; i < pars->num_cr_points; i++) {
-      aom_wb_write_literal(wb, pars->scaling_points_cr[i][0], 8);
-      aom_wb_write_literal(wb, pars->scaling_points_cr[i][1], 8);
-    }
-  }
-
-  aom_wb_write_literal(wb, pars->scaling_shift - 8, 2);  // 8 + value
-
-  // AR coefficients
-  // Only sent if the corresponsing scaling function has
-  // more than 0 points
-
-  aom_wb_write_literal(wb, pars->ar_coeff_lag, 2);
-
-  int num_pos_luma = 2 * pars->ar_coeff_lag * (pars->ar_coeff_lag + 1);
-  int num_pos_chroma = num_pos_luma;
-  if (pars->num_y_points > 0) ++num_pos_chroma;
-
-  if (pars->num_y_points)
-    for (int i = 0; i < num_pos_luma; i++)
-      aom_wb_write_literal(wb, pars->ar_coeffs_y[i] + 128, 8);
-
-  if (pars->num_cb_points || pars->chroma_scaling_from_luma)
-    for (int i = 0; i < num_pos_chroma; i++)
-      aom_wb_write_literal(wb, pars->ar_coeffs_cb[i] + 128, 8);
-
-  if (pars->num_cr_points || pars->chroma_scaling_from_luma)
-    for (int i = 0; i < num_pos_chroma; i++)
-      aom_wb_write_literal(wb, pars->ar_coeffs_cr[i] + 128, 8);
-
-  aom_wb_write_literal(wb, pars->ar_coeff_shift - 6, 2);  // 8 + value
-
-  aom_wb_write_literal(wb, pars->grain_scale_shift, 2);
-
-  if (pars->num_cb_points) {
-    aom_wb_write_literal(wb, pars->cb_mult, 8);
-    aom_wb_write_literal(wb, pars->cb_luma_mult, 8);
-    aom_wb_write_literal(wb, pars->cb_offset, 9);
-  }
-
-  if (pars->num_cr_points) {
-    aom_wb_write_literal(wb, pars->cr_mult, 8);
-    aom_wb_write_literal(wb, pars->cr_luma_mult, 8);
-    aom_wb_write_literal(wb, pars->cr_offset, 9);
-  }
-
-  aom_wb_write_bit(wb, pars->overlap_flag);
-
-  aom_wb_write_bit(wb, pars->clip_to_restricted_range);
-}
-
-static void write_sb_size(SequenceHeader *seq_params,
-                          struct aom_write_bit_buffer *wb) {
-  (void)seq_params;
-  (void)wb;
-  assert(seq_params->mib_size == mi_size_wide[seq_params->sb_size]);
-  assert(seq_params->mib_size == 1 << seq_params->mib_size_log2);
-  assert(seq_params->sb_size == BLOCK_128X128 ||
-         seq_params->sb_size == BLOCK_64X64);
-  aom_wb_write_bit(wb, seq_params->sb_size == BLOCK_128X128 ? 1 : 0);
-}
-
-static void write_sequence_header(AV1_COMP *cpi,
-                                  struct aom_write_bit_buffer *wb) {
-  AV1_COMMON *const cm = &cpi->common;
-  SequenceHeader *seq_params = &cm->seq_params;
-
-  int max_frame_width = cpi->oxcf.forced_max_frame_width
-                            ? cpi->oxcf.forced_max_frame_width
-                            : cpi->oxcf.width;
-  int max_frame_height = cpi->oxcf.forced_max_frame_height
-                             ? cpi->oxcf.forced_max_frame_height
-                             : cpi->oxcf.height;
-  // max((int)ceil(log2(max_frame_width)), 1)
-  const int num_bits_width =
-      (max_frame_width > 1) ? get_msb(max_frame_width - 1) + 1 : 1;
-  // max((int)ceil(log2(max_frame_height)), 1)
-  const int num_bits_height =
-      (max_frame_height > 1) ? get_msb(max_frame_height - 1) + 1 : 1;
-  assert(num_bits_width <= 16);
-  assert(num_bits_height <= 16);
-
-  seq_params->num_bits_width = num_bits_width;
-  seq_params->num_bits_height = num_bits_height;
-  seq_params->max_frame_width = max_frame_width;
-  seq_params->max_frame_height = max_frame_height;
-
-  aom_wb_write_literal(wb, num_bits_width - 1, 4);
-  aom_wb_write_literal(wb, num_bits_height - 1, 4);
-  aom_wb_write_literal(wb, max_frame_width - 1, num_bits_width);
-  aom_wb_write_literal(wb, max_frame_height - 1, num_bits_height);
-
-  /* Placeholder for actually writing to the bitstream */
-  if (!seq_params->reduced_still_picture_hdr) {
-    seq_params->frame_id_numbers_present_flag =
-        cm->large_scale_tile ? 0 : cm->error_resilient_mode;
-    seq_params->frame_id_length = FRAME_ID_LENGTH;
-    seq_params->delta_frame_id_length = DELTA_FRAME_ID_LENGTH;
-
-    aom_wb_write_bit(wb, seq_params->frame_id_numbers_present_flag);
-    if (seq_params->frame_id_numbers_present_flag) {
-      // We must always have delta_frame_id_length < frame_id_length,
-      // in order for a frame to be referenced with a unique delta.
-      // Avoid wasting bits by using a coding that enforces this restriction.
-      aom_wb_write_literal(wb, seq_params->delta_frame_id_length - 2, 4);
-      aom_wb_write_literal(
-          wb,
-          seq_params->frame_id_length - seq_params->delta_frame_id_length - 1,
-          3);
-    }
-  }
-
-  write_sb_size(seq_params, wb);
-
-  aom_wb_write_bit(wb, seq_params->enable_filter_intra);
-  aom_wb_write_bit(wb, seq_params->enable_intra_edge_filter);
-
-  if (!seq_params->reduced_still_picture_hdr) {
-    aom_wb_write_bit(wb, seq_params->enable_interintra_compound);
-    aom_wb_write_bit(wb, seq_params->enable_masked_compound);
-    aom_wb_write_bit(wb, seq_params->enable_warped_motion);
-    aom_wb_write_bit(wb, seq_params->enable_dual_filter);
-
-    aom_wb_write_bit(wb, seq_params->enable_order_hint);
-
-    if (seq_params->enable_order_hint) {
-      aom_wb_write_bit(wb, seq_params->enable_jnt_comp);
-      aom_wb_write_bit(wb, seq_params->enable_ref_frame_mvs);
-    }
-    if (seq_params->force_screen_content_tools == 2) {
-      aom_wb_write_bit(wb, 1);
-    } else {
-      aom_wb_write_bit(wb, 0);
-      aom_wb_write_bit(wb, seq_params->force_screen_content_tools);
-    }
-    if (seq_params->force_screen_content_tools > 0) {
-      if (seq_params->force_integer_mv == 2) {
-        aom_wb_write_bit(wb, 1);
-      } else {
-        aom_wb_write_bit(wb, 0);
-        aom_wb_write_bit(wb, seq_params->force_integer_mv);
-      }
-    } else {
-      assert(seq_params->force_integer_mv == 2);
-    }
-    if (seq_params->enable_order_hint)
-      aom_wb_write_literal(wb, seq_params->order_hint_bits_minus_1, 3);
-  }
-
-  aom_wb_write_bit(wb, seq_params->enable_superres);
-  aom_wb_write_bit(wb, seq_params->enable_cdef);
-  aom_wb_write_bit(wb, seq_params->enable_restoration);
-}
-
-static void write_global_motion_params(const WarpedMotionParams *params,
-                                       const WarpedMotionParams *ref_params,
-                                       struct aom_write_bit_buffer *wb,
-                                       int allow_hp) {
-  const TransformationType type = params->wmtype;
-
-  aom_wb_write_bit(wb, type != IDENTITY);
-  if (type != IDENTITY) {
-    aom_wb_write_bit(wb, type == ROTZOOM);
-    if (type != ROTZOOM) aom_wb_write_bit(wb, type == TRANSLATION);
-  }
-
-  if (type >= ROTZOOM) {
-    aom_wb_write_signed_primitive_refsubexpfin(
-        wb, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
-        (ref_params->wmmat[2] >> GM_ALPHA_PREC_DIFF) -
-            (1 << GM_ALPHA_PREC_BITS),
-        (params->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
-    aom_wb_write_signed_primitive_refsubexpfin(
-        wb, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
-        (ref_params->wmmat[3] >> GM_ALPHA_PREC_DIFF),
-        (params->wmmat[3] >> GM_ALPHA_PREC_DIFF));
-  }
-
-  if (type >= AFFINE) {
-    aom_wb_write_signed_primitive_refsubexpfin(
-        wb, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
-        (ref_params->wmmat[4] >> GM_ALPHA_PREC_DIFF),
-        (params->wmmat[4] >> GM_ALPHA_PREC_DIFF));
-    aom_wb_write_signed_primitive_refsubexpfin(
-        wb, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
-        (ref_params->wmmat[5] >> GM_ALPHA_PREC_DIFF) -
-            (1 << GM_ALPHA_PREC_BITS),
-        (params->wmmat[5] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
-  }
-
-  if (type >= TRANSLATION) {
-    const int trans_bits = (type == TRANSLATION)
-                               ? GM_ABS_TRANS_ONLY_BITS - !allow_hp
-                               : GM_ABS_TRANS_BITS;
-    const int trans_prec_diff = (type == TRANSLATION)
-                                    ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp
-                                    : GM_TRANS_PREC_DIFF;
-    aom_wb_write_signed_primitive_refsubexpfin(
-        wb, (1 << trans_bits) + 1, SUBEXPFIN_K,
-        (ref_params->wmmat[0] >> trans_prec_diff),
-        (params->wmmat[0] >> trans_prec_diff));
-    aom_wb_write_signed_primitive_refsubexpfin(
-        wb, (1 << trans_bits) + 1, SUBEXPFIN_K,
-        (ref_params->wmmat[1] >> trans_prec_diff),
-        (params->wmmat[1] >> trans_prec_diff));
-  }
-}
-
-static void write_global_motion(AV1_COMP *cpi,
-                                struct aom_write_bit_buffer *wb) {
-  AV1_COMMON *const cm = &cpi->common;
-  int frame;
-  for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
-    const WarpedMotionParams *ref_params =
-        cm->prev_frame ? &cm->prev_frame->global_motion[frame]
-                       : &default_warp_params;
-    write_global_motion_params(&cm->global_motion[frame], ref_params, wb,
-                               cm->allow_high_precision_mv);
-    // TODO(sarahparker, debargha): The logic in the commented out code below
-    // does not work currently and causes mismatches when resize is on.
-    // Fix it before turning the optimization back on.
-    /*
-    YV12_BUFFER_CONFIG *ref_buf = get_ref_frame_buffer(cpi, frame);
-    if (cpi->source->y_crop_width == ref_buf->y_crop_width &&
-        cpi->source->y_crop_height == ref_buf->y_crop_height) {
-      write_global_motion_params(&cm->global_motion[frame],
-                                 &cm->prev_frame->global_motion[frame], wb,
-                                 cm->allow_high_precision_mv);
-    } else {
-      assert(cm->global_motion[frame].wmtype == IDENTITY &&
-             "Invalid warp type for frames of different resolutions");
-    }
-    */
-    /*
-    printf("Frame %d/%d: Enc Ref %d: %d %d %d %d\n",
-           cm->current_video_frame, cm->show_frame, frame,
-           cm->global_motion[frame].wmmat[0],
-           cm->global_motion[frame].wmmat[1], cm->global_motion[frame].wmmat[2],
-           cm->global_motion[frame].wmmat[3]);
-           */
-  }
-}
-
-static void check_frame_refs_short_signaling(AV1_COMP *const cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  if (!cm->frame_refs_short_signaling) return;
-
-  // Check whether all references are distinct frames.
-  int buf_markers[FRAME_BUFFERS] = { 0 };
-  for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-    const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
-    if (buf_idx != INVALID_IDX) {
-      assert(buf_idx >= 0 && buf_idx < FRAME_BUFFERS);
-      buf_markers[buf_idx] = 1;
-    }
-  }
-
-  int num_refs = 0;
-  for (int buf_idx = 0; buf_idx < FRAME_BUFFERS; ++buf_idx) {
-    num_refs += buf_markers[buf_idx];
-  }
-
-  // We only turn on frame_refs_short_signaling when all references are
-  // distinct.
-  if (num_refs < INTER_REFS_PER_FRAME) {
-    // It indicates that there exist more than one reference frame pointing to
-    // the same reference buffer, i.e. two or more references are duplicate.
-    cm->frame_refs_short_signaling = 0;
-    return;
-  }
-
-  // Check whether the encoder side ref frame choices are aligned with that to
-  // be derived at the decoder side.
-  RefBuffer frame_refs_copy[INTER_REFS_PER_FRAME];
-
-  // Backup the frame refs info
-  memcpy(frame_refs_copy, cm->frame_refs,
-         INTER_REFS_PER_FRAME * sizeof(RefBuffer));
-
-  const int lst_map_idx = get_ref_frame_map_idx(cpi, LAST_FRAME);
-  const int gld_map_idx = get_ref_frame_map_idx(cpi, GOLDEN_FRAME);
-
-  // Set up the frame refs mapping indexes according to the
-  // frame_refs_short_signaling policy.
-  av1_set_frame_refs(cm, lst_map_idx, gld_map_idx);
-
-  // We only turn on frame_refs_short_signaling when the encoder side decision
-  // on ref frames is identical to that at the decoder side.
-  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ++ref_idx) {
-    // Compare the buffer index between two reference frames indexed
-    // respectively by the encoder and the decoder side decisions.
-    if (cm->frame_refs[ref_idx].idx != frame_refs_copy[ref_idx].idx) {
-      cm->frame_refs_short_signaling = 0;
-      break;
-    }
-  }
-
-#if 0   // For debug
-  printf("\nFrame=%d: \n", cm->current_video_frame);
-  printf("***frame_refs_short_signaling=%d\n", cm->frame_refs_short_signaling);
-  for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-    printf("enc_ref(map_idx=%d, buf_idx=%d)=%d, vs. "
-        "dec_ref(map_idx=%d, buf_idx=%d)=%d\n",
-        get_ref_frame_map_idx(cpi, ref_frame),
-        get_ref_frame_buf_idx(cpi, ref_frame), ref_frame,
-        cm->frame_refs[ref_frame - LAST_FRAME].map_idx,
-        cm->frame_refs[ref_frame - LAST_FRAME].idx, ref_frame);
-  }
-#endif  // 0
-
-  // Restore the frame refs info if frame_refs_short_signaling is off.
-  if (!cm->frame_refs_short_signaling)
-    memcpy(cm->frame_refs, frame_refs_copy,
-           INTER_REFS_PER_FRAME * sizeof(RefBuffer));
-}
-
-// New function based on HLS R18
-static void write_uncompressed_header_obu(AV1_COMP *cpi,
-                                          struct aom_write_bit_buffer *saved_wb,
-                                          struct aom_write_bit_buffer *wb) {
-  AV1_COMMON *const cm = &cpi->common;
-  const SequenceHeader *const seq_params = &cm->seq_params;
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-
-  // NOTE: By default all coded frames to be used as a reference
-  cm->is_reference_frame = 1;
-  cm->frame_type = cm->intra_only ? INTRA_ONLY_FRAME : cm->frame_type;
-
-  if (seq_params->still_picture) {
-    assert(cm->show_existing_frame == 0);
-    assert(cm->show_frame == 1);
-    assert(cm->frame_type == KEY_FRAME);
-  }
-  if (!seq_params->reduced_still_picture_hdr) {
-    if (encode_show_existing_frame(cm)) {
-      RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
-      const int frame_to_show = cm->ref_frame_map[cpi->existing_fb_idx_to_show];
-
-      if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1) {
-        aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
-                           "Buffer %d does not contain a reconstructed frame",
-                           frame_to_show);
-      }
-      ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show);
-
-      aom_wb_write_bit(wb, 1);  // show_existing_frame
-      aom_wb_write_literal(wb, cpi->existing_fb_idx_to_show, 3);
-
-      if (seq_params->decoder_model_info_present_flag &&
-          cm->timing_info.equal_picture_interval == 0) {
-        write_tu_pts_info(cm, wb);
-      }
-      if (seq_params->frame_id_numbers_present_flag) {
-        int frame_id_len = seq_params->frame_id_length;
-        int display_frame_id = cm->ref_frame_id[cpi->existing_fb_idx_to_show];
-        aom_wb_write_literal(wb, display_frame_id, frame_id_len);
-      }
-
-      if (cm->reset_decoder_state &&
-          frame_bufs[frame_to_show].frame_type != KEY_FRAME) {
-        aom_internal_error(
-            &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
-            "show_existing_frame to reset state on KEY_FRAME only");
-      }
-
-      return;
-    } else {
-      aom_wb_write_bit(wb, 0);  // show_existing_frame
-    }
-
-    aom_wb_write_literal(wb, cm->frame_type, 2);
-
-    aom_wb_write_bit(wb, cm->show_frame);
-    if (cm->show_frame) {
-      if (seq_params->decoder_model_info_present_flag &&
-          cm->timing_info.equal_picture_interval == 0)
-        write_tu_pts_info(cm, wb);
-    } else {
-      aom_wb_write_bit(wb, cm->showable_frame);
-    }
-    if (frame_is_sframe(cm)) {
-      assert(cm->error_resilient_mode);
-    } else if (!(cm->frame_type == KEY_FRAME && cm->show_frame)) {
-      aom_wb_write_bit(wb, cm->error_resilient_mode);
-    }
-  }
-  aom_wb_write_bit(wb, cm->disable_cdf_update);
-
-  if (seq_params->force_screen_content_tools == 2) {
-    aom_wb_write_bit(wb, cm->allow_screen_content_tools);
-  } else {
-    assert(cm->allow_screen_content_tools ==
-           seq_params->force_screen_content_tools);
-  }
-
-  if (cm->allow_screen_content_tools) {
-    if (seq_params->force_integer_mv == 2) {
-      aom_wb_write_bit(wb, cm->cur_frame_force_integer_mv);
-    } else {
-      assert(cm->cur_frame_force_integer_mv == seq_params->force_integer_mv);
-    }
-  } else {
-    assert(cm->cur_frame_force_integer_mv == 0);
-  }
-
-  cm->invalid_delta_frame_id_minus_1 = 0;
-  int frame_size_override_flag = 0;
-  cm->frame_refs_short_signaling = 0;
-
-  if (seq_params->reduced_still_picture_hdr) {
-    assert(cm->width == seq_params->max_frame_width &&
-           cm->height == seq_params->max_frame_height);
-  } else {
-    if (seq_params->frame_id_numbers_present_flag) {
-      int frame_id_len = seq_params->frame_id_length;
-      aom_wb_write_literal(wb, cm->current_frame_id, frame_id_len);
-    }
-
-    if (cm->width > seq_params->max_frame_width ||
-        cm->height > seq_params->max_frame_height) {
-      aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
-                         "Frame dimensions are larger than the maximum values");
-    }
-
-    frame_size_override_flag =
-        frame_is_sframe(cm) ? 1
-                            : (cm->width != seq_params->max_frame_width ||
-                               cm->height != seq_params->max_frame_height);
-    if (!frame_is_sframe(cm)) aom_wb_write_bit(wb, frame_size_override_flag);
-
-    if (seq_params->enable_order_hint)
-      aom_wb_write_literal(wb, cm->frame_offset,
-                           seq_params->order_hint_bits_minus_1 + 1);
-
-    if (!cm->error_resilient_mode && !frame_is_intra_only(cm)) {
-      aom_wb_write_literal(wb, cm->primary_ref_frame, PRIMARY_REF_BITS);
-    }
-  }
-
-  if (seq_params->decoder_model_info_present_flag) {
-    aom_wb_write_bit(wb, cm->buffer_removal_time_present);
-    if (cm->buffer_removal_time_present) {
-      for (int op_num = 0;
-           op_num < seq_params->operating_points_cnt_minus_1 + 1; op_num++) {
-        if (cm->op_params[op_num].decoder_model_param_present_flag) {
-          if (((seq_params->operating_point_idc[op_num] >>
-                cm->temporal_layer_id) &
-                   0x1 &&
-               (seq_params->operating_point_idc[op_num] >>
-                (cm->spatial_layer_id + 8)) &
-                   0x1) ||
-              seq_params->operating_point_idc[op_num] == 0) {
-            aom_wb_write_unsigned_literal(
-                wb, cm->op_frame_timing[op_num].buffer_removal_time,
-                cm->buffer_model.buffer_removal_time_length);
-            cm->op_frame_timing[op_num].buffer_removal_time++;
-            if (cm->op_frame_timing[op_num].buffer_removal_time == 0) {
-              aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
-                                 "buffer_removal_time overflowed");
-            }
-          }
-        }
-      }
-    }
-  }
-  cpi->refresh_frame_mask = get_refresh_mask(cpi);
-  if (cm->frame_type == KEY_FRAME) {
-    if (!cm->show_frame) {  // unshown keyframe (forward keyframe)
-      aom_wb_write_literal(wb, cpi->refresh_frame_mask, REF_FRAMES);
-    } else {
-      assert(cpi->refresh_frame_mask == 0xFF);
-    }
-  } else {
-    if (cm->frame_type == INTRA_ONLY_FRAME) {
-      assert(cpi->refresh_frame_mask != 0xFF);
-      int updated_fb = -1;
-      for (int i = 0; i < REF_FRAMES; i++) {
-        // If more than one frame is refreshed, it doesn't matter which one
-        // we pick, so pick the first.
-        if (cpi->refresh_frame_mask & (1 << i)) {
-          updated_fb = i;
-          break;
-        }
-      }
-      assert(updated_fb >= 0);
-      cm->fb_of_context_type[cm->frame_context_idx] = updated_fb;
-      aom_wb_write_literal(wb, cpi->refresh_frame_mask, REF_FRAMES);
-    } else if (cm->frame_type == INTER_FRAME || frame_is_sframe(cm)) {
-      if (cm->frame_type == INTER_FRAME) {
-        aom_wb_write_literal(wb, cpi->refresh_frame_mask, REF_FRAMES);
-      } else {
-        assert(frame_is_sframe(cm) && cpi->refresh_frame_mask == 0xFF);
-      }
-      int updated_fb = -1;
-      for (int i = 0; i < REF_FRAMES; i++) {
-        // If more than one frame is refreshed, it doesn't matter which one
-        // we pick, so pick the first.
-        if (cpi->refresh_frame_mask & (1 << i)) {
-          updated_fb = i;
-          break;
-        }
-      }
-      // large scale tile sometimes won't refresh any fbs
-      if (updated_fb >= 0) {
-        cm->fb_of_context_type[cm->frame_context_idx] = updated_fb;
-      }
-
-      if (!cpi->refresh_frame_mask) {
-        // NOTE: "cpi->refresh_frame_mask == 0" indicates that the coded frame
-        //       will not be used as a reference
-        cm->is_reference_frame = 0;
-      }
-    }
-  }
-
-  if (!frame_is_intra_only(cm) || cpi->refresh_frame_mask != 0xFF) {
-    // Write all ref frame order hints if error_resilient_mode == 1
-    if (cm->error_resilient_mode && seq_params->enable_order_hint) {
-      RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
-      for (int ref_idx = 0; ref_idx < REF_FRAMES; ref_idx++) {
-        // Get buffer index
-        const int buf_idx = cm->ref_frame_map[ref_idx];
-        assert(buf_idx >= 0 && buf_idx < FRAME_BUFFERS);
-
-        // Write order hint to bit stream
-        aom_wb_write_literal(wb, frame_bufs[buf_idx].cur_frame_offset,
-                             seq_params->order_hint_bits_minus_1 + 1);
-      }
-    }
-  }
-
-  if (cm->frame_type == KEY_FRAME) {
-    write_frame_size(cm, frame_size_override_flag, wb);
-    assert(!av1_superres_scaled(cm) || !cm->allow_intrabc);
-    if (cm->allow_screen_content_tools && !av1_superres_scaled(cm))
-      aom_wb_write_bit(wb, cm->allow_intrabc);
-    // all eight fbs are refreshed, pick one that will live long enough
-    cm->fb_of_context_type[REGULAR_FRAME] = 0;
-  } else {
-    if (cm->frame_type == INTRA_ONLY_FRAME) {
-      write_frame_size(cm, frame_size_override_flag, wb);
-      assert(!av1_superres_scaled(cm) || !cm->allow_intrabc);
-      if (cm->allow_screen_content_tools && !av1_superres_scaled(cm))
-        aom_wb_write_bit(wb, cm->allow_intrabc);
-    } else if (cm->frame_type == INTER_FRAME || frame_is_sframe(cm)) {
-      MV_REFERENCE_FRAME ref_frame;
-
-      // NOTE: Error resilient mode turns off frame_refs_short_signaling
-      //       automatically.
-#define FRAME_REFS_SHORT_SIGNALING 0
-#if FRAME_REFS_SHORT_SIGNALING
-      cm->frame_refs_short_signaling = seq_params->enable_order_hint;
-#endif  // FRAME_REFS_SHORT_SIGNALING
-
-      if (cm->frame_refs_short_signaling) {
-        // NOTE(zoeliu@google.com):
-        //   An example solution for encoder-side implementation on frame refs
-        //   short signaling, which is only turned on when the encoder side
-        //   decision on ref frames is identical to that at the decoder side.
-        check_frame_refs_short_signaling(cpi);
-      }
-
-      if (seq_params->enable_order_hint)
-        aom_wb_write_bit(wb, cm->frame_refs_short_signaling);
-
-      if (cm->frame_refs_short_signaling) {
-        const int lst_ref = get_ref_frame_map_idx(cpi, LAST_FRAME);
-        aom_wb_write_literal(wb, lst_ref, REF_FRAMES_LOG2);
-
-        const int gld_ref = get_ref_frame_map_idx(cpi, GOLDEN_FRAME);
-        aom_wb_write_literal(wb, gld_ref, REF_FRAMES_LOG2);
-      }
-
-      for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-        assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX);
-        if (!cm->frame_refs_short_signaling)
-          aom_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame),
-                               REF_FRAMES_LOG2);
-        if (seq_params->frame_id_numbers_present_flag) {
-          int i = get_ref_frame_map_idx(cpi, ref_frame);
-          int frame_id_len = seq_params->frame_id_length;
-          int diff_len = seq_params->delta_frame_id_length;
-          int delta_frame_id_minus_1 =
-              ((cm->current_frame_id - cm->ref_frame_id[i] +
-                (1 << frame_id_len)) %
-               (1 << frame_id_len)) -
-              1;
-          if (delta_frame_id_minus_1 < 0 ||
-              delta_frame_id_minus_1 >= (1 << diff_len))
-            cm->invalid_delta_frame_id_minus_1 = 1;
-          aom_wb_write_literal(wb, delta_frame_id_minus_1, diff_len);
-        }
-      }
-
-      if (!cm->error_resilient_mode && frame_size_override_flag) {
-        write_frame_size_with_refs(cpi, wb);
-      } else {
-        write_frame_size(cm, frame_size_override_flag, wb);
-      }
-
-      if (cm->cur_frame_force_integer_mv) {
-        cm->allow_high_precision_mv = 0;
-      } else {
-        aom_wb_write_bit(wb, cm->allow_high_precision_mv);
-      }
-      fix_interp_filter(cm, cpi->td.counts);
-      write_frame_interp_filter(cm->interp_filter, wb);
-      aom_wb_write_bit(wb, cm->switchable_motion_mode);
-      if (frame_might_allow_ref_frame_mvs(cm)) {
-        aom_wb_write_bit(wb, cm->allow_ref_frame_mvs);
-      } else {
-        assert(cm->allow_ref_frame_mvs == 0);
-      }
-    }
-  }
-
-  const int might_bwd_adapt =
-      !(seq_params->reduced_still_picture_hdr) && !(cm->disable_cdf_update);
-  if (cm->large_scale_tile)
-    cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_DISABLED;
-
-  if (might_bwd_adapt) {
-    aom_wb_write_bit(
-        wb, cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_DISABLED);
-  }
-
-  write_tile_info(cm, saved_wb, wb);
-  encode_quantization(cm, wb);
-  encode_segmentation(cm, xd, wb);
-
-  if (cm->delta_q_present_flag) assert(cm->base_qindex > 0);
-  if (cm->base_qindex > 0) {
-    aom_wb_write_bit(wb, cm->delta_q_present_flag);
-    if (cm->delta_q_present_flag) {
-      aom_wb_write_literal(wb, get_msb(cm->delta_q_res), 2);
-      xd->current_qindex = cm->base_qindex;
-      if (cm->allow_intrabc)
-        assert(cm->delta_lf_present_flag == 0);
-      else
-        aom_wb_write_bit(wb, cm->delta_lf_present_flag);
-      if (cm->delta_lf_present_flag) {
-        aom_wb_write_literal(wb, get_msb(cm->delta_lf_res), 2);
-        aom_wb_write_bit(wb, cm->delta_lf_multi);
-        av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
-      }
-    }
-  }
-
-  if (cm->all_lossless) {
-    assert(!av1_superres_scaled(cm));
-  } else {
-    if (!cm->coded_lossless) {
-      encode_loopfilter(cm, wb);
-      encode_cdef(cm, wb);
-    }
-    encode_restoration_mode(cm, wb);
-  }
-
-  write_tx_mode(cm, &cm->tx_mode, wb);
-
-  if (cpi->allow_comp_inter_inter) {
-    const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT;
-
-    aom_wb_write_bit(wb, use_hybrid_pred);
-  }
-
-  if (cm->is_skip_mode_allowed) aom_wb_write_bit(wb, cm->skip_mode_flag);
-
-  if (frame_might_allow_warped_motion(cm))
-    aom_wb_write_bit(wb, cm->allow_warped_motion);
-  else
-    assert(!cm->allow_warped_motion);
-
-  aom_wb_write_bit(wb, cm->reduced_tx_set_used);
-
-  if (!frame_is_intra_only(cm)) write_global_motion(cpi, wb);
-
-  if (seq_params->film_grain_params_present &&
-      (cm->show_frame || cm->showable_frame)) {
-    int flip_back_update_parameters_flag = 0;
-    if (cm->frame_type != INTER_FRAME &&
-        cm->film_grain_params.update_parameters == 0) {
-      cm->film_grain_params.update_parameters = 1;
-      flip_back_update_parameters_flag = 1;
-    }
-    write_film_grain_params(cpi, wb);
-
-    if (flip_back_update_parameters_flag)
-      cm->film_grain_params.update_parameters = 0;
-  }
-
-  if (cm->large_scale_tile) write_ext_tile_info(cm, saved_wb, wb);
-}
-
-static int choose_size_bytes(uint32_t size, int spare_msbs) {
-  // Choose the number of bytes required to represent size, without
-  // using the 'spare_msbs' number of most significant bits.
-
-  // Make sure we will fit in 4 bytes to start with..
-  if (spare_msbs > 0 && size >> (32 - spare_msbs) != 0) return -1;
-
-  // Normalise to 32 bits
-  size <<= spare_msbs;
-
-  if (size >> 24 != 0)
-    return 4;
-  else if (size >> 16 != 0)
-    return 3;
-  else if (size >> 8 != 0)
-    return 2;
-  else
-    return 1;
-}
-
-static void mem_put_varsize(uint8_t *const dst, const int sz, const int val) {
-  switch (sz) {
-    case 1: dst[0] = (uint8_t)(val & 0xff); break;
-    case 2: mem_put_le16(dst, val); break;
-    case 3: mem_put_le24(dst, val); break;
-    case 4: mem_put_le32(dst, val); break;
-    default: assert(0 && "Invalid size"); break;
-  }
-}
-
-static int remux_tiles(const AV1_COMMON *const cm, uint8_t *dst,
-                       const uint32_t data_size, const uint32_t max_tile_size,
-                       const uint32_t max_tile_col_size,
-                       int *const tile_size_bytes,
-                       int *const tile_col_size_bytes) {
-  // Choose the tile size bytes (tsb) and tile column size bytes (tcsb)
-  int tsb;
-  int tcsb;
-
-  if (cm->large_scale_tile) {
-    // The top bit in the tile size field indicates tile copy mode, so we
-    // have 1 less bit to code the tile size
-    tsb = choose_size_bytes(max_tile_size, 1);
-    tcsb = choose_size_bytes(max_tile_col_size, 0);
-  } else {
-    tsb = choose_size_bytes(max_tile_size, 0);
-    tcsb = 4;  // This is ignored
-    (void)max_tile_col_size;
-  }
-
-  assert(tsb > 0);
-  assert(tcsb > 0);
-
-  *tile_size_bytes = tsb;
-  *tile_col_size_bytes = tcsb;
-  if (tsb == 4 && tcsb == 4) return data_size;
-
-  uint32_t wpos = 0;
-  uint32_t rpos = 0;
-
-  if (cm->large_scale_tile) {
-    int tile_row;
-    int tile_col;
-
-    for (tile_col = 0; tile_col < cm->tile_cols; tile_col++) {
-      // All but the last column has a column header
-      if (tile_col < cm->tile_cols - 1) {
-        uint32_t tile_col_size = mem_get_le32(dst + rpos);
-        rpos += 4;
-
-        // Adjust the tile column size by the number of bytes removed
-        // from the tile size fields.
-        tile_col_size -= (4 - tsb) * cm->tile_rows;
-
-        mem_put_varsize(dst + wpos, tcsb, tile_col_size);
-        wpos += tcsb;
-      }
-
-      for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) {
-        // All, including the last row has a header
-        uint32_t tile_header = mem_get_le32(dst + rpos);
-        rpos += 4;
-
-        // If this is a copy tile, we need to shift the MSB to the
-        // top bit of the new width, and there is no data to copy.
-        if (tile_header >> 31 != 0) {
-          if (tsb < 4) tile_header >>= 32 - 8 * tsb;
-          mem_put_varsize(dst + wpos, tsb, tile_header);
-          wpos += tsb;
-        } else {
-          mem_put_varsize(dst + wpos, tsb, tile_header);
-          wpos += tsb;
-
-          tile_header += AV1_MIN_TILE_SIZE_BYTES;
-          memmove(dst + wpos, dst + rpos, tile_header);
-          rpos += tile_header;
-          wpos += tile_header;
-        }
-      }
-    }
-
-    assert(rpos > wpos);
-    assert(rpos == data_size);
-
-    return wpos;
-  }
-  const int n_tiles = cm->tile_cols * cm->tile_rows;
-  int n;
-
-  for (n = 0; n < n_tiles; n++) {
-    int tile_size;
-
-    if (n == n_tiles - 1) {
-      tile_size = data_size - rpos;
-    } else {
-      tile_size = mem_get_le32(dst + rpos);
-      rpos += 4;
-      mem_put_varsize(dst + wpos, tsb, tile_size);
-      tile_size += AV1_MIN_TILE_SIZE_BYTES;
-      wpos += tsb;
-    }
-
-    memmove(dst + wpos, dst + rpos, tile_size);
-
-    rpos += tile_size;
-    wpos += tile_size;
-  }
-
-  assert(rpos > wpos);
-  assert(rpos == data_size);
-
-  return wpos;
-}
-
-uint32_t write_obu_header(OBU_TYPE obu_type, int obu_extension,
-                          uint8_t *const dst) {
-  struct aom_write_bit_buffer wb = { dst, 0 };
-  uint32_t size = 0;
-
-  aom_wb_write_literal(&wb, 0, 1);  // forbidden bit.
-  aom_wb_write_literal(&wb, (int)obu_type, 4);
-  aom_wb_write_literal(&wb, obu_extension ? 1 : 0, 1);
-  aom_wb_write_literal(&wb, 1, 1);  // obu_has_payload_length_field
-  aom_wb_write_literal(&wb, 0, 1);  // reserved
-
-  if (obu_extension) {
-    aom_wb_write_literal(&wb, obu_extension & 0xFF, 8);
-  }
-
-  size = aom_wb_bytes_written(&wb);
-  return size;
-}
-
-int write_uleb_obu_size(uint32_t obu_header_size, uint32_t obu_payload_size,
-                        uint8_t *dest) {
-  const uint32_t obu_size = obu_payload_size;
-  const uint32_t offset = obu_header_size;
-  size_t coded_obu_size = 0;
-
-  if (aom_uleb_encode(obu_size, sizeof(obu_size), dest + offset,
-                      &coded_obu_size) != 0) {
-    return AOM_CODEC_ERROR;
-  }
-
-  return AOM_CODEC_OK;
-}
-
-static size_t obu_memmove(uint32_t obu_header_size, uint32_t obu_payload_size,
-                          uint8_t *data) {
-  const size_t length_field_size = aom_uleb_size_in_bytes(obu_payload_size);
-  const uint32_t move_dst_offset =
-      (uint32_t)length_field_size + obu_header_size;
-  const uint32_t move_src_offset = obu_header_size;
-  const uint32_t move_size = obu_payload_size;
-  memmove(data + move_dst_offset, data + move_src_offset, move_size);
-  return length_field_size;
-}
-
-static void add_trailing_bits(struct aom_write_bit_buffer *wb) {
-  if (aom_wb_is_byte_aligned(wb)) {
-    aom_wb_write_literal(wb, 0x80, 8);
-  } else {
-    // assumes that the other bits are already 0s
-    aom_wb_write_bit(wb, 1);
-  }
-}
-
-static void write_bitstream_level(BitstreamLevel bl,
-                                  struct aom_write_bit_buffer *wb) {
-  uint8_t seq_level_idx = major_minor_to_seq_level_idx(bl);
-  assert(is_valid_seq_level_idx(seq_level_idx));
-  aom_wb_write_literal(wb, seq_level_idx, LEVEL_BITS);
-}
-
-uint32_t write_sequence_header_obu(AV1_COMP *cpi, uint8_t *const dst) {
-  AV1_COMMON *const cm = &cpi->common;
-  struct aom_write_bit_buffer wb = { dst, 0 };
-  uint32_t size = 0;
-
-  write_profile(cm->seq_params.profile, &wb);
-
-  // Still picture or not
-  aom_wb_write_bit(&wb, cm->seq_params.still_picture);
-  assert(IMPLIES(!cm->seq_params.still_picture,
-                 !cm->seq_params.reduced_still_picture_hdr));
-  // whether to use reduced still picture header
-  aom_wb_write_bit(&wb, cm->seq_params.reduced_still_picture_hdr);
-
-  if (cm->seq_params.reduced_still_picture_hdr) {
-    assert(cm->timing_info_present == 0);
-    assert(cm->seq_params.decoder_model_info_present_flag == 0);
-    assert(cm->seq_params.display_model_info_present_flag == 0);
-    write_bitstream_level(cm->seq_params.level[0], &wb);
-  } else {
-    aom_wb_write_bit(&wb, cm->timing_info_present);  // timing info present flag
-
-    if (cm->timing_info_present) {
-      // timing_info
-      write_timing_info_header(cm, &wb);
-      aom_wb_write_bit(&wb, cm->seq_params.decoder_model_info_present_flag);
-      if (cm->seq_params.decoder_model_info_present_flag) {
-        write_decoder_model_info(cm, &wb);
-      }
-    }
-    aom_wb_write_bit(&wb, cm->seq_params.display_model_info_present_flag);
-    aom_wb_write_literal(&wb, cm->seq_params.operating_points_cnt_minus_1,
-                         OP_POINTS_CNT_MINUS_1_BITS);
-    int i;
-    for (i = 0; i < cm->seq_params.operating_points_cnt_minus_1 + 1; i++) {
-      aom_wb_write_literal(&wb, cm->seq_params.operating_point_idc[i],
-                           OP_POINTS_IDC_BITS);
-      write_bitstream_level(cm->seq_params.level[i], &wb);
-      if (cm->seq_params.level[i].major > 3)
-        aom_wb_write_bit(&wb, cm->seq_params.tier[i]);
-      if (cm->seq_params.decoder_model_info_present_flag) {
-        aom_wb_write_bit(&wb,
-                         cm->op_params[i].decoder_model_param_present_flag);
-        if (cm->op_params[i].decoder_model_param_present_flag)
-          write_dec_model_op_parameters(cm, &wb, i);
-      }
-      if (cm->seq_params.display_model_info_present_flag) {
-        aom_wb_write_bit(&wb,
-                         cm->op_params[i].display_model_param_present_flag);
-        if (cm->op_params[i].display_model_param_present_flag) {
-          assert(cm->op_params[i].initial_display_delay <= 10);
-          aom_wb_write_literal(&wb, cm->op_params[i].initial_display_delay - 1,
-                               4);
-        }
-      }
-    }
-  }
-  write_sequence_header(cpi, &wb);
-
-  write_color_config(&cm->seq_params, &wb);
-
-  aom_wb_write_bit(&wb, cm->seq_params.film_grain_params_present);
-
-  add_trailing_bits(&wb);
-
-  size = aom_wb_bytes_written(&wb);
-  return size;
-}
-
-static uint32_t write_frame_header_obu(AV1_COMP *cpi,
-                                       struct aom_write_bit_buffer *saved_wb,
-                                       uint8_t *const dst,
-                                       int append_trailing_bits) {
-  struct aom_write_bit_buffer wb = { dst, 0 };
-  write_uncompressed_header_obu(cpi, saved_wb, &wb);
-  if (append_trailing_bits) add_trailing_bits(&wb);
-  return aom_wb_bytes_written(&wb);
-}
-
-static uint32_t write_tile_group_header(uint8_t *const dst, int startTile,
-                                        int endTile, int tiles_log2,
-                                        int tile_start_and_end_present_flag) {
-  struct aom_write_bit_buffer wb = { dst, 0 };
-  uint32_t size = 0;
-
-  if (!tiles_log2) return size;
-
-  aom_wb_write_bit(&wb, tile_start_and_end_present_flag);
-
-  if (tile_start_and_end_present_flag) {
-    aom_wb_write_literal(&wb, startTile, tiles_log2);
-    aom_wb_write_literal(&wb, endTile, tiles_log2);
-  }
-
-  size = aom_wb_bytes_written(&wb);
-  return size;
-}
-
-typedef struct {
-  uint8_t *frame_header;
-  size_t obu_header_byte_offset;
-  size_t total_length;
-} FrameHeaderInfo;
-
-static uint32_t write_tiles_in_tg_obus(AV1_COMP *const cpi, uint8_t *const dst,
-                                       struct aom_write_bit_buffer *saved_wb,
-                                       uint8_t obu_extension_header,
-                                       const FrameHeaderInfo *fh_info) {
-  AV1_COMMON *const cm = &cpi->common;
-  aom_writer mode_bc;
-  int tile_row, tile_col;
-  TileBufferEnc(*const tile_buffers)[MAX_TILE_COLS] = cpi->tile_buffers;
-  uint32_t total_size = 0;
-  const int tile_cols = cm->tile_cols;
-  const int tile_rows = cm->tile_rows;
-  unsigned int tile_size = 0;
-  unsigned int max_tile_size = 0;
-  unsigned int max_tile_col_size = 0;
-  const int n_log2_tiles = cm->log2_tile_rows + cm->log2_tile_cols;
-  // Fixed size tile groups for the moment
-  const int num_tg_hdrs = cm->num_tg;
-  const int tg_size =
-      (cm->large_scale_tile)
-          ? 1
-          : (tile_rows * tile_cols + num_tg_hdrs - 1) / num_tg_hdrs;
-  int tile_count = 0;
-  int curr_tg_data_size = 0;
-  uint8_t *data = dst;
-  int new_tg = 1;
-  const int have_tiles = tile_cols * tile_rows > 1;
-  int first_tg = 1;
-
-  cm->largest_tile_id = 0;
-
-  if (cm->large_scale_tile) {
-    // For large_scale_tile case, we always have only one tile group, so it can
-    // be written as an OBU_FRAME.
-    const OBU_TYPE obu_type = OBU_FRAME;
-    const uint32_t tg_hdr_size = write_obu_header(obu_type, 0, data);
-    data += tg_hdr_size;
-
-    const uint32_t frame_header_size =
-        write_frame_header_obu(cpi, saved_wb, data, 0);
-    data += frame_header_size;
-    total_size += frame_header_size;
-
-#define EXT_TILE_DEBUG 0
-#if EXT_TILE_DEBUG
-    {
-      char fn[20] = "./fh";
-      fn[4] = cm->current_video_frame / 100 + '0';
-      fn[5] = (cm->current_video_frame % 100) / 10 + '0';
-      fn[6] = (cm->current_video_frame % 10) + '0';
-      fn[7] = '\0';
-      av1_print_uncompressed_frame_header(data - frame_header_size,
-                                          frame_header_size, fn);
-    }
-#endif  // EXT_TILE_DEBUG
-#undef EXT_TILE_DEBUG
-
-    int tile_size_bytes = 0;
-    int tile_col_size_bytes = 0;
-
-    for (tile_col = 0; tile_col < tile_cols; tile_col++) {
-      TileInfo tile_info;
-      const int is_last_col = (tile_col == tile_cols - 1);
-      const uint32_t col_offset = total_size;
-
-      av1_tile_set_col(&tile_info, cm, tile_col);
-
-      // The last column does not have a column header
-      if (!is_last_col) total_size += 4;
-
-      for (tile_row = 0; tile_row < tile_rows; tile_row++) {
-        TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
-        const int data_offset = have_tiles ? 4 : 0;
-        const int tile_idx = tile_row * tile_cols + tile_col;
-        TileDataEnc *this_tile = &cpi->tile_data[tile_idx];
-        av1_tile_set_row(&tile_info, cm, tile_row);
-
-        buf->data = dst + total_size + tg_hdr_size;
-
-        // Is CONFIG_EXT_TILE = 1, every tile in the row has a header,
-        // even for the last one, unless no tiling is used at all.
-        total_size += data_offset;
-        // Initialise tile context from the frame context
-        this_tile->tctx = *cm->fc;
-        cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
-        mode_bc.allow_update_cdf = !cm->large_scale_tile;
-        mode_bc.allow_update_cdf =
-            mode_bc.allow_update_cdf && !cm->disable_cdf_update;
-        aom_start_encode(&mode_bc, buf->data + data_offset);
-        write_modes(cpi, &tile_info, &mode_bc, tile_row, tile_col);
-        aom_stop_encode(&mode_bc);
-        tile_size = mode_bc.pos;
-        buf->size = tile_size;
-
-        // Record the maximum tile size we see, so we can compact headers later.
-        if (tile_size > max_tile_size) {
-          max_tile_size = tile_size;
-          cm->largest_tile_id = tile_cols * tile_row + tile_col;
-        }
-
-        if (have_tiles) {
-          // tile header: size of this tile, or copy offset
-          uint32_t tile_header = tile_size - AV1_MIN_TILE_SIZE_BYTES;
-          const int tile_copy_mode =
-              ((AOMMAX(cm->tile_width, cm->tile_height) << MI_SIZE_LOG2) <= 256)
-                  ? 1
-                  : 0;
-
-          // If tile_copy_mode = 1, check if this tile is a copy tile.
-          // Very low chances to have copy tiles on the key frames, so don't
-          // search on key frames to reduce unnecessary search.
-          if (cm->frame_type != KEY_FRAME && tile_copy_mode) {
-            const int identical_tile_offset =
-                find_identical_tile(tile_row, tile_col, tile_buffers);
-
-            if (identical_tile_offset > 0) {
-              tile_size = 0;
-              tile_header = identical_tile_offset | 0x80;
-              tile_header <<= 24;
-            }
-          }
-
-          mem_put_le32(buf->data, tile_header);
-        }
-
-        total_size += tile_size;
-      }
-
-      if (!is_last_col) {
-        uint32_t col_size = total_size - col_offset - 4;
-        mem_put_le32(dst + col_offset + tg_hdr_size, col_size);
-
-        // Record the maximum tile column size we see.
-        max_tile_col_size = AOMMAX(max_tile_col_size, col_size);
-      }
-    }
-
-    if (have_tiles) {
-      total_size = remux_tiles(cm, data, total_size - frame_header_size,
-                               max_tile_size, max_tile_col_size,
-                               &tile_size_bytes, &tile_col_size_bytes);
-      total_size += frame_header_size;
-    }
-
-    // In EXT_TILE case, only use 1 tile group. Follow the obu syntax, write
-    // current tile group size before tile data(include tile column header).
-    // Tile group size doesn't include the bytes storing tg size.
-    total_size += tg_hdr_size;
-    const uint32_t obu_payload_size = total_size - tg_hdr_size;
-    const size_t length_field_size =
-        obu_memmove(tg_hdr_size, obu_payload_size, dst);
-    if (write_uleb_obu_size(tg_hdr_size, obu_payload_size, dst) !=
-        AOM_CODEC_OK) {
-      assert(0);
-    }
-    total_size += (uint32_t)length_field_size;
-    saved_wb->bit_buffer += length_field_size;
-
-    // Now fill in the gaps in the uncompressed header.
-    if (have_tiles) {
-      assert(tile_col_size_bytes >= 1 && tile_col_size_bytes <= 4);
-      aom_wb_overwrite_literal(saved_wb, tile_col_size_bytes - 1, 2);
-
-      assert(tile_size_bytes >= 1 && tile_size_bytes <= 4);
-      aom_wb_overwrite_literal(saved_wb, tile_size_bytes - 1, 2);
-    }
-    return total_size;
-  }
-
-  uint32_t obu_header_size = 0;
-  uint8_t *tile_data_start = dst + total_size;
-  for (tile_row = 0; tile_row < tile_rows; tile_row++) {
-    TileInfo tile_info;
-    av1_tile_set_row(&tile_info, cm, tile_row);
-
-    for (tile_col = 0; tile_col < tile_cols; tile_col++) {
-      const int tile_idx = tile_row * tile_cols + tile_col;
-      TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
-      TileDataEnc *this_tile = &cpi->tile_data[tile_idx];
-      int is_last_tile_in_tg = 0;
-
-      if (new_tg) {
-        data = dst + total_size;
-
-        // A new tile group begins at this tile.  Write the obu header and
-        // tile group header
-        const OBU_TYPE obu_type =
-            (num_tg_hdrs == 1) ? OBU_FRAME : OBU_TILE_GROUP;
-        curr_tg_data_size =
-            write_obu_header(obu_type, obu_extension_header, data);
-        obu_header_size = curr_tg_data_size;
-
-        if (num_tg_hdrs == 1) {
-          curr_tg_data_size += write_frame_header_obu(
-              cpi, saved_wb, data + curr_tg_data_size, 0);
-        }
-        curr_tg_data_size += write_tile_group_header(
-            data + curr_tg_data_size, tile_idx,
-            AOMMIN(tile_idx + tg_size - 1, tile_cols * tile_rows - 1),
-            n_log2_tiles, cm->num_tg > 1);
-        total_size += curr_tg_data_size;
-        tile_data_start += curr_tg_data_size;
-        new_tg = 0;
-        tile_count = 0;
-      }
-      tile_count++;
-      av1_tile_set_col(&tile_info, cm, tile_col);
-
-      if (tile_count == tg_size || tile_idx == (tile_cols * tile_rows - 1)) {
-        is_last_tile_in_tg = 1;
-        new_tg = 1;
-      } else {
-        is_last_tile_in_tg = 0;
-      }
-
-      buf->data = dst + total_size;
-
-      // The last tile of the tile group does not have a header.
-      if (!is_last_tile_in_tg) total_size += 4;
-
-      // Initialise tile context from the frame context
-      this_tile->tctx = *cm->fc;
-      cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
-      mode_bc.allow_update_cdf = 1;
-      mode_bc.allow_update_cdf =
-          mode_bc.allow_update_cdf && !cm->disable_cdf_update;
-      const int num_planes = av1_num_planes(cm);
-      av1_reset_loop_restoration(&cpi->td.mb.e_mbd, num_planes);
-
-      aom_start_encode(&mode_bc, dst + total_size);
-      write_modes(cpi, &tile_info, &mode_bc, tile_row, tile_col);
-      aom_stop_encode(&mode_bc);
-      tile_size = mode_bc.pos;
-      assert(tile_size >= AV1_MIN_TILE_SIZE_BYTES);
-
-      curr_tg_data_size += (tile_size + (is_last_tile_in_tg ? 0 : 4));
-      buf->size = tile_size;
-      if (tile_size > max_tile_size) {
-        cm->largest_tile_id = tile_cols * tile_row + tile_col;
-        max_tile_size = tile_size;
-      }
-
-      if (!is_last_tile_in_tg) {
-        // size of this tile
-        mem_put_le32(buf->data, tile_size - AV1_MIN_TILE_SIZE_BYTES);
-      } else {
-        // write current tile group size
-        const uint32_t obu_payload_size = curr_tg_data_size - obu_header_size;
-        const size_t length_field_size =
-            obu_memmove(obu_header_size, obu_payload_size, data);
-        if (write_uleb_obu_size(obu_header_size, obu_payload_size, data) !=
-            AOM_CODEC_OK) {
-          assert(0);
-        }
-        curr_tg_data_size += (int)length_field_size;
-        total_size += (uint32_t)length_field_size;
-        tile_data_start += length_field_size;
-        if (num_tg_hdrs == 1) {
-          // if this tg is combined with the frame header then update saved
-          // frame header base offset accroding to length field size
-          saved_wb->bit_buffer += length_field_size;
-        }
-
-        if (!first_tg && cm->error_resilient_mode) {
-          // Make room for a duplicate Frame Header OBU.
-          memmove(data + fh_info->total_length, data, curr_tg_data_size);
-
-          // Insert a copy of the Frame Header OBU.
-          memcpy(data, fh_info->frame_header, fh_info->total_length);
-
-          // Force context update tile to be the first tile in error
-          // resiliant mode as the duplicate frame headers will have
-          // context_update_tile_id set to 0
-          cm->largest_tile_id = 0;
-
-          // Rewrite the OBU header to change the OBU type to Redundant Frame
-          // Header.
-          write_obu_header(OBU_REDUNDANT_FRAME_HEADER, obu_extension_header,
-                           &data[fh_info->obu_header_byte_offset]);
-
-          data += fh_info->total_length;
-
-          curr_tg_data_size += (int)(fh_info->total_length);
-          total_size += (uint32_t)(fh_info->total_length);
-        }
-        first_tg = 0;
-      }
-
-      total_size += tile_size;
-    }
-  }
-
-  if (have_tiles) {
-    // Fill in context_update_tile_id indicating the tile to use for the
-    // cdf update. The encoder currently sets it to the largest tile
-    // (but is up to the encoder)
-    aom_wb_overwrite_literal(saved_wb, cm->largest_tile_id,
-                             cm->log2_tile_cols + cm->log2_tile_rows);
-    // If more than one tile group. tile_size_bytes takes the default value 4
-    // and does not need to be set. For a single tile group it is set in the
-    // section below.
-    if (num_tg_hdrs == 1) {
-      int tile_size_bytes = 4, unused;
-      const uint32_t tile_data_offset = (uint32_t)(tile_data_start - dst);
-      const uint32_t tile_data_size = total_size - tile_data_offset;
-
-      total_size =
-          remux_tiles(cm, tile_data_start, tile_data_size, max_tile_size,
-                      max_tile_col_size, &tile_size_bytes, &unused);
-      total_size += tile_data_offset;
-      assert(tile_size_bytes >= 1 && tile_size_bytes <= 4);
-
-      aom_wb_overwrite_literal(saved_wb, tile_size_bytes - 1, 2);
-
-      // Update the OBU length if remux_tiles() reduced the size.
-      uint64_t payload_size;
-      size_t length_field_size;
-      int res =
-          aom_uleb_decode(dst + obu_header_size, total_size - obu_header_size,
-                          &payload_size, &length_field_size);
-      assert(res == 0);
-      (void)res;
-
-      const uint64_t new_payload_size =
-          total_size - obu_header_size - length_field_size;
-      if (new_payload_size != payload_size) {
-        size_t new_length_field_size;
-        res = aom_uleb_encode(new_payload_size, length_field_size,
-                              dst + obu_header_size, &new_length_field_size);
-        assert(res == 0);
-        if (new_length_field_size < length_field_size) {
-          const size_t src_offset = obu_header_size + length_field_size;
-          const size_t dst_offset = obu_header_size + new_length_field_size;
-          memmove(dst + dst_offset, dst + src_offset, (size_t)payload_size);
-          total_size -= (int)(length_field_size - new_length_field_size);
-        }
-      }
-    }
-  }
-  return total_size;
-}
-
-int av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dst, size_t *size) {
-  uint8_t *data = dst;
-  uint32_t data_size;
-  AV1_COMMON *const cm = &cpi->common;
-  uint32_t obu_header_size = 0;
-  uint32_t obu_payload_size = 0;
-  FrameHeaderInfo fh_info = { NULL, 0, 0 };
-  const uint8_t obu_extension_header =
-      cm->temporal_layer_id << 5 | cm->spatial_layer_id << 3 | 0;
-
-#if CONFIG_BITSTREAM_DEBUG
-  bitstream_queue_reset_write();
-#endif
-
-  // The TD is now written outside the frame encode loop
-
-  // write sequence header obu if KEY_FRAME, preceded by 4-byte size
-  if (cm->frame_type == KEY_FRAME && cm->show_frame) {
-    obu_header_size = write_obu_header(OBU_SEQUENCE_HEADER, 0, data);
-
-    obu_payload_size = write_sequence_header_obu(cpi, data + obu_header_size);
-    const size_t length_field_size =
-        obu_memmove(obu_header_size, obu_payload_size, data);
-    if (write_uleb_obu_size(obu_header_size, obu_payload_size, data) !=
-        AOM_CODEC_OK) {
-      return AOM_CODEC_ERROR;
-    }
-
-    data += obu_header_size + obu_payload_size + length_field_size;
-  }
-
-  const int write_frame_header =
-      (cm->num_tg > 1 || encode_show_existing_frame(cm));
-  struct aom_write_bit_buffer saved_wb;
-  if (write_frame_header) {
-    // Write Frame Header OBU.
-    fh_info.frame_header = data;
-    obu_header_size =
-        write_obu_header(OBU_FRAME_HEADER, obu_extension_header, data);
-    obu_payload_size =
-        write_frame_header_obu(cpi, &saved_wb, data + obu_header_size, 1);
-
-    const size_t length_field_size =
-        obu_memmove(obu_header_size, obu_payload_size, data);
-    if (write_uleb_obu_size(obu_header_size, obu_payload_size, data) !=
-        AOM_CODEC_OK) {
-      return AOM_CODEC_ERROR;
-    }
-
-    fh_info.obu_header_byte_offset = 0;
-    fh_info.total_length =
-        obu_header_size + obu_payload_size + length_field_size;
-    data += fh_info.total_length;
-
-    // Since length_field_size is determined adaptively after frame header
-    // encoding, saved_wb must be adjusted accordingly.
-    saved_wb.bit_buffer += length_field_size;
-  }
-
-  if (encode_show_existing_frame(cm)) {
-    data_size = 0;
-  } else {
-    //  Each tile group obu will be preceded by 4-byte size of the tile group
-    //  obu
-    data_size = write_tiles_in_tg_obus(cpi, data, &saved_wb,
-                                       obu_extension_header, &fh_info);
-  }
-  data += data_size;
-  *size = data - dst;
-  return AOM_CODEC_OK;
-}
diff --git a/third_party/aom/av1/encoder/bitstream.h b/third_party/aom/av1/encoder/bitstream.h
deleted file mode 100644
index 465ccaed5..000000000
--- a/third_party/aom/av1/encoder/bitstream.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_BITSTREAM_H_
-#define AOM_AV1_ENCODER_BITSTREAM_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "av1/encoder/encoder.h"
-
-struct aom_write_bit_buffer;
-
-// Writes only the OBU Sequence Header payload, and returns the size of the
-// payload written to 'dst'. This function does not write the OBU header, the
-// optional extension, or the OBU size to 'dst'.
-uint32_t write_sequence_header_obu(AV1_COMP *cpi, uint8_t *const dst);
-
-// Writes the OBU header byte, and the OBU header extension byte when
-// 'obu_extension' is non-zero. Returns number of bytes written to 'dst'.
-uint32_t write_obu_header(OBU_TYPE obu_type, int obu_extension,
-                          uint8_t *const dst);
-
-int write_uleb_obu_size(uint32_t obu_header_size, uint32_t obu_payload_size,
-                        uint8_t *dest);
-
-int av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dest, size_t *size);
-
-static INLINE int av1_preserve_existing_gf(AV1_COMP *cpi) {
-  // Do not swap gf and arf indices for internal overlay frames
-  return cpi->rc.is_src_frame_alt_ref && !cpi->rc.is_src_frame_ext_arf;
-}
-
-void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
-                       int blk_row, int blk_col, int plane, TX_SIZE tx_size,
-                       aom_writer *w);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_BITSTREAM_H_
diff --git a/third_party/aom/av1/encoder/block.h b/third_party/aom/av1/encoder/block.h
deleted file mode 100644
index 0bc5dea82..000000000
--- a/third_party/aom/av1/encoder/block.h
+++ /dev/null
@@ -1,452 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_BLOCK_H_
-#define AOM_AV1_ENCODER_BLOCK_H_
-
-#include "av1/common/entropymv.h"
-#include "av1/common/entropy.h"
-#include "av1/common/mvref_common.h"
-#include "av1/encoder/hash.h"
-#if CONFIG_DIST_8X8
-#include "aom/aomcx.h"
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct {
-  unsigned int sse;
-  int sum;
-  unsigned int var;
-} DIFF;
-
-typedef struct macroblock_plane {
-  DECLARE_ALIGNED(16, int16_t, src_diff[MAX_SB_SQUARE]);
-  tran_low_t *qcoeff;
-  tran_low_t *coeff;
-  uint16_t *eobs;
-  uint8_t *txb_entropy_ctx;
-  struct buf_2d src;
-
-  // Quantizer setings
-  // These are used/accessed only in the quantization process
-  // RDO does not / must not depend on any of these values
-  // All values below share the coefficient scale/shift used in TX
-  const int16_t *quant_fp_QTX;
-  const int16_t *round_fp_QTX;
-  const int16_t *quant_QTX;
-  const int16_t *quant_shift_QTX;
-  const int16_t *zbin_QTX;
-  const int16_t *round_QTX;
-  const int16_t *dequant_QTX;
-} MACROBLOCK_PLANE;
-
-typedef struct {
-  int txb_skip_cost[TXB_SKIP_CONTEXTS][2];
-  int base_eob_cost[SIG_COEF_CONTEXTS_EOB][3];
-  int base_cost[SIG_COEF_CONTEXTS][4];
-  int eob_extra_cost[EOB_COEF_CONTEXTS][2];
-  int dc_sign_cost[DC_SIGN_CONTEXTS][2];
-  int lps_cost[LEVEL_CONTEXTS][COEFF_BASE_RANGE + 1];
-} LV_MAP_COEFF_COST;
-
-typedef struct {
-  int eob_cost[2][11];
-} LV_MAP_EOB_COST;
-
-typedef struct {
-  tran_low_t tcoeff[MAX_MB_PLANE][MAX_SB_SQUARE];
-  uint16_t eobs[MAX_MB_PLANE][MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
-  uint8_t txb_skip_ctx[MAX_MB_PLANE]
-                      [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
-  int dc_sign_ctx[MAX_MB_PLANE]
-                 [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
-} CB_COEFF_BUFFER;
-
-typedef struct {
-  int16_t mode_context[MODE_CTX_REF_FRAMES];
-  // TODO(angiebird): Reduce the buffer size according to sb_type
-  tran_low_t *tcoeff[MAX_MB_PLANE];
-  uint16_t *eobs[MAX_MB_PLANE];
-  uint8_t *txb_skip_ctx[MAX_MB_PLANE];
-  int *dc_sign_ctx[MAX_MB_PLANE];
-  uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
-  CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
-  int_mv global_mvs[REF_FRAMES];
-  int16_t compound_mode_context[MODE_CTX_REF_FRAMES];
-} MB_MODE_INFO_EXT;
-
-typedef struct {
-  int col_min;
-  int col_max;
-  int row_min;
-  int row_max;
-} MvLimits;
-
-typedef struct {
-  uint8_t best_palette_color_map[MAX_PALETTE_SQUARE];
-  int kmeans_data_buf[2 * MAX_PALETTE_SQUARE];
-} PALETTE_BUFFER;
-
-typedef struct {
-  TX_SIZE tx_size;
-  TX_SIZE inter_tx_size[INTER_TX_SIZE_BUF_LEN];
-  uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
-  TX_TYPE txk_type[TXK_TYPE_BUF_LEN];
-  RD_STATS rd_stats;
-  uint32_t hash_value;
-} MB_RD_INFO;
-
-#define RD_RECORD_BUFFER_LEN 8
-typedef struct {
-  MB_RD_INFO tx_rd_info[RD_RECORD_BUFFER_LEN];  // Circular buffer.
-  int index_start;
-  int num;
-  CRC32C crc_calculator;  // Hash function.
-} MB_RD_RECORD;
-
-typedef struct {
-  int64_t dist;
-  int64_t sse;
-  int rate;
-  uint16_t eob;
-  TX_TYPE tx_type;
-  uint16_t entropy_context;
-  uint8_t txb_entropy_ctx;
-  uint8_t valid;
-  uint8_t fast;  // This is not being used now.
-} TXB_RD_INFO;
-
-#define TX_SIZE_RD_RECORD_BUFFER_LEN 256
-typedef struct {
-  uint32_t hash_vals[TX_SIZE_RD_RECORD_BUFFER_LEN];
-  TXB_RD_INFO tx_rd_info[TX_SIZE_RD_RECORD_BUFFER_LEN];
-  int index_start;
-  int num;
-} TXB_RD_RECORD;
-
-typedef struct tx_size_rd_info_node {
-  TXB_RD_INFO *rd_info_array;  // Points to array of size TX_TYPES.
-  struct tx_size_rd_info_node *children[4];
-} TXB_RD_INFO_NODE;
-
-// Region size for mode decision sampling in the first pass of partition
-// search(two_pass_partition_search speed feature), in units of mi size(4).
-// Used by the mode_pruning_based_on_two_pass_partition_search speed feature.
-#define FIRST_PARTITION_PASS_SAMPLE_REGION 8
-#define FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2 3
-#define FIRST_PARTITION_PASS_STATS_TABLES                     \
-  (MAX_MIB_SIZE >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2) * \
-      (MAX_MIB_SIZE >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2)
-#define FIRST_PARTITION_PASS_STATS_STRIDE \
-  (MAX_MIB_SIZE_LOG2 - FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2)
-
-static INLINE int av1_first_partition_pass_stats_index(int mi_row, int mi_col) {
-  const int row =
-      (mi_row & MAX_MIB_MASK) >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2;
-  const int col =
-      (mi_col & MAX_MIB_MASK) >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2;
-  return (row << FIRST_PARTITION_PASS_STATS_STRIDE) + col;
-}
-
-typedef struct {
-  uint8_t ref0_counts[REF_FRAMES];  // Counters for ref_frame[0].
-  uint8_t ref1_counts[REF_FRAMES];  // Counters for ref_frame[1].
-  int sample_counts;                // Number of samples collected.
-} FIRST_PARTITION_PASS_STATS;
-
-#define MAX_INTERP_FILTER_STATS 64
-typedef struct {
-  InterpFilters filters;
-  int_mv mv[2];
-  int8_t ref_frames[2];
-  COMPOUND_TYPE comp_type;
-} INTERPOLATION_FILTER_STATS;
-
-typedef struct macroblock MACROBLOCK;
-struct macroblock {
-  struct macroblock_plane plane[MAX_MB_PLANE];
-
-  // Determine if one would go with reduced complexity transform block
-  // search model to select prediction modes, or full complexity model
-  // to select transform kernel.
-  int rd_model;
-
-  // Indicate if the encoder is running in the first pass partition search.
-  // In that case, apply certain speed features therein to reduce the overhead
-  // cost in the first pass search.
-  int cb_partition_scan;
-
-  FIRST_PARTITION_PASS_STATS
-  first_partition_pass_stats[FIRST_PARTITION_PASS_STATS_TABLES];
-
-  // [comp_idx][saved stat_idx]
-  INTERPOLATION_FILTER_STATS interp_filter_stats[2][MAX_INTERP_FILTER_STATS];
-  int interp_filter_stats_idx[2];
-
-  // Activate constrained coding block partition search range.
-  int use_cb_search_range;
-
-  // Inter macroblock RD search info.
-  MB_RD_RECORD mb_rd_record;
-
-  // Inter transform block RD search info. for square TX sizes.
-  TXB_RD_RECORD txb_rd_record_8X8[(MAX_MIB_SIZE >> 1) * (MAX_MIB_SIZE >> 1)];
-  TXB_RD_RECORD txb_rd_record_16X16[(MAX_MIB_SIZE >> 2) * (MAX_MIB_SIZE >> 2)];
-  TXB_RD_RECORD txb_rd_record_32X32[(MAX_MIB_SIZE >> 3) * (MAX_MIB_SIZE >> 3)];
-  TXB_RD_RECORD txb_rd_record_64X64[(MAX_MIB_SIZE >> 4) * (MAX_MIB_SIZE >> 4)];
-
-  // Intra transform block RD search info. for square TX sizes.
-  TXB_RD_RECORD txb_rd_record_intra;
-
-  MACROBLOCKD e_mbd;
-  MB_MODE_INFO_EXT *mbmi_ext;
-  int skip_block;
-  int qindex;
-
-  // The equivalent error at the current rdmult of one whole bit (not one
-  // bitcost unit).
-  int errorperbit;
-  // The equivalend SAD error of one (whole) bit at the current quantizer
-  // for large blocks.
-  int sadperbit16;
-  // The equivalend SAD error of one (whole) bit at the current quantizer
-  // for sub-8x8 blocks.
-  int sadperbit4;
-  int rdmult;
-  int mb_energy;
-  int sb_energy_level;
-  int *m_search_count_ptr;
-  int *ex_search_count_ptr;
-
-  unsigned int txb_split_count;
-
-  // These are set to their default values at the beginning, and then adjusted
-  // further in the encoding process.
-  BLOCK_SIZE min_partition_size;
-  BLOCK_SIZE max_partition_size;
-
-  unsigned int max_mv_context[REF_FRAMES];
-  unsigned int source_variance;
-  unsigned int pred_sse[REF_FRAMES];
-  int pred_mv_sad[REF_FRAMES];
-
-  int *nmvjointcost;
-  int nmv_vec_cost[MV_JOINTS];
-  int *nmvcost[2];
-  int *nmvcost_hp[2];
-  int **mv_cost_stack;
-  int **mvcost;
-
-  int32_t *wsrc_buf;
-  int32_t *mask_buf;
-  uint8_t *above_pred_buf;
-  uint8_t *left_pred_buf;
-
-  PALETTE_BUFFER *palette_buffer;
-
-  CONV_BUF_TYPE *tmp_conv_dst;
-  uint8_t *tmp_obmc_bufs[2];
-
-  // buffer for hash value calculation of a block
-  // used only in av1_get_block_hash_value()
-  // [first hash/second hash]
-  // [two buffers used ping-pong]
-  uint32_t *hash_value_buffer[2][2];
-
-  CRC_CALCULATOR crc_calculator1;
-  CRC_CALCULATOR crc_calculator2;
-  int g_crc_initialized;
-
-  // These define limits to motion vector components to prevent them
-  // from extending outside the UMV borders
-  MvLimits mv_limits;
-
-  uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
-
-  int skip;
-  int skip_chroma_rd;
-  int skip_cost[SKIP_CONTEXTS][2];
-
-  int skip_mode;  // 0: off; 1: on
-  int skip_mode_cost[SKIP_CONTEXTS][2];
-
-  int compound_idx;
-
-  LV_MAP_COEFF_COST coeff_costs[TX_SIZES][PLANE_TYPES];
-  LV_MAP_EOB_COST eob_costs[7][2];
-  uint16_t cb_offset;
-
-  // mode costs
-  int intra_inter_cost[INTRA_INTER_CONTEXTS][2];
-
-  int mbmode_cost[BLOCK_SIZE_GROUPS][INTRA_MODES];
-  int newmv_mode_cost[NEWMV_MODE_CONTEXTS][2];
-  int zeromv_mode_cost[GLOBALMV_MODE_CONTEXTS][2];
-  int refmv_mode_cost[REFMV_MODE_CONTEXTS][2];
-  int drl_mode_cost0[DRL_MODE_CONTEXTS][2];
-
-  int comp_inter_cost[COMP_INTER_CONTEXTS][2];
-  int single_ref_cost[REF_CONTEXTS][SINGLE_REFS - 1][2];
-  int comp_ref_type_cost[COMP_REF_TYPE_CONTEXTS]
-                        [CDF_SIZE(COMP_REFERENCE_TYPES)];
-  int uni_comp_ref_cost[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - 1]
-                       [CDF_SIZE(2)];
-  // Cost for signaling ref_frame[0] (LAST_FRAME, LAST2_FRAME, LAST3_FRAME or
-  // GOLDEN_FRAME) in bidir-comp mode.
-  int comp_ref_cost[REF_CONTEXTS][FWD_REFS - 1][2];
-  // Cost for signaling ref_frame[1] (ALTREF_FRAME, ALTREF2_FRAME, or
-  // BWDREF_FRAME) in bidir-comp mode.
-  int comp_bwdref_cost[REF_CONTEXTS][BWD_REFS - 1][2];
-  int inter_compound_mode_cost[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
-  int compound_type_cost[BLOCK_SIZES_ALL][COMPOUND_TYPES - 1];
-  int wedge_idx_cost[BLOCK_SIZES_ALL][16];
-  int interintra_cost[BLOCK_SIZE_GROUPS][2];
-  int wedge_interintra_cost[BLOCK_SIZES_ALL][2];
-  int interintra_mode_cost[BLOCK_SIZE_GROUPS][INTERINTRA_MODES];
-  int motion_mode_cost[BLOCK_SIZES_ALL][MOTION_MODES];
-  int motion_mode_cost1[BLOCK_SIZES_ALL][2];
-  int intra_uv_mode_cost[CFL_ALLOWED_TYPES][INTRA_MODES][UV_INTRA_MODES];
-  int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
-  int filter_intra_cost[BLOCK_SIZES_ALL][2];
-  int filter_intra_mode_cost[FILTER_INTRA_MODES];
-  int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
-  int partition_cost[PARTITION_CONTEXTS][EXT_PARTITION_TYPES];
-  int palette_y_size_cost[PALATTE_BSIZE_CTXS][PALETTE_SIZES];
-  int palette_uv_size_cost[PALATTE_BSIZE_CTXS][PALETTE_SIZES];
-  int palette_y_color_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
-                          [PALETTE_COLORS];
-  int palette_uv_color_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
-                           [PALETTE_COLORS];
-  int palette_y_mode_cost[PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS][2];
-  int palette_uv_mode_cost[PALETTE_UV_MODE_CONTEXTS][2];
-  // The rate associated with each alpha codeword
-  int cfl_cost[CFL_JOINT_SIGNS][CFL_PRED_PLANES][CFL_ALPHABET_SIZE];
-  int tx_size_cost[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
-  int txfm_partition_cost[TXFM_PARTITION_CONTEXTS][2];
-  int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
-  int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
-                         [TX_TYPES];
-  int angle_delta_cost[DIRECTIONAL_MODES][2 * MAX_ANGLE_DELTA + 1];
-  int switchable_restore_cost[RESTORE_SWITCHABLE_TYPES];
-  int wiener_restore_cost[2];
-  int sgrproj_restore_cost[2];
-  int intrabc_cost[2];
-
-  // Used to store sub partition's choices.
-  MV pred_mv[REF_FRAMES];
-
-  // Store the best motion vector during motion search
-  int_mv best_mv;
-  // Store the second best motion vector during full-pixel motion search
-  int_mv second_best_mv;
-
-  // use default transform and skip transform type search for intra modes
-  int use_default_intra_tx_type;
-  // use default transform and skip transform type search for inter modes
-  int use_default_inter_tx_type;
-#if CONFIG_DIST_8X8
-  int using_dist_8x8;
-  aom_tune_metric tune_metric;
-#endif  // CONFIG_DIST_8X8
-  int comp_idx_cost[COMP_INDEX_CONTEXTS][2];
-  int comp_group_idx_cost[COMP_GROUP_IDX_CONTEXTS][2];
-  // Bit flags for pruning tx type search, tx split, etc.
-  int tx_search_prune[EXT_TX_SET_TYPES];
-  int must_find_valid_partition;
-  int tx_split_prune_flag;  // Flag to skip tx split RD search.
-  int recalc_luma_mc_data;  // Flag to indicate recalculation of MC data during
-                            // interpolation filter search
-};
-
-static INLINE int is_rect_tx_allowed_bsize(BLOCK_SIZE bsize) {
-  static const char LUT[BLOCK_SIZES_ALL] = {
-    0,  // BLOCK_4X4
-    1,  // BLOCK_4X8
-    1,  // BLOCK_8X4
-    0,  // BLOCK_8X8
-    1,  // BLOCK_8X16
-    1,  // BLOCK_16X8
-    0,  // BLOCK_16X16
-    1,  // BLOCK_16X32
-    1,  // BLOCK_32X16
-    0,  // BLOCK_32X32
-    1,  // BLOCK_32X64
-    1,  // BLOCK_64X32
-    0,  // BLOCK_64X64
-    0,  // BLOCK_64X128
-    0,  // BLOCK_128X64
-    0,  // BLOCK_128X128
-    1,  // BLOCK_4X16
-    1,  // BLOCK_16X4
-    1,  // BLOCK_8X32
-    1,  // BLOCK_32X8
-    1,  // BLOCK_16X64
-    1,  // BLOCK_64X16
-  };
-
-  return LUT[bsize];
-}
-
-static INLINE int is_rect_tx_allowed(const MACROBLOCKD *xd,
-                                     const MB_MODE_INFO *mbmi) {
-  return is_rect_tx_allowed_bsize(mbmi->sb_type) &&
-         !xd->lossless[mbmi->segment_id];
-}
-
-static INLINE int tx_size_to_depth(TX_SIZE tx_size, BLOCK_SIZE bsize) {
-  TX_SIZE ctx_size = max_txsize_rect_lookup[bsize];
-  int depth = 0;
-  while (tx_size != ctx_size) {
-    depth++;
-    ctx_size = sub_tx_size_map[ctx_size];
-    assert(depth <= MAX_TX_DEPTH);
-  }
-  return depth;
-}
-
-static INLINE void set_blk_skip(MACROBLOCK *x, int plane, int blk_idx,
-                                int skip) {
-  if (skip)
-    x->blk_skip[blk_idx] |= 1UL << plane;
-  else
-    x->blk_skip[blk_idx] &= ~(1UL << plane);
-#ifndef NDEBUG
-  // Set chroma planes to uninitialized states when luma is set to check if
-  // it will be set later
-  if (plane == 0) {
-    x->blk_skip[blk_idx] |= 1UL << (1 + 4);
-    x->blk_skip[blk_idx] |= 1UL << (2 + 4);
-  }
-
-  // Clear the initialization checking bit
-  x->blk_skip[blk_idx] &= ~(1UL << (plane + 4));
-#endif
-}
-
-static INLINE int is_blk_skip(MACROBLOCK *x, int plane, int blk_idx) {
-#ifndef NDEBUG
-  // Check if this is initialized
-  assert(!(x->blk_skip[blk_idx] & (1UL << (plane + 4))));
-
-  // The magic number is 0x77, this is to test if there is garbage data
-  assert((x->blk_skip[blk_idx] & 0x88) == 0);
-#endif
-  return (x->blk_skip[blk_idx] >> plane) & 1;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_BLOCK_H_
diff --git a/third_party/aom/av1/encoder/blockiness.c b/third_party/aom/av1/encoder/blockiness.c
deleted file mode 100644
index f7cff9e53..000000000
--- a/third_party/aom/av1/encoder/blockiness.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/av1_rtcd.h"
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "av1/common/common.h"
-#include "av1/common/filter.h"
-#include "aom/aom_integer.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/system_state.h"
-
-static int horizontal_filter(const uint8_t *s) {
-  return (s[1] - s[-2]) * 2 + (s[-1] - s[0]) * 6;
-}
-
-static int vertical_filter(const uint8_t *s, int p) {
-  return (s[p] - s[-2 * p]) * 2 + (s[-p] - s[0]) * 6;
-}
-
-static int variance(int sum, int sum_squared, int size) {
-  return sum_squared / size - (sum / size) * (sum / size);
-}
-// Calculate a blockiness level for a vertical block edge.
-// This function returns a new blockiness metric that's defined as
-
-//              p0 p1 p2 p3
-//              q0 q1 q2 q3
-// block edge ->
-//              r0 r1 r2 r3
-//              s0 s1 s2 s3
-
-// blockiness =  p0*-2+q0*6+r0*-6+s0*2 +
-//               p1*-2+q1*6+r1*-6+s1*2 +
-//               p2*-2+q2*6+r2*-6+s2*2 +
-//               p3*-2+q3*6+r3*-6+s3*2 ;
-
-// reconstructed_blockiness = abs(blockiness from reconstructed buffer -
-//                                blockiness from source buffer,0)
-//
-// I make the assumption that flat blocks are much more visible than high
-// contrast blocks. As such, I scale the result of the blockiness calc
-// by dividing the blockiness by the variance of the pixels on either side
-// of the edge as follows:
-// var_0 = (q0^2+q1^2+q2^2+q3^2) - ((q0 + q1 + q2 + q3) / 4 )^2
-// var_1 = (r0^2+r1^2+r2^2+r3^2) - ((r0 + r1 + r2 + r3) / 4 )^2
-// The returned blockiness is the scaled value
-// Reconstructed blockiness / ( 1 + var_0 + var_1 ) ;
-static int blockiness_vertical(const uint8_t *s, int sp, const uint8_t *r,
-                               int rp, int size) {
-  int s_blockiness = 0;
-  int r_blockiness = 0;
-  int sum_0 = 0;
-  int sum_sq_0 = 0;
-  int sum_1 = 0;
-  int sum_sq_1 = 0;
-  int i;
-  int var_0;
-  int var_1;
-  for (i = 0; i < size; ++i, s += sp, r += rp) {
-    s_blockiness += horizontal_filter(s);
-    r_blockiness += horizontal_filter(r);
-    sum_0 += s[0];
-    sum_sq_0 += s[0] * s[0];
-    sum_1 += s[-1];
-    sum_sq_1 += s[-1] * s[-1];
-  }
-  var_0 = variance(sum_0, sum_sq_0, size);
-  var_1 = variance(sum_1, sum_sq_1, size);
-  r_blockiness = abs(r_blockiness);
-  s_blockiness = abs(s_blockiness);
-
-  if (r_blockiness > s_blockiness)
-    return (r_blockiness - s_blockiness) / (1 + var_0 + var_1);
-  else
-    return 0;
-}
-
-// Calculate a blockiness level for a horizontal block edge
-// same as above.
-static int blockiness_horizontal(const uint8_t *s, int sp, const uint8_t *r,
-                                 int rp, int size) {
-  int s_blockiness = 0;
-  int r_blockiness = 0;
-  int sum_0 = 0;
-  int sum_sq_0 = 0;
-  int sum_1 = 0;
-  int sum_sq_1 = 0;
-  int i;
-  int var_0;
-  int var_1;
-  for (i = 0; i < size; ++i, ++s, ++r) {
-    s_blockiness += vertical_filter(s, sp);
-    r_blockiness += vertical_filter(r, rp);
-    sum_0 += s[0];
-    sum_sq_0 += s[0] * s[0];
-    sum_1 += s[-sp];
-    sum_sq_1 += s[-sp] * s[-sp];
-  }
-  var_0 = variance(sum_0, sum_sq_0, size);
-  var_1 = variance(sum_1, sum_sq_1, size);
-  r_blockiness = abs(r_blockiness);
-  s_blockiness = abs(s_blockiness);
-
-  if (r_blockiness > s_blockiness)
-    return (r_blockiness - s_blockiness) / (1 + var_0 + var_1);
-  else
-    return 0;
-}
-
-// This function returns the blockiness for the entire frame currently by
-// looking at all borders in steps of 4.
-double av1_get_blockiness(const unsigned char *img1, int img1_pitch,
-                          const unsigned char *img2, int img2_pitch, int width,
-                          int height) {
-  double blockiness = 0;
-  int i, j;
-  aom_clear_system_state();
-  for (i = 0; i < height;
-       i += 4, img1 += img1_pitch * 4, img2 += img2_pitch * 4) {
-    for (j = 0; j < width; j += 4) {
-      if (i > 0 && i < height && j > 0 && j < width) {
-        blockiness +=
-            blockiness_vertical(img1 + j, img1_pitch, img2 + j, img2_pitch, 4);
-        blockiness += blockiness_horizontal(img1 + j, img1_pitch, img2 + j,
-                                            img2_pitch, 4);
-      }
-    }
-  }
-  blockiness /= width * height / 16;
-  return blockiness;
-}
diff --git a/third_party/aom/av1/encoder/context_tree.c b/third_party/aom/av1/encoder/context_tree.c
deleted file mode 100644
index 57f59f304..000000000
--- a/third_party/aom/av1/encoder/context_tree.c
+++ /dev/null
@@ -1,215 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/encoder/context_tree.h"
-#include "av1/encoder/encoder.h"
-
-static const BLOCK_SIZE square[MAX_SB_SIZE_LOG2 - 1] = {
-  BLOCK_4X4, BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64, BLOCK_128X128,
-};
-
-static void alloc_mode_context(AV1_COMMON *cm, int num_pix,
-                               PICK_MODE_CONTEXT *ctx) {
-  const int num_planes = av1_num_planes(cm);
-  int i;
-  const int num_blk = num_pix / 16;
-  ctx->num_4x4_blk = num_blk;
-
-  CHECK_MEM_ERROR(cm, ctx->blk_skip, aom_calloc(num_blk, sizeof(uint8_t)));
-  for (i = 0; i < num_planes; ++i) {
-    CHECK_MEM_ERROR(cm, ctx->coeff[i],
-                    aom_memalign(32, num_pix * sizeof(*ctx->coeff[i])));
-    CHECK_MEM_ERROR(cm, ctx->qcoeff[i],
-                    aom_memalign(32, num_pix * sizeof(*ctx->qcoeff[i])));
-    CHECK_MEM_ERROR(cm, ctx->dqcoeff[i],
-                    aom_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i])));
-    CHECK_MEM_ERROR(cm, ctx->eobs[i],
-                    aom_memalign(32, num_blk * sizeof(*ctx->eobs[i])));
-    CHECK_MEM_ERROR(
-        cm, ctx->txb_entropy_ctx[i],
-        aom_memalign(32, num_blk * sizeof(*ctx->txb_entropy_ctx[i])));
-  }
-
-  if (num_pix <= MAX_PALETTE_SQUARE) {
-    for (i = 0; i < 2; ++i) {
-      CHECK_MEM_ERROR(
-          cm, ctx->color_index_map[i],
-          aom_memalign(32, num_pix * sizeof(*ctx->color_index_map[i])));
-    }
-  }
-}
-
-static void free_mode_context(PICK_MODE_CONTEXT *ctx, const int num_planes) {
-  int i;
-  aom_free(ctx->blk_skip);
-  ctx->blk_skip = 0;
-  for (i = 0; i < num_planes; ++i) {
-    aom_free(ctx->coeff[i]);
-    ctx->coeff[i] = 0;
-    aom_free(ctx->qcoeff[i]);
-    ctx->qcoeff[i] = 0;
-    aom_free(ctx->dqcoeff[i]);
-    ctx->dqcoeff[i] = 0;
-    aom_free(ctx->eobs[i]);
-    ctx->eobs[i] = 0;
-    aom_free(ctx->txb_entropy_ctx[i]);
-    ctx->txb_entropy_ctx[i] = 0;
-  }
-
-  for (i = 0; i < 2; ++i) {
-    aom_free(ctx->color_index_map[i]);
-    ctx->color_index_map[i] = 0;
-  }
-}
-
-static void alloc_tree_contexts(AV1_COMMON *cm, PC_TREE *tree, int num_pix,
-                                int is_leaf) {
-  alloc_mode_context(cm, num_pix, &tree->none);
-
-  if (is_leaf) return;
-
-  alloc_mode_context(cm, num_pix / 2, &tree->horizontal[0]);
-  alloc_mode_context(cm, num_pix / 2, &tree->vertical[0]);
-
-  alloc_mode_context(cm, num_pix / 2, &tree->horizontal[1]);
-  alloc_mode_context(cm, num_pix / 2, &tree->vertical[1]);
-
-  alloc_mode_context(cm, num_pix / 4, &tree->horizontala[0]);
-  alloc_mode_context(cm, num_pix / 4, &tree->horizontala[1]);
-  alloc_mode_context(cm, num_pix / 2, &tree->horizontala[2]);
-
-  alloc_mode_context(cm, num_pix / 2, &tree->horizontalb[0]);
-  alloc_mode_context(cm, num_pix / 4, &tree->horizontalb[1]);
-  alloc_mode_context(cm, num_pix / 4, &tree->horizontalb[2]);
-
-  alloc_mode_context(cm, num_pix / 4, &tree->verticala[0]);
-  alloc_mode_context(cm, num_pix / 4, &tree->verticala[1]);
-  alloc_mode_context(cm, num_pix / 2, &tree->verticala[2]);
-
-  alloc_mode_context(cm, num_pix / 2, &tree->verticalb[0]);
-  alloc_mode_context(cm, num_pix / 4, &tree->verticalb[1]);
-  alloc_mode_context(cm, num_pix / 4, &tree->verticalb[2]);
-
-  for (int i = 0; i < 4; ++i) {
-    alloc_mode_context(cm, num_pix / 4, &tree->horizontal4[i]);
-    alloc_mode_context(cm, num_pix / 4, &tree->vertical4[i]);
-  }
-}
-
-static void free_tree_contexts(PC_TREE *tree, const int num_planes) {
-  int i;
-  for (i = 0; i < 3; i++) {
-    free_mode_context(&tree->horizontala[i], num_planes);
-    free_mode_context(&tree->horizontalb[i], num_planes);
-    free_mode_context(&tree->verticala[i], num_planes);
-    free_mode_context(&tree->verticalb[i], num_planes);
-  }
-  for (i = 0; i < 4; ++i) {
-    free_mode_context(&tree->horizontal4[i], num_planes);
-    free_mode_context(&tree->vertical4[i], num_planes);
-  }
-  free_mode_context(&tree->none, num_planes);
-  free_mode_context(&tree->horizontal[0], num_planes);
-  free_mode_context(&tree->horizontal[1], num_planes);
-  free_mode_context(&tree->vertical[0], num_planes);
-  free_mode_context(&tree->vertical[1], num_planes);
-}
-
-// This function sets up a tree of contexts such that at each square
-// partition level. There are contexts for none, horizontal, vertical, and
-// split.  Along with a block_size value and a selected block_size which
-// represents the state of our search.
-void av1_setup_pc_tree(AV1_COMMON *cm, ThreadData *td) {
-  int i, j;
-  const int tree_nodes_inc = 1024;
-  const int leaf_factor = 4;
-  const int leaf_nodes = 256 * leaf_factor;
-  const int tree_nodes = tree_nodes_inc + 256 + 64 + 16 + 4 + 1;
-  int pc_tree_index = 0;
-  PC_TREE *this_pc;
-  int square_index = 1;
-  int nodes;
-
-  aom_free(td->pc_tree);
-  CHECK_MEM_ERROR(cm, td->pc_tree,
-                  aom_calloc(tree_nodes, sizeof(*td->pc_tree)));
-  this_pc = &td->pc_tree[0];
-
-  // Sets up all the leaf nodes in the tree.
-  for (pc_tree_index = 0; pc_tree_index < leaf_nodes; ++pc_tree_index) {
-    PC_TREE *const tree = &td->pc_tree[pc_tree_index];
-    tree->block_size = square[0];
-    alloc_tree_contexts(cm, tree, 16, 1);
-  }
-
-  // Each node has 4 leaf nodes, fill each block_size level of the tree
-  // from leafs to the root.
-  for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) {
-    for (i = 0; i < nodes; ++i) {
-      PC_TREE *const tree = &td->pc_tree[pc_tree_index];
-      alloc_tree_contexts(cm, tree, 16 << (2 * square_index), 0);
-      tree->block_size = square[square_index];
-      for (j = 0; j < 4; j++) tree->split[j] = this_pc++;
-      ++pc_tree_index;
-    }
-    ++square_index;
-  }
-
-  // Set up the root node for the largest superblock size
-  i = MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2;
-  td->pc_root[i] = &td->pc_tree[tree_nodes - 1];
-  td->pc_root[i]->none.best_mode_index = 2;
-  // Set up the root nodes for the rest of the possible superblock sizes
-  while (--i >= 0) {
-    td->pc_root[i] = td->pc_root[i + 1]->split[0];
-    td->pc_root[i]->none.best_mode_index = 2;
-  }
-}
-
-void av1_free_pc_tree(ThreadData *td, const int num_planes) {
-  if (td->pc_tree != NULL) {
-    const int tree_nodes_inc = 1024;
-    const int tree_nodes = tree_nodes_inc + 256 + 64 + 16 + 4 + 1;
-    for (int i = 0; i < tree_nodes; ++i) {
-      free_tree_contexts(&td->pc_tree[i], num_planes);
-    }
-    aom_free(td->pc_tree);
-    td->pc_tree = NULL;
-  }
-}
-
-void av1_copy_tree_context(PICK_MODE_CONTEXT *dst_ctx,
-                           PICK_MODE_CONTEXT *src_ctx) {
-  dst_ctx->mic = src_ctx->mic;
-  dst_ctx->mbmi_ext = src_ctx->mbmi_ext;
-
-  dst_ctx->num_4x4_blk = src_ctx->num_4x4_blk;
-  dst_ctx->skip = src_ctx->skip;
-  dst_ctx->skippable = src_ctx->skippable;
-  dst_ctx->best_mode_index = src_ctx->best_mode_index;
-
-  memcpy(dst_ctx->blk_skip, src_ctx->blk_skip,
-         sizeof(uint8_t) * src_ctx->num_4x4_blk);
-
-  dst_ctx->hybrid_pred_diff = src_ctx->hybrid_pred_diff;
-  dst_ctx->comp_pred_diff = src_ctx->comp_pred_diff;
-  dst_ctx->single_pred_diff = src_ctx->single_pred_diff;
-
-  dst_ctx->rate = src_ctx->rate;
-  dst_ctx->dist = src_ctx->dist;
-  dst_ctx->rdcost = src_ctx->rdcost;
-  dst_ctx->rd_mode_is_ready = src_ctx->rd_mode_is_ready;
-
-  memcpy(dst_ctx->pred_mv, src_ctx->pred_mv, sizeof(MV) * REF_FRAMES);
-  dst_ctx->pred_interp_filter = src_ctx->pred_interp_filter;
-
-  dst_ctx->partition = src_ctx->partition;
-}
diff --git a/third_party/aom/av1/encoder/context_tree.h b/third_party/aom/av1/encoder/context_tree.h
deleted file mode 100644
index 4efc34985..000000000
--- a/third_party/aom/av1/encoder/context_tree.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_CONTEXT_TREE_H_
-#define AOM_AV1_ENCODER_CONTEXT_TREE_H_
-
-#include "av1/common/blockd.h"
-#include "av1/encoder/block.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct AV1_COMP;
-struct AV1Common;
-struct ThreadData;
-
-typedef enum {
-  // Search all the partition types in this plane.
-  SEARCH_FULL_PLANE = 0,
-  // Only search none_partition coding block.
-  NONE_PARTITION_PLANE = 1,
-  // Search all the partition types in this plane except split.
-  SEARCH_SAME_PLANE = 2,
-  // Skip search partition on this plane. Go split directly.
-  SPLIT_PLANE = 3,
-} CB_TREE_SEARCH;
-
-// Structure to hold snapshot of coding context during the mode picking process
-typedef struct {
-  MB_MODE_INFO mic;
-  MB_MODE_INFO_EXT mbmi_ext;
-  uint8_t *color_index_map[2];
-  uint8_t *blk_skip;
-
-  tran_low_t *coeff[MAX_MB_PLANE];
-  tran_low_t *qcoeff[MAX_MB_PLANE];
-  tran_low_t *dqcoeff[MAX_MB_PLANE];
-  uint16_t *eobs[MAX_MB_PLANE];
-  uint8_t *txb_entropy_ctx[MAX_MB_PLANE];
-
-  int num_4x4_blk;
-  int skip;
-  // For current partition, only if all Y, U, and V transform blocks'
-  // coefficients are quantized to 0, skippable is set to 1.
-  int skippable;
-  int best_mode_index;
-  int hybrid_pred_diff;
-  int comp_pred_diff;
-  int single_pred_diff;
-  // Skip certain ref frames during RD search of rectangular partitions.
-  int skip_ref_frame_mask;
-
-  // TODO(jingning) Use RD_COST struct here instead. This involves a boarder
-  // scope of refactoring.
-  int rate;
-  int64_t dist;
-  int64_t rdcost;
-  int rd_mode_is_ready;  // Flag to indicate whether rd pick mode decision has
-                         // been made.
-
-  // motion vector cache for adaptive motion search control in partition
-  // search loop
-  MV pred_mv[REF_FRAMES];
-  InterpFilter pred_interp_filter;
-  PARTITION_TYPE partition;
-} PICK_MODE_CONTEXT;
-
-typedef struct {
-  int valid;
-  int split;
-  int skip;
-  int64_t rdcost;
-  int sub_block_split[4];
-  int sub_block_skip[4];
-  int64_t sub_block_rdcost[4];
-} PC_TREE_STATS;
-
-typedef struct PC_TREE {
-  int index;
-  PARTITION_TYPE partitioning;
-  BLOCK_SIZE block_size;
-  PICK_MODE_CONTEXT none;
-  PICK_MODE_CONTEXT horizontal[2];
-  PICK_MODE_CONTEXT vertical[2];
-  PICK_MODE_CONTEXT horizontala[3];
-  PICK_MODE_CONTEXT horizontalb[3];
-  PICK_MODE_CONTEXT verticala[3];
-  PICK_MODE_CONTEXT verticalb[3];
-  PICK_MODE_CONTEXT horizontal4[4];
-  PICK_MODE_CONTEXT vertical4[4];
-  CB_TREE_SEARCH cb_search_range;
-  struct PC_TREE *split[4];
-  PC_TREE_STATS pc_tree_stats;
-} PC_TREE;
-
-void av1_setup_pc_tree(struct AV1Common *cm, struct ThreadData *td);
-void av1_free_pc_tree(struct ThreadData *td, const int num_planes);
-void av1_copy_tree_context(PICK_MODE_CONTEXT *dst_ctx,
-                           PICK_MODE_CONTEXT *src_ctx);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_CONTEXT_TREE_H_
diff --git a/third_party/aom/av1/encoder/corner_detect.c b/third_party/aom/av1/encoder/corner_detect.c
deleted file mode 100644
index e4c59dd9c..000000000
--- a/third_party/aom/av1/encoder/corner_detect.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <memory.h>
-#include <math.h>
-#include <assert.h>
-
-#include "third_party/fastfeat/fast.h"
-
-#include "av1/encoder/corner_detect.h"
-
-// Fast_9 wrapper
-#define FAST_BARRIER 18
-int fast_corner_detect(unsigned char *buf, int width, int height, int stride,
-                       int *points, int max_points) {
-  int num_points;
-  xy *const frm_corners_xy = fast9_detect_nonmax(buf, width, height, stride,
-                                                 FAST_BARRIER, &num_points);
-  num_points = (num_points <= max_points ? num_points : max_points);
-  if (num_points > 0 && frm_corners_xy) {
-    memcpy(points, frm_corners_xy, sizeof(*frm_corners_xy) * num_points);
-    free(frm_corners_xy);
-    return num_points;
-  }
-  free(frm_corners_xy);
-  return 0;
-}
diff --git a/third_party/aom/av1/encoder/corner_detect.h b/third_party/aom/av1/encoder/corner_detect.h
deleted file mode 100644
index cab59a774..000000000
--- a/third_party/aom/av1/encoder/corner_detect.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_CORNER_DETECT_H_
-#define AOM_AV1_ENCODER_CORNER_DETECT_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <memory.h>
-
-int fast_corner_detect(unsigned char *buf, int width, int height, int stride,
-                       int *points, int max_points);
-
-#endif  // AOM_AV1_ENCODER_CORNER_DETECT_H_
diff --git a/third_party/aom/av1/encoder/corner_match.c b/third_party/aom/av1/encoder/corner_match.c
deleted file mode 100644
index 29e934deb..000000000
--- a/third_party/aom/av1/encoder/corner_match.c
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include <memory.h>
-#include <math.h>
-
-#include "config/av1_rtcd.h"
-
-#include "av1/encoder/corner_match.h"
-
-#define SEARCH_SZ 9
-#define SEARCH_SZ_BY2 ((SEARCH_SZ - 1) / 2)
-
-#define THRESHOLD_NCC 0.75
-
-/* Compute var(im) * MATCH_SZ_SQ over a MATCH_SZ by MATCH_SZ window of im,
-   centered at (x, y).
-*/
-static double compute_variance(unsigned char *im, int stride, int x, int y) {
-  int sum = 0;
-  int sumsq = 0;
-  int var;
-  int i, j;
-  for (i = 0; i < MATCH_SZ; ++i)
-    for (j = 0; j < MATCH_SZ; ++j) {
-      sum += im[(i + y - MATCH_SZ_BY2) * stride + (j + x - MATCH_SZ_BY2)];
-      sumsq += im[(i + y - MATCH_SZ_BY2) * stride + (j + x - MATCH_SZ_BY2)] *
-               im[(i + y - MATCH_SZ_BY2) * stride + (j + x - MATCH_SZ_BY2)];
-    }
-  var = sumsq * MATCH_SZ_SQ - sum * sum;
-  return (double)var;
-}
-
-/* Compute corr(im1, im2) * MATCH_SZ * stddev(im1), where the
-   correlation/standard deviation are taken over MATCH_SZ by MATCH_SZ windows
-   of each image, centered at (x1, y1) and (x2, y2) respectively.
-*/
-double compute_cross_correlation_c(unsigned char *im1, int stride1, int x1,
-                                   int y1, unsigned char *im2, int stride2,
-                                   int x2, int y2) {
-  int v1, v2;
-  int sum1 = 0;
-  int sum2 = 0;
-  int sumsq2 = 0;
-  int cross = 0;
-  int var2, cov;
-  int i, j;
-  for (i = 0; i < MATCH_SZ; ++i)
-    for (j = 0; j < MATCH_SZ; ++j) {
-      v1 = im1[(i + y1 - MATCH_SZ_BY2) * stride1 + (j + x1 - MATCH_SZ_BY2)];
-      v2 = im2[(i + y2 - MATCH_SZ_BY2) * stride2 + (j + x2 - MATCH_SZ_BY2)];
-      sum1 += v1;
-      sum2 += v2;
-      sumsq2 += v2 * v2;
-      cross += v1 * v2;
-    }
-  var2 = sumsq2 * MATCH_SZ_SQ - sum2 * sum2;
-  cov = cross * MATCH_SZ_SQ - sum1 * sum2;
-  return cov / sqrt((double)var2);
-}
-
-static int is_eligible_point(int pointx, int pointy, int width, int height) {
-  return (pointx >= MATCH_SZ_BY2 && pointy >= MATCH_SZ_BY2 &&
-          pointx + MATCH_SZ_BY2 < width && pointy + MATCH_SZ_BY2 < height);
-}
-
-static int is_eligible_distance(int point1x, int point1y, int point2x,
-                                int point2y, int width, int height) {
-  const int thresh = (width < height ? height : width) >> 4;
-  return ((point1x - point2x) * (point1x - point2x) +
-          (point1y - point2y) * (point1y - point2y)) <= thresh * thresh;
-}
-
-static void improve_correspondence(unsigned char *frm, unsigned char *ref,
-                                   int width, int height, int frm_stride,
-                                   int ref_stride,
-                                   Correspondence *correspondences,
-                                   int num_correspondences) {
-  int i;
-  for (i = 0; i < num_correspondences; ++i) {
-    int x, y, best_x = 0, best_y = 0;
-    double best_match_ncc = 0.0;
-    for (y = -SEARCH_SZ_BY2; y <= SEARCH_SZ_BY2; ++y) {
-      for (x = -SEARCH_SZ_BY2; x <= SEARCH_SZ_BY2; ++x) {
-        double match_ncc;
-        if (!is_eligible_point(correspondences[i].rx + x,
-                               correspondences[i].ry + y, width, height))
-          continue;
-        if (!is_eligible_distance(correspondences[i].x, correspondences[i].y,
-                                  correspondences[i].rx + x,
-                                  correspondences[i].ry + y, width, height))
-          continue;
-        match_ncc = compute_cross_correlation(
-            frm, frm_stride, correspondences[i].x, correspondences[i].y, ref,
-            ref_stride, correspondences[i].rx + x, correspondences[i].ry + y);
-        if (match_ncc > best_match_ncc) {
-          best_match_ncc = match_ncc;
-          best_y = y;
-          best_x = x;
-        }
-      }
-    }
-    correspondences[i].rx += best_x;
-    correspondences[i].ry += best_y;
-  }
-  for (i = 0; i < num_correspondences; ++i) {
-    int x, y, best_x = 0, best_y = 0;
-    double best_match_ncc = 0.0;
-    for (y = -SEARCH_SZ_BY2; y <= SEARCH_SZ_BY2; ++y)
-      for (x = -SEARCH_SZ_BY2; x <= SEARCH_SZ_BY2; ++x) {
-        double match_ncc;
-        if (!is_eligible_point(correspondences[i].x + x,
-                               correspondences[i].y + y, width, height))
-          continue;
-        if (!is_eligible_distance(
-                correspondences[i].x + x, correspondences[i].y + y,
-                correspondences[i].rx, correspondences[i].ry, width, height))
-          continue;
-        match_ncc = compute_cross_correlation(
-            ref, ref_stride, correspondences[i].rx, correspondences[i].ry, frm,
-            frm_stride, correspondences[i].x + x, correspondences[i].y + y);
-        if (match_ncc > best_match_ncc) {
-          best_match_ncc = match_ncc;
-          best_y = y;
-          best_x = x;
-        }
-      }
-    correspondences[i].x += best_x;
-    correspondences[i].y += best_y;
-  }
-}
-
-int determine_correspondence(unsigned char *frm, int *frm_corners,
-                             int num_frm_corners, unsigned char *ref,
-                             int *ref_corners, int num_ref_corners, int width,
-                             int height, int frm_stride, int ref_stride,
-                             int *correspondence_pts) {
-  // TODO(sarahparker) Improve this to include 2-way match
-  int i, j;
-  Correspondence *correspondences = (Correspondence *)correspondence_pts;
-  int num_correspondences = 0;
-  for (i = 0; i < num_frm_corners; ++i) {
-    double best_match_ncc = 0.0;
-    double template_norm;
-    int best_match_j = -1;
-    if (!is_eligible_point(frm_corners[2 * i], frm_corners[2 * i + 1], width,
-                           height))
-      continue;
-    for (j = 0; j < num_ref_corners; ++j) {
-      double match_ncc;
-      if (!is_eligible_point(ref_corners[2 * j], ref_corners[2 * j + 1], width,
-                             height))
-        continue;
-      if (!is_eligible_distance(frm_corners[2 * i], frm_corners[2 * i + 1],
-                                ref_corners[2 * j], ref_corners[2 * j + 1],
-                                width, height))
-        continue;
-      match_ncc = compute_cross_correlation(
-          frm, frm_stride, frm_corners[2 * i], frm_corners[2 * i + 1], ref,
-          ref_stride, ref_corners[2 * j], ref_corners[2 * j + 1]);
-      if (match_ncc > best_match_ncc) {
-        best_match_ncc = match_ncc;
-        best_match_j = j;
-      }
-    }
-    // Note: We want to test if the best correlation is >= THRESHOLD_NCC,
-    // but need to account for the normalization in compute_cross_correlation.
-    template_norm = compute_variance(frm, frm_stride, frm_corners[2 * i],
-                                     frm_corners[2 * i + 1]);
-    if (best_match_ncc > THRESHOLD_NCC * sqrt(template_norm)) {
-      correspondences[num_correspondences].x = frm_corners[2 * i];
-      correspondences[num_correspondences].y = frm_corners[2 * i + 1];
-      correspondences[num_correspondences].rx = ref_corners[2 * best_match_j];
-      correspondences[num_correspondences].ry =
-          ref_corners[2 * best_match_j + 1];
-      num_correspondences++;
-    }
-  }
-  improve_correspondence(frm, ref, width, height, frm_stride, ref_stride,
-                         correspondences, num_correspondences);
-  return num_correspondences;
-}
diff --git a/third_party/aom/av1/encoder/corner_match.h b/third_party/aom/av1/encoder/corner_match.h
deleted file mode 100644
index 535d2faed..000000000
--- a/third_party/aom/av1/encoder/corner_match.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_ENCODER_CORNER_MATCH_H_
-#define AOM_AV1_ENCODER_CORNER_MATCH_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <memory.h>
-
-#define MATCH_SZ 13
-#define MATCH_SZ_BY2 ((MATCH_SZ - 1) / 2)
-#define MATCH_SZ_SQ (MATCH_SZ * MATCH_SZ)
-
-typedef struct {
-  int x, y;
-  int rx, ry;
-} Correspondence;
-
-int determine_correspondence(unsigned char *frm, int *frm_corners,
-                             int num_frm_corners, unsigned char *ref,
-                             int *ref_corners, int num_ref_corners, int width,
-                             int height, int frm_stride, int ref_stride,
-                             int *correspondence_pts);
-
-#endif  // AOM_AV1_ENCODER_CORNER_MATCH_H_
diff --git a/third_party/aom/av1/encoder/cost.c b/third_party/aom/av1/encoder/cost.c
deleted file mode 100644
index 323e2aed5..000000000
--- a/third_party/aom/av1/encoder/cost.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <assert.h>
-
-#include "av1/encoder/cost.h"
-#include "av1/common/entropy.h"
-
-// round(-log2(i/256.) * (1 << AV1_PROB_COST_SHIFT)); i = 128~255.
-const uint16_t av1_prob_cost[128] = {
-  512, 506, 501, 495, 489, 484, 478, 473, 467, 462, 456, 451, 446, 441, 435,
-  430, 425, 420, 415, 410, 405, 400, 395, 390, 385, 380, 375, 371, 366, 361,
-  356, 352, 347, 343, 338, 333, 329, 324, 320, 316, 311, 307, 302, 298, 294,
-  289, 285, 281, 277, 273, 268, 264, 260, 256, 252, 248, 244, 240, 236, 232,
-  228, 224, 220, 216, 212, 209, 205, 201, 197, 194, 190, 186, 182, 179, 175,
-  171, 168, 164, 161, 157, 153, 150, 146, 143, 139, 136, 132, 129, 125, 122,
-  119, 115, 112, 109, 105, 102, 99,  95,  92,  89,  86,  82,  79,  76,  73,
-  70,  66,  63,  60,  57,  54,  51,  48,  45,  42,  38,  35,  32,  29,  26,
-  23,  20,  18,  15,  12,  9,   6,   3,
-};
-
-void av1_cost_tokens_from_cdf(int *costs, const aom_cdf_prob *cdf,
-                              const int *inv_map) {
-  int i;
-  aom_cdf_prob prev_cdf = 0;
-  for (i = 0;; ++i) {
-    aom_cdf_prob p15 = AOM_ICDF(cdf[i]) - prev_cdf;
-    p15 = (p15 < EC_MIN_PROB) ? EC_MIN_PROB : p15;
-    prev_cdf = AOM_ICDF(cdf[i]);
-
-    if (inv_map)
-      costs[inv_map[i]] = av1_cost_symbol(p15);
-    else
-      costs[i] = av1_cost_symbol(p15);
-
-    // Stop once we reach the end of the CDF
-    if (cdf[i] == AOM_ICDF(CDF_PROB_TOP)) break;
-  }
-}
diff --git a/third_party/aom/av1/encoder/cost.h b/third_party/aom/av1/encoder/cost.h
deleted file mode 100644
index af5b09837..000000000
--- a/third_party/aom/av1/encoder/cost.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_COST_H_
-#define AOM_AV1_ENCODER_COST_H_
-
-#include "aom_dsp/prob.h"
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern const uint16_t av1_prob_cost[128];
-
-// The factor to scale from cost in bits to cost in av1_prob_cost units.
-#define AV1_PROB_COST_SHIFT 9
-
-// Cost of coding an n bit literal, using 128 (i.e. 50%) probability
-// for each bit.
-#define av1_cost_literal(n) ((n) * (1 << AV1_PROB_COST_SHIFT))
-
-// Calculate the cost of a symbol with probability p15 / 2^15
-static INLINE int av1_cost_symbol(aom_cdf_prob p15) {
-  assert(0 < p15 && p15 < CDF_PROB_TOP);
-  const int shift = CDF_PROB_BITS - 1 - get_msb(p15);
-  const int prob = get_prob(p15 << shift, CDF_PROB_TOP);
-  assert(prob >= 128);
-  return av1_prob_cost[prob - 128] + av1_cost_literal(shift);
-}
-
-void av1_cost_tokens_from_cdf(int *costs, const aom_cdf_prob *cdf,
-                              const int *inv_map);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_COST_H_
diff --git a/third_party/aom/av1/encoder/dwt.c b/third_party/aom/av1/encoder/dwt.c
deleted file mode 100644
index 04088b25f..000000000
--- a/third_party/aom/av1/encoder/dwt.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdlib.h>
-#include <math.h>
-
-#include "config/av1_rtcd.h"
-#include "av1/encoder/dwt.h"
-
-// Note: block length must be even for this implementation
-static void analysis_53_row(int length, tran_low_t *x, tran_low_t *lowpass,
-                            tran_low_t *highpass) {
-  int n;
-  tran_low_t r, *a, *b;
-
-  n = length >> 1;
-  b = highpass;
-  a = lowpass;
-  while (--n) {
-    *a++ = (r = *x++) * 2;
-    *b++ = *x - ((r + x[1] + 1) >> 1);
-    x++;
-  }
-  *a = (r = *x++) * 2;
-  *b = *x - r;
-
-  n = length >> 1;
-  b = highpass;
-  a = lowpass;
-  r = *highpass;
-  while (n--) {
-    *a++ += (r + (*b) + 1) >> 1;
-    r = *b++;
-  }
-}
-
-static void analysis_53_col(int length, tran_low_t *x, tran_low_t *lowpass,
-                            tran_low_t *highpass) {
-  int n;
-  tran_low_t r, *a, *b;
-
-  n = length >> 1;
-  b = highpass;
-  a = lowpass;
-  while (--n) {
-    *a++ = (r = *x++);
-    *b++ = (((*x) * 2) - (r + x[1]) + 2) >> 2;
-    x++;
-  }
-  *a = (r = *x++);
-  *b = (*x - r + 1) >> 1;
-
-  n = length >> 1;
-  b = highpass;
-  a = lowpass;
-  r = *highpass;
-  while (n--) {
-    *a++ += (r + (*b) + 1) >> 1;
-    r = *b++;
-  }
-}
-
-static void dyadic_analyze_53_uint8_input(int levels, int width, int height,
-                                          uint8_t *x, int pitch_x,
-                                          tran_low_t *c, int pitch_c,
-                                          int dwt_scale_bits, int hbd) {
-  int lv, i, j, nh, nw, hh = height, hw = width;
-  tran_low_t buffer[2 * DWT_MAX_LENGTH];
-
-  if (hbd) {
-    uint16_t *x16 = CONVERT_TO_SHORTPTR(x);
-    for (i = 0; i < height; i++) {
-      for (j = 0; j < width; j++) {
-        c[i * pitch_c + j] = x16[i * pitch_x + j] << dwt_scale_bits;
-      }
-    }
-  } else {
-    for (i = 0; i < height; i++) {
-      for (j = 0; j < width; j++) {
-        c[i * pitch_c + j] = x[i * pitch_x + j] << dwt_scale_bits;
-      }
-    }
-  }
-
-  for (lv = 0; lv < levels; lv++) {
-    nh = hh;
-    hh = (hh + 1) >> 1;
-    nw = hw;
-    hw = (hw + 1) >> 1;
-    if ((nh < 2) || (nw < 2)) return;
-    for (i = 0; i < nh; i++) {
-      memcpy(buffer, &c[i * pitch_c], nw * sizeof(tran_low_t));
-      analysis_53_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw);
-    }
-    for (j = 0; j < nw; j++) {
-      for (i = 0; i < nh; i++) buffer[i + nh] = c[i * pitch_c + j];
-      analysis_53_col(nh, buffer + nh, buffer, buffer + hh);
-      for (i = 0; i < nh; i++) c[i * pitch_c + j] = buffer[i];
-    }
-  }
-}
-
-void av1_fdwt8x8_uint8_input_c(uint8_t *input, tran_low_t *output, int stride,
-                               int hbd) {
-  dyadic_analyze_53_uint8_input(4, 8, 8, input, stride, output, 8, 2, hbd);
-}
-
-int av1_haar_ac_sad(tran_low_t *output, int bw, int bh, int stride) {
-  int acsad = 0;
-
-  for (int r = 0; r < bh; ++r)
-    for (int c = 0; c < bw; ++c) {
-      if (r >= bh / 2 || c >= bw / 2) acsad += abs(output[r * stride + c]);
-    }
-  return acsad;
-}
-
-uint64_t av1_dct_ac_sad(tran_low_t *output, int bw, int bh, int stride) {
-  uint64_t acsad = 0;
-
-  for (int r = 0; r < bh; ++r)
-    for (int c = 0; c < bw; ++c) {
-      if (r > 0 || c > 0) acsad += abs(output[r * stride + c]);
-    }
-
-  return acsad;
-}
-
-uint32_t av1_variance(uint8_t *input, int bw, int bh, int stride) {
-  int sum = 0;
-  uint32_t sse = 0;
-
-  for (int r = 0; r < bh; ++r)
-    for (int c = 0; c < bw; ++c) {
-      sum += input[r * stride + c];
-      sse += input[r * stride + c] * input[r * stride + c];
-    }
-  return sse - (uint32_t)(((int64_t)sum * sum) / (bw * bh));
-}
-
-int av1_haar_ac_sad_8x8_uint8_input(uint8_t *input, int stride, int hbd) {
-  tran_low_t output[64];
-
-  av1_fdwt8x8_uint8_input_c(input, output, stride, hbd);
-  return av1_haar_ac_sad(output, 8, 8, 8);
-}
diff --git a/third_party/aom/av1/encoder/dwt.h b/third_party/aom/av1/encoder/dwt.h
deleted file mode 100644
index 37306c6a5..000000000
--- a/third_party/aom/av1/encoder/dwt.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_DWT_H_
-#define AOM_AV1_ENCODER_DWT_H_
-
-#include "av1/common/common.h"
-#include "av1/common/enums.h"
-
-#define DWT_MAX_LENGTH 64
-
-void av1_fdwt8x8(tran_low_t *input, tran_low_t *output, int stride);
-void av1_fdwt8x8_uint8_input_c(uint8_t *input, tran_low_t *output, int stride,
-                               int hbd);
-int av1_haar_ac_sad_8x8_uint8_input(uint8_t *input, int stride, int hbd);
-
-#endif  // AOM_AV1_ENCODER_DWT_H_
diff --git a/third_party/aom/av1/encoder/encodeframe.c b/third_party/aom/av1/encoder/encodeframe.c
deleted file mode 100644
index cb226c59e..000000000
--- a/third_party/aom/av1/encoder/encodeframe.c
+++ /dev/null
@@ -1,5739 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-#include <math.h>
-#include <stdio.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/binary_codes_writer.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/aom_timer.h"
-#include "aom_ports/system_state.h"
-
-#if CONFIG_MISMATCH_DEBUG
-#include "aom_util/debug_util.h"
-#endif  // CONFIG_MISMATCH_DEBUG
-
-#include "av1/common/cfl.h"
-#include "av1/common/common.h"
-#include "av1/common/entropy.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/idct.h"
-#include "av1/common/mv.h"
-#include "av1/common/mvref_common.h"
-#include "av1/common/pred_common.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/seg_common.h"
-#include "av1/common/tile_common.h"
-#include "av1/common/warped_motion.h"
-
-#include "av1/encoder/aq_complexity.h"
-#include "av1/encoder/aq_cyclicrefresh.h"
-#include "av1/encoder/aq_variance.h"
-#include "av1/encoder/global_motion.h"
-#include "av1/encoder/encodeframe.h"
-#include "av1/encoder/encodemb.h"
-#include "av1/encoder/encodemv.h"
-#include "av1/encoder/encodetxb.h"
-#include "av1/encoder/ethread.h"
-#include "av1/encoder/extend.h"
-#include "av1/encoder/ml.h"
-#include "av1/encoder/partition_model_weights.h"
-#include "av1/encoder/rd.h"
-#include "av1/encoder/rdopt.h"
-#include "av1/encoder/reconinter_enc.h"
-#include "av1/encoder/segmentation.h"
-#include "av1/encoder/tokenize.h"
-
-static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
-                              ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
-                              int mi_row, int mi_col, BLOCK_SIZE bsize,
-                              int *rate);
-
-// This is used as a reference when computing the source variance for the
-//  purposes of activity masking.
-// Eventually this should be replaced by custom no-reference routines,
-//  which will be faster.
-static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
-  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-  128, 128, 128, 128, 128, 128, 128, 128
-};
-
-static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
-  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-  128, 128, 128, 128, 128, 128, 128, 128
-};
-
-static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
-  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
-  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
-  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
-  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
-  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
-  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
-  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
-  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
-  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
-  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
-  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
-  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
-  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
-  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
-  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
-  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
-};
-
-static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
-  128 * 16, 128 * 16
-};
-
-#if CONFIG_FP_MB_STATS
-static const uint8_t num_16x16_blocks_wide_lookup[BLOCK_SIZES_ALL] = {
-  1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 1, 1, 1, 2, 2, 4
-};
-static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES_ALL] = {
-  1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 1, 1, 2, 1, 4, 2
-};
-#endif  // CONFIG_FP_MB_STATS
-
-unsigned int av1_get_sby_perpixel_variance(const AV1_COMP *cpi,
-                                           const struct buf_2d *ref,
-                                           BLOCK_SIZE bs) {
-  unsigned int sse;
-  const unsigned int var =
-      cpi->fn_ptr[bs].vf(ref->buf, ref->stride, AV1_VAR_OFFS, 0, &sse);
-  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
-}
-
-unsigned int av1_high_get_sby_perpixel_variance(const AV1_COMP *cpi,
-                                                const struct buf_2d *ref,
-                                                BLOCK_SIZE bs, int bd) {
-  unsigned int var, sse;
-  switch (bd) {
-    case 10:
-      var =
-          cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
-                             CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10), 0, &sse);
-      break;
-    case 12:
-      var =
-          cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
-                             CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12), 0, &sse);
-      break;
-    case 8:
-    default:
-      var =
-          cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
-                             CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8), 0, &sse);
-      break;
-  }
-  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
-}
-
-static unsigned int get_sby_perpixel_diff_variance(const AV1_COMP *const cpi,
-                                                   const struct buf_2d *ref,
-                                                   int mi_row, int mi_col,
-                                                   BLOCK_SIZE bs) {
-  unsigned int sse, var;
-  uint8_t *last_y;
-  const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME);
-
-  assert(last != NULL);
-  last_y =
-      &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
-  var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
-  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
-}
-
-static BLOCK_SIZE get_rd_var_based_fixed_partition(AV1_COMP *cpi, MACROBLOCK *x,
-                                                   int mi_row, int mi_col) {
-  unsigned int var = get_sby_perpixel_diff_variance(
-      cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);
-  if (var < 8)
-    return BLOCK_64X64;
-  else if (var < 128)
-    return BLOCK_32X32;
-  else if (var < 2048)
-    return BLOCK_16X16;
-  else
-    return BLOCK_8X8;
-}
-
-// Lighter version of set_offsets that only sets the mode info
-// pointers.
-static void set_mode_info_offsets(const AV1_COMP *const cpi,
-                                  MACROBLOCK *const x, MACROBLOCKD *const xd,
-                                  int mi_row, int mi_col) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int idx_str = xd->mi_stride * mi_row + mi_col;
-  xd->mi = cm->mi_grid_visible + idx_str;
-  xd->mi[0] = cm->mi + idx_str;
-  x->mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
-}
-
-static void set_offsets_without_segment_id(const AV1_COMP *const cpi,
-                                           const TileInfo *const tile,
-                                           MACROBLOCK *const x, int mi_row,
-                                           int mi_col, BLOCK_SIZE bsize) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const int mi_width = mi_size_wide[bsize];
-  const int mi_height = mi_size_high[bsize];
-
-  set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
-
-  set_skip_context(xd, mi_row, mi_col, num_planes);
-  xd->above_txfm_context = cm->above_txfm_context[tile->tile_row] + mi_col;
-  xd->left_txfm_context =
-      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
-
-  // Set up destination pointers.
-  av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
-                       mi_col, 0, num_planes);
-
-  // Set up limit values for MV components.
-  // Mv beyond the range do not produce new/different prediction block.
-  x->mv_limits.row_min =
-      -(((mi_row + mi_height) * MI_SIZE) + AOM_INTERP_EXTEND);
-  x->mv_limits.col_min = -(((mi_col + mi_width) * MI_SIZE) + AOM_INTERP_EXTEND);
-  x->mv_limits.row_max = (cm->mi_rows - mi_row) * MI_SIZE + AOM_INTERP_EXTEND;
-  x->mv_limits.col_max = (cm->mi_cols - mi_col) * MI_SIZE + AOM_INTERP_EXTEND;
-
-  set_plane_n4(xd, mi_width, mi_height, num_planes);
-
-  // Set up distance of MB to edge of frame in 1/8th pel units.
-  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
-  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
-                 cm->mi_cols);
-
-  // Set up source buffers.
-  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes);
-
-  // R/D setup.
-  x->rdmult = cpi->rd.RDMULT;
-
-  // required by av1_append_sub8x8_mvs_for_idx() and av1_find_best_ref_mvs()
-  xd->tile = *tile;
-}
-
-static void set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile,
-                        MACROBLOCK *const x, int mi_row, int mi_col,
-                        BLOCK_SIZE bsize) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi;
-  const struct segmentation *const seg = &cm->seg;
-
-  set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);
-
-  mbmi = xd->mi[0];
-  xd->cfl.mi_row = mi_row;
-  xd->cfl.mi_col = mi_col;
-
-  mbmi->segment_id = 0;
-
-  // Setup segment ID.
-  if (seg->enabled) {
-    if (seg->enabled && !cpi->vaq_refresh) {
-      const uint8_t *const map =
-          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
-      mbmi->segment_id =
-          map ? get_segment_id(cm, map, bsize, mi_row, mi_col) : 0;
-    }
-    av1_init_plane_quantizers(cpi, x, mbmi->segment_id);
-  }
-}
-
-static void reset_intmv_filter_type(MB_MODE_INFO *mbmi) {
-  InterpFilter filters[2];
-
-  for (int dir = 0; dir < 2; ++dir) {
-    filters[dir] = av1_extract_interp_filter(mbmi->interp_filters, dir);
-  }
-  mbmi->interp_filters = av1_make_interp_filters(filters[0], filters[1]);
-}
-
-static void update_filter_type_count(uint8_t allow_update_cdf,
-                                     FRAME_COUNTS *counts,
-                                     const MACROBLOCKD *xd,
-                                     const MB_MODE_INFO *mbmi) {
-  int dir;
-  for (dir = 0; dir < 2; ++dir) {
-    const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
-    InterpFilter filter = av1_extract_interp_filter(mbmi->interp_filters, dir);
-    ++counts->switchable_interp[ctx][filter];
-    if (allow_update_cdf) {
-      update_cdf(xd->tile_ctx->switchable_interp_cdf[ctx], filter,
-                 SWITCHABLE_FILTERS);
-    }
-  }
-}
-
-static void update_global_motion_used(PREDICTION_MODE mode, BLOCK_SIZE bsize,
-                                      const MB_MODE_INFO *mbmi,
-                                      RD_COUNTS *rdc) {
-  if (mode == GLOBALMV || mode == GLOBAL_GLOBALMV) {
-    const int num_4x4s = mi_size_wide[bsize] * mi_size_high[bsize];
-    int ref;
-    for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
-      rdc->global_motion_used[mbmi->ref_frame[ref]] += num_4x4s;
-    }
-  }
-}
-
-static void reset_tx_size(MACROBLOCK *x, MB_MODE_INFO *mbmi,
-                          const TX_MODE tx_mode) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  if (xd->lossless[mbmi->segment_id]) {
-    mbmi->tx_size = TX_4X4;
-  } else if (tx_mode != TX_MODE_SELECT) {
-    mbmi->tx_size = tx_size_from_tx_mode(mbmi->sb_type, tx_mode);
-  } else {
-    BLOCK_SIZE bsize = mbmi->sb_type;
-    TX_SIZE min_tx_size = depth_to_tx_size(MAX_TX_DEPTH, bsize);
-    mbmi->tx_size = (TX_SIZE)TXSIZEMAX(mbmi->tx_size, min_tx_size);
-  }
-  if (is_inter_block(mbmi)) {
-    memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
-  }
-  memset(mbmi->txk_type, DCT_DCT, sizeof(mbmi->txk_type[0]) * TXK_TYPE_BUF_LEN);
-  av1_zero(x->blk_skip);
-  x->skip = 0;
-}
-
-static void update_state(const AV1_COMP *const cpi,
-                         const TileDataEnc *const tile_data, ThreadData *td,
-                         const PICK_MODE_CONTEXT *const ctx, int mi_row,
-                         int mi_col, BLOCK_SIZE bsize, RUN_TYPE dry_run) {
-  int i, x_idx, y;
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  RD_COUNTS *const rdc = &td->rd_counts;
-  MACROBLOCK *const x = &td->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  struct macroblock_plane *const p = x->plane;
-  struct macroblockd_plane *const pd = xd->plane;
-  const MB_MODE_INFO *const mi = &ctx->mic;
-  MB_MODE_INFO *const mi_addr = xd->mi[0];
-  const struct segmentation *const seg = &cm->seg;
-  const int bw = mi_size_wide[mi->sb_type];
-  const int bh = mi_size_high[mi->sb_type];
-  const int mis = cm->mi_stride;
-  const int mi_width = mi_size_wide[bsize];
-  const int mi_height = mi_size_high[bsize];
-
-  assert(mi->sb_type == bsize);
-
-  *mi_addr = *mi;
-  *x->mbmi_ext = ctx->mbmi_ext;
-
-  reset_intmv_filter_type(mi_addr);
-
-  memcpy(x->blk_skip, ctx->blk_skip, sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
-
-  x->skip = ctx->skip;
-
-  // If segmentation in use
-  if (seg->enabled) {
-    // For in frame complexity AQ copy the segment id from the segment map.
-    if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
-      const uint8_t *const map =
-          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
-      mi_addr->segment_id =
-          map ? get_segment_id(cm, map, bsize, mi_row, mi_col) : 0;
-      reset_tx_size(x, mi_addr, cm->tx_mode);
-    }
-    // Else for cyclic refresh mode update the segment map, set the segment id
-    // and then update the quantizer.
-    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
-      av1_cyclic_refresh_update_segment(cpi, mi_addr, mi_row, mi_col, bsize,
-                                        ctx->rate, ctx->dist, x->skip);
-      reset_tx_size(x, mi_addr, cm->tx_mode);
-    }
-    if (mi_addr->uv_mode == UV_CFL_PRED && !is_cfl_allowed(xd))
-      mi_addr->uv_mode = UV_DC_PRED;
-  }
-
-  for (i = 0; i < num_planes; ++i) {
-    p[i].coeff = ctx->coeff[i];
-    p[i].qcoeff = ctx->qcoeff[i];
-    pd[i].dqcoeff = ctx->dqcoeff[i];
-    p[i].eobs = ctx->eobs[i];
-    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
-  }
-  for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
-  // Restore the coding context of the MB to that that was in place
-  // when the mode was picked for it
-  for (y = 0; y < mi_height; y++)
-    for (x_idx = 0; x_idx < mi_width; x_idx++)
-      if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx &&
-          (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
-        xd->mi[x_idx + y * mis] = mi_addr;
-      }
-
-  if (cpi->oxcf.aq_mode) av1_init_plane_quantizers(cpi, x, mi_addr->segment_id);
-
-  if (dry_run) return;
-
-#if CONFIG_INTERNAL_STATS
-  {
-    unsigned int *const mode_chosen_counts =
-        (unsigned int *)cpi->mode_chosen_counts;  // Cast const away.
-    if (frame_is_intra_only(cm)) {
-      static const int kf_mode_index[] = {
-        THR_DC /*DC_PRED*/,
-        THR_V_PRED /*V_PRED*/,
-        THR_H_PRED /*H_PRED*/,
-        THR_D45_PRED /*D45_PRED*/,
-        THR_D135_PRED /*D135_PRED*/,
-        THR_D113_PRED /*D113_PRED*/,
-        THR_D157_PRED /*D157_PRED*/,
-        THR_D203_PRED /*D203_PRED*/,
-        THR_D67_PRED /*D67_PRED*/,
-        THR_SMOOTH,   /*SMOOTH_PRED*/
-        THR_SMOOTH_V, /*SMOOTH_V_PRED*/
-        THR_SMOOTH_H, /*SMOOTH_H_PRED*/
-        THR_PAETH /*PAETH_PRED*/,
-      };
-      ++mode_chosen_counts[kf_mode_index[mi_addr->mode]];
-    } else {
-      // Note how often each mode chosen as best
-      ++mode_chosen_counts[ctx->best_mode_index];
-    }
-  }
-#endif
-  if (!frame_is_intra_only(cm)) {
-    if (is_inter_block(mi_addr)) {
-      // TODO(sarahparker): global motion stats need to be handled per-tile
-      // to be compatible with tile-based threading.
-      update_global_motion_used(mi_addr->mode, bsize, mi_addr, rdc);
-    }
-
-    if (cm->interp_filter == SWITCHABLE &&
-        mi_addr->motion_mode != WARPED_CAUSAL &&
-        !is_nontrans_global_motion(xd, xd->mi[0])) {
-      update_filter_type_count(tile_data->allow_update_cdf, td->counts, xd,
-                               mi_addr);
-    }
-
-    rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
-    rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
-    rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
-  }
-
-  const int x_mis = AOMMIN(bw, cm->mi_cols - mi_col);
-  const int y_mis = AOMMIN(bh, cm->mi_rows - mi_row);
-  av1_copy_frame_mvs(cm, mi, mi_row, mi_col, x_mis, y_mis);
-}
-
-void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
-                          int mi_row, int mi_col, const int num_planes) {
-  // Set current frame pointer.
-  x->e_mbd.cur_buf = src;
-
-  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
-  // the static analysis warnings.
-  for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) {
-    const int is_uv = i > 0;
-    setup_pred_plane(&x->plane[i].src, x->e_mbd.mi[0]->sb_type, src->buffers[i],
-                     src->crop_widths[is_uv], src->crop_heights[is_uv],
-                     src->strides[is_uv], mi_row, mi_col, NULL,
-                     x->e_mbd.plane[i].subsampling_x,
-                     x->e_mbd.plane[i].subsampling_y);
-  }
-}
-
-static int set_segment_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
-                              int8_t segment_id) {
-  const AV1_COMMON *const cm = &cpi->common;
-  av1_init_plane_quantizers(cpi, x, segment_id);
-  aom_clear_system_state();
-  int segment_qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
-  return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
-}
-
-static int set_deltaq_rdmult(const AV1_COMP *const cpi, MACROBLOCKD *const xd) {
-  const AV1_COMMON *const cm = &cpi->common;
-
-  return av1_compute_rd_mult(
-      cpi, cm->base_qindex + xd->delta_qindex + cm->y_dc_delta_q);
-}
-
-static void rd_pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
-                             MACROBLOCK *const x, int mi_row, int mi_col,
-                             RD_STATS *rd_cost, PARTITION_TYPE partition,
-                             BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
-                             int64_t best_rd) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  TileInfo *const tile_info = &tile_data->tile_info;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi;
-  MB_MODE_INFO *ctx_mbmi = &ctx->mic;
-  struct macroblock_plane *const p = x->plane;
-  struct macroblockd_plane *const pd = xd->plane;
-  const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
-  const DELTAQ_MODE deltaq_mode = cpi->oxcf.deltaq_mode;
-  int i, orig_rdmult;
-
-  if (best_rd < 0) {
-    ctx->rdcost = INT64_MAX;
-    ctx->skip = 0;
-    av1_invalid_rd_stats(rd_cost);
-    return;
-  }
-
-  aom_clear_system_state();
-
-  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
-
-  mbmi = xd->mi[0];
-
-  if (ctx->rd_mode_is_ready) {
-    assert(ctx_mbmi->sb_type == bsize);
-    assert(ctx_mbmi->partition == partition);
-    *mbmi = *ctx_mbmi;
-    rd_cost->rate = ctx->rate;
-    rd_cost->dist = ctx->dist;
-    rd_cost->rdcost = ctx->rdcost;
-  } else {
-    mbmi->sb_type = bsize;
-    mbmi->partition = partition;
-  }
-
-#if CONFIG_RD_DEBUG
-  mbmi->mi_row = mi_row;
-  mbmi->mi_col = mi_col;
-#endif
-
-  for (i = 0; i < num_planes; ++i) {
-    p[i].coeff = ctx->coeff[i];
-    p[i].qcoeff = ctx->qcoeff[i];
-    pd[i].dqcoeff = ctx->dqcoeff[i];
-    p[i].eobs = ctx->eobs[i];
-    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
-  }
-
-  for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
-
-  if (!ctx->rd_mode_is_ready) {
-    ctx->skippable = 0;
-
-    // Set to zero to make sure we do not use the previous encoded frame stats
-    mbmi->skip = 0;
-
-    // Reset skip mode flag.
-    mbmi->skip_mode = 0;
-  }
-
-  x->skip_chroma_rd =
-      !is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
-                           xd->plane[1].subsampling_y);
-
-  if (ctx->rd_mode_is_ready) {
-    x->skip = ctx->skip;
-    *x->mbmi_ext = ctx->mbmi_ext;
-    return;
-  }
-
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    x->source_variance = av1_high_get_sby_perpixel_variance(
-        cpi, &x->plane[0].src, bsize, xd->bd);
-  } else {
-    x->source_variance =
-        av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
-  }
-
-  // Save rdmult before it might be changed, so it can be restored later.
-  orig_rdmult = x->rdmult;
-
-  if (aq_mode == VARIANCE_AQ) {
-    if (cpi->vaq_refresh) {
-      const int energy = bsize <= BLOCK_16X16
-                             ? x->mb_energy
-                             : av1_log_block_var(cpi, x, bsize);
-      mbmi->segment_id = energy;
-    }
-    x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
-  } else if (aq_mode == COMPLEXITY_AQ) {
-    x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
-  } else if (aq_mode == CYCLIC_REFRESH_AQ) {
-    // If segment is boosted, use rdmult for that segment.
-    if (cyclic_refresh_segment_id_boosted(mbmi->segment_id))
-      x->rdmult = av1_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
-  }
-
-  if (deltaq_mode > 0) x->rdmult = set_deltaq_rdmult(cpi, xd);
-
-  // Find best coding mode & reconstruct the MB so it is available
-  // as a predictor for MBs that follow in the SB
-  if (frame_is_intra_only(cm)) {
-    av1_rd_pick_intra_mode_sb(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx,
-                              best_rd);
-  } else {
-    if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
-      av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col,
-                                         rd_cost, bsize, ctx, best_rd);
-    } else {
-      av1_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
-                                bsize, ctx, best_rd);
-    }
-  }
-
-  // Examine the resulting rate and for AQ mode 2 make a segment choice.
-  if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) &&
-      (bsize >= BLOCK_16X16) &&
-      (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
-       cpi->refresh_alt2_ref_frame ||
-       (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) {
-    av1_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
-  }
-
-  x->rdmult = orig_rdmult;
-
-  // TODO(jingning) The rate-distortion optimization flow needs to be
-  // refactored to provide proper exit/return handle.
-  if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;
-
-  ctx->rate = rd_cost->rate;
-  ctx->dist = rd_cost->dist;
-  ctx->rdcost = rd_cost->rdcost;
-}
-
-static void update_inter_mode_stats(FRAME_CONTEXT *fc, FRAME_COUNTS *counts,
-                                    PREDICTION_MODE mode, int16_t mode_context,
-                                    uint8_t allow_update_cdf) {
-  (void)counts;
-
-  int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
-  if (mode == NEWMV) {
-#if CONFIG_ENTROPY_STATS
-    ++counts->newmv_mode[mode_ctx][0];
-#endif
-    if (allow_update_cdf) update_cdf(fc->newmv_cdf[mode_ctx], 0, 2);
-    return;
-  } else {
-#if CONFIG_ENTROPY_STATS
-    ++counts->newmv_mode[mode_ctx][1];
-#endif
-    if (allow_update_cdf) update_cdf(fc->newmv_cdf[mode_ctx], 1, 2);
-
-    mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
-    if (mode == GLOBALMV) {
-#if CONFIG_ENTROPY_STATS
-      ++counts->zeromv_mode[mode_ctx][0];
-#endif
-      if (allow_update_cdf) update_cdf(fc->zeromv_cdf[mode_ctx], 0, 2);
-      return;
-    } else {
-#if CONFIG_ENTROPY_STATS
-      ++counts->zeromv_mode[mode_ctx][1];
-#endif
-      if (allow_update_cdf) update_cdf(fc->zeromv_cdf[mode_ctx], 1, 2);
-      mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
-#if CONFIG_ENTROPY_STATS
-      ++counts->refmv_mode[mode_ctx][mode != NEARESTMV];
-#endif
-      if (allow_update_cdf)
-        update_cdf(fc->refmv_cdf[mode_ctx], mode != NEARESTMV, 2);
-    }
-  }
-}
-
-static void update_palette_cdf(MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi,
-                               FRAME_COUNTS *counts, uint8_t allow_update_cdf) {
-  FRAME_CONTEXT *fc = xd->tile_ctx;
-  const BLOCK_SIZE bsize = mbmi->sb_type;
-  const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-  const int palette_bsize_ctx = av1_get_palette_bsize_ctx(bsize);
-
-  (void)counts;
-
-  if (mbmi->mode == DC_PRED) {
-    const int n = pmi->palette_size[0];
-    const int palette_mode_ctx = av1_get_palette_mode_ctx(xd);
-
-#if CONFIG_ENTROPY_STATS
-    ++counts->palette_y_mode[palette_bsize_ctx][palette_mode_ctx][n > 0];
-#endif
-    if (allow_update_cdf)
-      update_cdf(fc->palette_y_mode_cdf[palette_bsize_ctx][palette_mode_ctx],
-                 n > 0, 2);
-    if (n > 0) {
-#if CONFIG_ENTROPY_STATS
-      ++counts->palette_y_size[palette_bsize_ctx][n - PALETTE_MIN_SIZE];
-#endif
-      if (allow_update_cdf) {
-        update_cdf(fc->palette_y_size_cdf[palette_bsize_ctx],
-                   n - PALETTE_MIN_SIZE, PALETTE_SIZES);
-      }
-    }
-  }
-
-  if (mbmi->uv_mode == UV_DC_PRED) {
-    const int n = pmi->palette_size[1];
-    const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0);
-
-#if CONFIG_ENTROPY_STATS
-    ++counts->palette_uv_mode[palette_uv_mode_ctx][n > 0];
-#endif
-    if (allow_update_cdf)
-      update_cdf(fc->palette_uv_mode_cdf[palette_uv_mode_ctx], n > 0, 2);
-
-    if (n > 0) {
-#if CONFIG_ENTROPY_STATS
-      ++counts->palette_uv_size[palette_bsize_ctx][n - PALETTE_MIN_SIZE];
-#endif
-      if (allow_update_cdf) {
-        update_cdf(fc->palette_uv_size_cdf[palette_bsize_ctx],
-                   n - PALETTE_MIN_SIZE, PALETTE_SIZES);
-      }
-    }
-  }
-}
-
-static void sum_intra_stats(const AV1_COMMON *const cm, FRAME_COUNTS *counts,
-                            MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi,
-                            const MB_MODE_INFO *above_mi,
-                            const MB_MODE_INFO *left_mi, const int intraonly,
-                            const int mi_row, const int mi_col,
-                            uint8_t allow_update_cdf) {
-  FRAME_CONTEXT *fc = xd->tile_ctx;
-  const PREDICTION_MODE y_mode = mbmi->mode;
-  const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
-  (void)counts;
-  const BLOCK_SIZE bsize = mbmi->sb_type;
-
-  if (intraonly) {
-#if CONFIG_ENTROPY_STATS
-    const PREDICTION_MODE above = av1_above_block_mode(above_mi);
-    const PREDICTION_MODE left = av1_left_block_mode(left_mi);
-    const int above_ctx = intra_mode_context[above];
-    const int left_ctx = intra_mode_context[left];
-    ++counts->kf_y_mode[above_ctx][left_ctx][y_mode];
-#endif  // CONFIG_ENTROPY_STATS
-    if (allow_update_cdf)
-      update_cdf(get_y_mode_cdf(fc, above_mi, left_mi), y_mode, INTRA_MODES);
-  } else {
-#if CONFIG_ENTROPY_STATS
-    ++counts->y_mode[size_group_lookup[bsize]][y_mode];
-#endif  // CONFIG_ENTROPY_STATS
-    if (allow_update_cdf)
-      update_cdf(fc->y_mode_cdf[size_group_lookup[bsize]], y_mode, INTRA_MODES);
-  }
-
-  if (av1_filter_intra_allowed(cm, mbmi)) {
-    const int use_filter_intra_mode =
-        mbmi->filter_intra_mode_info.use_filter_intra;
-#if CONFIG_ENTROPY_STATS
-    ++counts->filter_intra[mbmi->sb_type][use_filter_intra_mode];
-    if (use_filter_intra_mode) {
-      ++counts
-            ->filter_intra_mode[mbmi->filter_intra_mode_info.filter_intra_mode];
-    }
-#endif  // CONFIG_ENTROPY_STATS
-    if (allow_update_cdf) {
-      update_cdf(fc->filter_intra_cdfs[mbmi->sb_type], use_filter_intra_mode,
-                 2);
-      if (use_filter_intra_mode) {
-        update_cdf(fc->filter_intra_mode_cdf,
-                   mbmi->filter_intra_mode_info.filter_intra_mode,
-                   FILTER_INTRA_MODES);
-      }
-    }
-  }
-  if (av1_is_directional_mode(mbmi->mode) && av1_use_angle_delta(bsize)) {
-#if CONFIG_ENTROPY_STATS
-    ++counts->angle_delta[mbmi->mode - V_PRED]
-                         [mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA];
-#endif
-    if (allow_update_cdf) {
-      update_cdf(fc->angle_delta_cdf[mbmi->mode - V_PRED],
-                 mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA,
-                 2 * MAX_ANGLE_DELTA + 1);
-    }
-  }
-
-  if (!is_chroma_reference(mi_row, mi_col, bsize,
-                           xd->plane[AOM_PLANE_U].subsampling_x,
-                           xd->plane[AOM_PLANE_U].subsampling_y))
-    return;
-
-#if CONFIG_ENTROPY_STATS
-  ++counts->uv_mode[is_cfl_allowed(xd)][y_mode][uv_mode];
-#endif  // CONFIG_ENTROPY_STATS
-  if (allow_update_cdf) {
-    const CFL_ALLOWED_TYPE cfl_allowed = is_cfl_allowed(xd);
-    update_cdf(fc->uv_mode_cdf[cfl_allowed][y_mode], uv_mode,
-               UV_INTRA_MODES - !cfl_allowed);
-  }
-  if (uv_mode == UV_CFL_PRED) {
-    const int joint_sign = mbmi->cfl_alpha_signs;
-    const int idx = mbmi->cfl_alpha_idx;
-
-#if CONFIG_ENTROPY_STATS
-    ++counts->cfl_sign[joint_sign];
-#endif
-    if (allow_update_cdf)
-      update_cdf(fc->cfl_sign_cdf, joint_sign, CFL_JOINT_SIGNS);
-    if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) {
-      aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
-
-#if CONFIG_ENTROPY_STATS
-      ++counts->cfl_alpha[CFL_CONTEXT_U(joint_sign)][CFL_IDX_U(idx)];
-#endif
-      if (allow_update_cdf)
-        update_cdf(cdf_u, CFL_IDX_U(idx), CFL_ALPHABET_SIZE);
-    }
-    if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) {
-      aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
-
-#if CONFIG_ENTROPY_STATS
-      ++counts->cfl_alpha[CFL_CONTEXT_V(joint_sign)][CFL_IDX_V(idx)];
-#endif
-      if (allow_update_cdf)
-        update_cdf(cdf_v, CFL_IDX_V(idx), CFL_ALPHABET_SIZE);
-    }
-  }
-  if (av1_is_directional_mode(get_uv_mode(uv_mode)) &&
-      av1_use_angle_delta(bsize)) {
-#if CONFIG_ENTROPY_STATS
-    ++counts->angle_delta[uv_mode - UV_V_PRED]
-                         [mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA];
-#endif
-    if (allow_update_cdf) {
-      update_cdf(fc->angle_delta_cdf[uv_mode - UV_V_PRED],
-                 mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA,
-                 2 * MAX_ANGLE_DELTA + 1);
-    }
-  }
-  if (av1_allow_palette(cm->allow_screen_content_tools, bsize))
-    update_palette_cdf(xd, mbmi, counts, allow_update_cdf);
-}
-
-static void update_stats(const AV1_COMMON *const cm, TileDataEnc *tile_data,
-                         ThreadData *td, int mi_row, int mi_col) {
-  MACROBLOCK *x = &td->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
-  const BLOCK_SIZE bsize = mbmi->sb_type;
-  FRAME_CONTEXT *fc = xd->tile_ctx;
-  const uint8_t allow_update_cdf = tile_data->allow_update_cdf;
-
-  // delta quant applies to both intra and inter
-  const int super_block_upper_left =
-      ((mi_row & (cm->seq_params.mib_size - 1)) == 0) &&
-      ((mi_col & (cm->seq_params.mib_size - 1)) == 0);
-
-  const int seg_ref_active =
-      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
-
-  if (cm->skip_mode_flag && !seg_ref_active && is_comp_ref_allowed(bsize)) {
-    const int skip_mode_ctx = av1_get_skip_mode_context(xd);
-#if CONFIG_ENTROPY_STATS
-    td->counts->skip_mode[skip_mode_ctx][mbmi->skip_mode]++;
-#endif
-    if (allow_update_cdf)
-      update_cdf(fc->skip_mode_cdfs[skip_mode_ctx], mbmi->skip_mode, 2);
-  }
-
-  if (!mbmi->skip_mode) {
-    if (!seg_ref_active) {
-      const int skip_ctx = av1_get_skip_context(xd);
-#if CONFIG_ENTROPY_STATS
-      td->counts->skip[skip_ctx][mbmi->skip]++;
-#endif
-      if (allow_update_cdf) update_cdf(fc->skip_cdfs[skip_ctx], mbmi->skip, 2);
-    }
-  }
-
-  if (cm->delta_q_present_flag &&
-      (bsize != cm->seq_params.sb_size || !mbmi->skip) &&
-      super_block_upper_left) {
-#if CONFIG_ENTROPY_STATS
-    const int dq =
-        (mbmi->current_qindex - xd->current_qindex) / cm->delta_q_res;
-    const int absdq = abs(dq);
-    for (int i = 0; i < AOMMIN(absdq, DELTA_Q_SMALL); ++i) {
-      td->counts->delta_q[i][1]++;
-    }
-    if (absdq < DELTA_Q_SMALL) td->counts->delta_q[absdq][0]++;
-#endif
-    xd->current_qindex = mbmi->current_qindex;
-    if (cm->delta_lf_present_flag) {
-      if (cm->delta_lf_multi) {
-        const int frame_lf_count =
-            av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
-        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
-#if CONFIG_ENTROPY_STATS
-          const int delta_lf =
-              (mbmi->delta_lf[lf_id] - xd->delta_lf[lf_id]) / cm->delta_lf_res;
-          const int abs_delta_lf = abs(delta_lf);
-          for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
-            td->counts->delta_lf_multi[lf_id][i][1]++;
-          }
-          if (abs_delta_lf < DELTA_LF_SMALL)
-            td->counts->delta_lf_multi[lf_id][abs_delta_lf][0]++;
-#endif
-          xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id];
-        }
-      } else {
-#if CONFIG_ENTROPY_STATS
-        const int delta_lf =
-            (mbmi->delta_lf_from_base - xd->delta_lf_from_base) /
-            cm->delta_lf_res;
-        const int abs_delta_lf = abs(delta_lf);
-        for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
-          td->counts->delta_lf[i][1]++;
-        }
-        if (abs_delta_lf < DELTA_LF_SMALL)
-          td->counts->delta_lf[abs_delta_lf][0]++;
-#endif
-        xd->delta_lf_from_base = mbmi->delta_lf_from_base;
-      }
-    }
-  }
-
-  if (!is_inter_block(mbmi)) {
-    sum_intra_stats(cm, td->counts, xd, mbmi, xd->above_mbmi, xd->left_mbmi,
-                    frame_is_intra_only(cm), mi_row, mi_col,
-                    tile_data->allow_update_cdf);
-  }
-
-  if (av1_allow_intrabc(cm)) {
-    if (allow_update_cdf)
-      update_cdf(fc->intrabc_cdf, is_intrabc_block(mbmi), 2);
-#if CONFIG_ENTROPY_STATS
-    ++td->counts->intrabc[is_intrabc_block(mbmi)];
-#endif  // CONFIG_ENTROPY_STATS
-  }
-
-  if (!frame_is_intra_only(cm)) {
-    RD_COUNTS *rdc = &td->rd_counts;
-
-    FRAME_COUNTS *const counts = td->counts;
-
-    if (mbmi->skip_mode) {
-      rdc->skip_mode_used_flag = 1;
-      if (cm->reference_mode == REFERENCE_MODE_SELECT) {
-        assert(has_second_ref(mbmi));
-        rdc->compound_ref_used_flag = 1;
-      }
-      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
-      return;
-    }
-
-    const int inter_block = is_inter_block(mbmi);
-
-    if (!seg_ref_active) {
-#if CONFIG_ENTROPY_STATS
-      counts->intra_inter[av1_get_intra_inter_context(xd)][inter_block]++;
-#endif
-      if (allow_update_cdf) {
-        update_cdf(fc->intra_inter_cdf[av1_get_intra_inter_context(xd)],
-                   inter_block, 2);
-      }
-      // If the segment reference feature is enabled we have only a single
-      // reference frame allowed for the segment so exclude it from
-      // the reference frame counts used to work out probabilities.
-      if (inter_block) {
-        const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
-        const MV_REFERENCE_FRAME ref1 = mbmi->ref_frame[1];
-
-        av1_collect_neighbors_ref_counts(xd);
-
-        if (cm->reference_mode == REFERENCE_MODE_SELECT) {
-          if (has_second_ref(mbmi))
-            // This flag is also updated for 4x4 blocks
-            rdc->compound_ref_used_flag = 1;
-          if (is_comp_ref_allowed(bsize)) {
-#if CONFIG_ENTROPY_STATS
-            counts->comp_inter[av1_get_reference_mode_context(xd)]
-                              [has_second_ref(mbmi)]++;
-#endif  // CONFIG_ENTROPY_STATS
-            if (allow_update_cdf) {
-              update_cdf(av1_get_reference_mode_cdf(xd), has_second_ref(mbmi),
-                         2);
-            }
-          }
-        }
-
-        if (has_second_ref(mbmi)) {
-          const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi)
-                                                        ? UNIDIR_COMP_REFERENCE
-                                                        : BIDIR_COMP_REFERENCE;
-          if (allow_update_cdf) {
-            update_cdf(av1_get_comp_reference_type_cdf(xd), comp_ref_type,
-                       COMP_REFERENCE_TYPES);
-          }
-#if CONFIG_ENTROPY_STATS
-          counts->comp_ref_type[av1_get_comp_reference_type_context(xd)]
-                               [comp_ref_type]++;
-#endif  // CONFIG_ENTROPY_STATS
-
-          if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
-            const int bit = (ref0 == BWDREF_FRAME);
-            if (allow_update_cdf)
-              update_cdf(av1_get_pred_cdf_uni_comp_ref_p(xd), bit, 2);
-#if CONFIG_ENTROPY_STATS
-            counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p(xd)][0]
-                                [bit]++;
-#endif  // CONFIG_ENTROPY_STATS
-            if (!bit) {
-              const int bit1 = (ref1 == LAST3_FRAME || ref1 == GOLDEN_FRAME);
-              if (allow_update_cdf)
-                update_cdf(av1_get_pred_cdf_uni_comp_ref_p1(xd), bit1, 2);
-#if CONFIG_ENTROPY_STATS
-              counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p1(xd)][1]
-                                  [bit1]++;
-#endif  // CONFIG_ENTROPY_STATS
-              if (bit1) {
-                if (allow_update_cdf) {
-                  update_cdf(av1_get_pred_cdf_uni_comp_ref_p2(xd),
-                             ref1 == GOLDEN_FRAME, 2);
-                }
-#if CONFIG_ENTROPY_STATS
-                counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p2(xd)]
-                                    [2][ref1 == GOLDEN_FRAME]++;
-#endif  // CONFIG_ENTROPY_STATS
-              }
-            }
-          } else {
-            const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME);
-            if (allow_update_cdf)
-              update_cdf(av1_get_pred_cdf_comp_ref_p(xd), bit, 2);
-#if CONFIG_ENTROPY_STATS
-            counts->comp_ref[av1_get_pred_context_comp_ref_p(xd)][0][bit]++;
-#endif  // CONFIG_ENTROPY_STATS
-            if (!bit) {
-              if (allow_update_cdf) {
-                update_cdf(av1_get_pred_cdf_comp_ref_p1(xd),
-                           ref0 == LAST2_FRAME, 2);
-              }
-#if CONFIG_ENTROPY_STATS
-              counts->comp_ref[av1_get_pred_context_comp_ref_p1(xd)][1]
-                              [ref0 == LAST2_FRAME]++;
-#endif  // CONFIG_ENTROPY_STATS
-            } else {
-              if (allow_update_cdf) {
-                update_cdf(av1_get_pred_cdf_comp_ref_p2(xd),
-                           ref0 == GOLDEN_FRAME, 2);
-              }
-#if CONFIG_ENTROPY_STATS
-              counts->comp_ref[av1_get_pred_context_comp_ref_p2(xd)][2]
-                              [ref0 == GOLDEN_FRAME]++;
-#endif  // CONFIG_ENTROPY_STATS
-            }
-            if (allow_update_cdf) {
-              update_cdf(av1_get_pred_cdf_comp_bwdref_p(xd),
-                         ref1 == ALTREF_FRAME, 2);
-            }
-#if CONFIG_ENTROPY_STATS
-            counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(xd)][0]
-                               [ref1 == ALTREF_FRAME]++;
-#endif  // CONFIG_ENTROPY_STATS
-            if (ref1 != ALTREF_FRAME) {
-              if (allow_update_cdf) {
-                update_cdf(av1_get_pred_cdf_comp_bwdref_p1(xd),
-                           ref1 == ALTREF2_FRAME, 2);
-              }
-#if CONFIG_ENTROPY_STATS
-              counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p1(xd)][1]
-                                 [ref1 == ALTREF2_FRAME]++;
-#endif  // CONFIG_ENTROPY_STATS
-            }
-          }
-        } else {
-          const int bit = (ref0 >= BWDREF_FRAME);
-          if (allow_update_cdf)
-            update_cdf(av1_get_pred_cdf_single_ref_p1(xd), bit, 2);
-#if CONFIG_ENTROPY_STATS
-          counts->single_ref[av1_get_pred_context_single_ref_p1(xd)][0][bit]++;
-#endif  // CONFIG_ENTROPY_STATS
-          if (bit) {
-            assert(ref0 <= ALTREF_FRAME);
-            if (allow_update_cdf) {
-              update_cdf(av1_get_pred_cdf_single_ref_p2(xd),
-                         ref0 == ALTREF_FRAME, 2);
-            }
-#if CONFIG_ENTROPY_STATS
-            counts->single_ref[av1_get_pred_context_single_ref_p2(xd)][1]
-                              [ref0 == ALTREF_FRAME]++;
-#endif  // CONFIG_ENTROPY_STATS
-            if (ref0 != ALTREF_FRAME) {
-              if (allow_update_cdf) {
-                update_cdf(av1_get_pred_cdf_single_ref_p6(xd),
-                           ref0 == ALTREF2_FRAME, 2);
-              }
-#if CONFIG_ENTROPY_STATS
-              counts->single_ref[av1_get_pred_context_single_ref_p6(xd)][5]
-                                [ref0 == ALTREF2_FRAME]++;
-#endif  // CONFIG_ENTROPY_STATS
-            }
-          } else {
-            const int bit1 = !(ref0 == LAST2_FRAME || ref0 == LAST_FRAME);
-            if (allow_update_cdf)
-              update_cdf(av1_get_pred_cdf_single_ref_p3(xd), bit1, 2);
-#if CONFIG_ENTROPY_STATS
-            counts
-                ->single_ref[av1_get_pred_context_single_ref_p3(xd)][2][bit1]++;
-#endif  // CONFIG_ENTROPY_STATS
-            if (!bit1) {
-              if (allow_update_cdf) {
-                update_cdf(av1_get_pred_cdf_single_ref_p4(xd),
-                           ref0 != LAST_FRAME, 2);
-              }
-#if CONFIG_ENTROPY_STATS
-              counts->single_ref[av1_get_pred_context_single_ref_p4(xd)][3]
-                                [ref0 != LAST_FRAME]++;
-#endif  // CONFIG_ENTROPY_STATS
-            } else {
-              if (allow_update_cdf) {
-                update_cdf(av1_get_pred_cdf_single_ref_p5(xd),
-                           ref0 != LAST3_FRAME, 2);
-              }
-#if CONFIG_ENTROPY_STATS
-              counts->single_ref[av1_get_pred_context_single_ref_p5(xd)][4]
-                                [ref0 != LAST3_FRAME]++;
-#endif  // CONFIG_ENTROPY_STATS
-            }
-          }
-        }
-
-        if (cm->seq_params.enable_interintra_compound &&
-            is_interintra_allowed(mbmi)) {
-          const int bsize_group = size_group_lookup[bsize];
-          if (mbmi->ref_frame[1] == INTRA_FRAME) {
-#if CONFIG_ENTROPY_STATS
-            counts->interintra[bsize_group][1]++;
-#endif
-            if (allow_update_cdf)
-              update_cdf(fc->interintra_cdf[bsize_group], 1, 2);
-#if CONFIG_ENTROPY_STATS
-            counts->interintra_mode[bsize_group][mbmi->interintra_mode]++;
-#endif
-            if (allow_update_cdf) {
-              update_cdf(fc->interintra_mode_cdf[bsize_group],
-                         mbmi->interintra_mode, INTERINTRA_MODES);
-            }
-            if (is_interintra_wedge_used(bsize)) {
-#if CONFIG_ENTROPY_STATS
-              counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
-#endif
-              if (allow_update_cdf) {
-                update_cdf(fc->wedge_interintra_cdf[bsize],
-                           mbmi->use_wedge_interintra, 2);
-              }
-              if (mbmi->use_wedge_interintra) {
-#if CONFIG_ENTROPY_STATS
-                counts->wedge_idx[bsize][mbmi->interintra_wedge_index]++;
-#endif
-                if (allow_update_cdf) {
-                  update_cdf(fc->wedge_idx_cdf[bsize],
-                             mbmi->interintra_wedge_index, 16);
-                }
-              }
-            }
-          } else {
-#if CONFIG_ENTROPY_STATS
-            counts->interintra[bsize_group][0]++;
-#endif
-            if (allow_update_cdf)
-              update_cdf(fc->interintra_cdf[bsize_group], 0, 2);
-          }
-        }
-
-        set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
-        const MOTION_MODE motion_allowed =
-            cm->switchable_motion_mode
-                ? motion_mode_allowed(xd->global_motion, xd, mbmi,
-                                      cm->allow_warped_motion)
-                : SIMPLE_TRANSLATION;
-        if (mbmi->ref_frame[1] != INTRA_FRAME) {
-          if (motion_allowed == WARPED_CAUSAL) {
-#if CONFIG_ENTROPY_STATS
-            counts->motion_mode[bsize][mbmi->motion_mode]++;
-#endif
-            if (allow_update_cdf) {
-              update_cdf(fc->motion_mode_cdf[bsize], mbmi->motion_mode,
-                         MOTION_MODES);
-            }
-          } else if (motion_allowed == OBMC_CAUSAL) {
-#if CONFIG_ENTROPY_STATS
-            counts->obmc[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
-#endif
-            if (allow_update_cdf) {
-              update_cdf(fc->obmc_cdf[bsize], mbmi->motion_mode == OBMC_CAUSAL,
-                         2);
-            }
-          }
-        }
-
-        if (has_second_ref(mbmi)) {
-          assert(cm->reference_mode != SINGLE_REFERENCE &&
-                 is_inter_compound_mode(mbmi->mode) &&
-                 mbmi->motion_mode == SIMPLE_TRANSLATION);
-
-          const int masked_compound_used =
-              is_any_masked_compound_used(bsize) &&
-              cm->seq_params.enable_masked_compound;
-          if (masked_compound_used) {
-            const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
-#if CONFIG_ENTROPY_STATS
-            ++counts->comp_group_idx[comp_group_idx_ctx][mbmi->comp_group_idx];
-#endif
-            if (allow_update_cdf) {
-              update_cdf(fc->comp_group_idx_cdf[comp_group_idx_ctx],
-                         mbmi->comp_group_idx, 2);
-            }
-          }
-
-          if (mbmi->comp_group_idx == 0) {
-            const int comp_index_ctx = get_comp_index_context(cm, xd);
-#if CONFIG_ENTROPY_STATS
-            ++counts->compound_index[comp_index_ctx][mbmi->compound_idx];
-#endif
-            if (allow_update_cdf) {
-              update_cdf(fc->compound_index_cdf[comp_index_ctx],
-                         mbmi->compound_idx, 2);
-            }
-          } else {
-            assert(masked_compound_used);
-            if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
-#if CONFIG_ENTROPY_STATS
-              ++counts->compound_type[bsize][mbmi->interinter_comp.type - 1];
-#endif
-              if (allow_update_cdf) {
-                update_cdf(fc->compound_type_cdf[bsize],
-                           mbmi->interinter_comp.type - 1, COMPOUND_TYPES - 1);
-              }
-            }
-          }
-        }
-        if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
-          if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
-#if CONFIG_ENTROPY_STATS
-            counts->wedge_idx[bsize][mbmi->interinter_comp.wedge_index]++;
-#endif
-            if (allow_update_cdf) {
-              update_cdf(fc->wedge_idx_cdf[bsize],
-                         mbmi->interinter_comp.wedge_index, 16);
-            }
-          }
-        }
-      }
-    }
-
-    if (inter_block &&
-        !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
-      int16_t mode_ctx;
-      const PREDICTION_MODE mode = mbmi->mode;
-
-      mode_ctx =
-          av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
-      if (has_second_ref(mbmi)) {
-#if CONFIG_ENTROPY_STATS
-        ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
-#endif
-        if (allow_update_cdf)
-          update_cdf(fc->inter_compound_mode_cdf[mode_ctx],
-                     INTER_COMPOUND_OFFSET(mode), INTER_COMPOUND_MODES);
-      } else {
-        update_inter_mode_stats(fc, counts, mode, mode_ctx, allow_update_cdf);
-      }
-
-      int mode_allowed = (mbmi->mode == NEWMV);
-      mode_allowed |= (mbmi->mode == NEW_NEWMV);
-      if (mode_allowed) {
-        uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
-        int idx;
-
-        for (idx = 0; idx < 2; ++idx) {
-          if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
-#if CONFIG_ENTROPY_STATS
-            uint8_t drl_ctx =
-                av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
-            ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx];
-#endif
-
-            if (mbmi->ref_mv_idx == idx) break;
-          }
-        }
-      }
-
-      if (have_nearmv_in_inter_mode(mbmi->mode)) {
-        uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
-        int idx;
-
-        for (idx = 1; idx < 3; ++idx) {
-          if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
-#if CONFIG_ENTROPY_STATS
-            uint8_t drl_ctx =
-                av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
-            ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1];
-#endif
-
-            if (mbmi->ref_mv_idx == idx - 1) break;
-          }
-        }
-      }
-    }
-  }
-}
-
-typedef struct {
-  ENTROPY_CONTEXT a[MAX_MIB_SIZE * MAX_MB_PLANE];
-  ENTROPY_CONTEXT l[MAX_MIB_SIZE * MAX_MB_PLANE];
-  PARTITION_CONTEXT sa[MAX_MIB_SIZE];
-  PARTITION_CONTEXT sl[MAX_MIB_SIZE];
-  TXFM_CONTEXT *p_ta;
-  TXFM_CONTEXT *p_tl;
-  TXFM_CONTEXT ta[MAX_MIB_SIZE];
-  TXFM_CONTEXT tl[MAX_MIB_SIZE];
-} RD_SEARCH_MACROBLOCK_CONTEXT;
-
-static void restore_context(MACROBLOCK *x,
-                            const RD_SEARCH_MACROBLOCK_CONTEXT *ctx, int mi_row,
-                            int mi_col, BLOCK_SIZE bsize,
-                            const int num_planes) {
-  MACROBLOCKD *xd = &x->e_mbd;
-  int p;
-  const int num_4x4_blocks_wide =
-      block_size_wide[bsize] >> tx_size_wide_log2[0];
-  const int num_4x4_blocks_high =
-      block_size_high[bsize] >> tx_size_high_log2[0];
-  int mi_width = mi_size_wide[bsize];
-  int mi_height = mi_size_high[bsize];
-  for (p = 0; p < num_planes; p++) {
-    int tx_col = mi_col;
-    int tx_row = mi_row & MAX_MIB_MASK;
-    memcpy(xd->above_context[p] + (tx_col >> xd->plane[p].subsampling_x),
-           ctx->a + num_4x4_blocks_wide * p,
-           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
-               xd->plane[p].subsampling_x);
-    memcpy(xd->left_context[p] + (tx_row >> xd->plane[p].subsampling_y),
-           ctx->l + num_4x4_blocks_high * p,
-           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
-               xd->plane[p].subsampling_y);
-  }
-  memcpy(xd->above_seg_context + mi_col, ctx->sa,
-         sizeof(*xd->above_seg_context) * mi_width);
-  memcpy(xd->left_seg_context + (mi_row & MAX_MIB_MASK), ctx->sl,
-         sizeof(xd->left_seg_context[0]) * mi_height);
-  xd->above_txfm_context = ctx->p_ta;
-  xd->left_txfm_context = ctx->p_tl;
-  memcpy(xd->above_txfm_context, ctx->ta,
-         sizeof(*xd->above_txfm_context) * mi_width);
-  memcpy(xd->left_txfm_context, ctx->tl,
-         sizeof(*xd->left_txfm_context) * mi_height);
-}
-
-static void save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
-                         int mi_row, int mi_col, BLOCK_SIZE bsize,
-                         const int num_planes) {
-  const MACROBLOCKD *xd = &x->e_mbd;
-  int p;
-  const int num_4x4_blocks_wide =
-      block_size_wide[bsize] >> tx_size_wide_log2[0];
-  const int num_4x4_blocks_high =
-      block_size_high[bsize] >> tx_size_high_log2[0];
-  int mi_width = mi_size_wide[bsize];
-  int mi_height = mi_size_high[bsize];
-
-  // buffer the above/left context information of the block in search.
-  for (p = 0; p < num_planes; ++p) {
-    int tx_col = mi_col;
-    int tx_row = mi_row & MAX_MIB_MASK;
-    memcpy(ctx->a + num_4x4_blocks_wide * p,
-           xd->above_context[p] + (tx_col >> xd->plane[p].subsampling_x),
-           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
-               xd->plane[p].subsampling_x);
-    memcpy(ctx->l + num_4x4_blocks_high * p,
-           xd->left_context[p] + (tx_row >> xd->plane[p].subsampling_y),
-           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
-               xd->plane[p].subsampling_y);
-  }
-  memcpy(ctx->sa, xd->above_seg_context + mi_col,
-         sizeof(*xd->above_seg_context) * mi_width);
-  memcpy(ctx->sl, xd->left_seg_context + (mi_row & MAX_MIB_MASK),
-         sizeof(xd->left_seg_context[0]) * mi_height);
-  memcpy(ctx->ta, xd->above_txfm_context,
-         sizeof(*xd->above_txfm_context) * mi_width);
-  memcpy(ctx->tl, xd->left_txfm_context,
-         sizeof(*xd->left_txfm_context) * mi_height);
-  ctx->p_ta = xd->above_txfm_context;
-  ctx->p_tl = xd->left_txfm_context;
-}
-
-// Encodes one coding block at (mi_row, mi_col) from the mode decision held
-// in `ctx`. The block's partition type is recorded in its mode info, the
-// picked state is committed via update_state(), and the block is passed to
-// encode_superblock(). When `dry_run` is zero the coefficient buffer is set
-// up beforehand and, afterwards, x->cb_offset is advanced, the delta loop
-// filter values and comp_group_idx are fixed up, and update_stats() is run.
-static void encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data,
-                     ThreadData *td, TOKENEXTRA **tp, int mi_row, int mi_col,
-                     RUN_TYPE dry_run, BLOCK_SIZE bsize,
-                     PARTITION_TYPE partition,
-                     const PICK_MODE_CONTEXT *const ctx, int *rate) {
-  const AV1_COMMON *const cm = &cpi->common;
-  TileInfo *const tile = &tile_data->tile_info;
-  MACROBLOCK *const x = &td->mb;
-  MACROBLOCKD *xd = &x->e_mbd;
-  const int final_pass = (dry_run == 0);
-
-  set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
-  // xd->mi is only read after set_offsets() has run for this position.
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  mbmi->partition = partition;
-  update_state(cpi, tile_data, td, ctx, mi_row, mi_col, bsize, dry_run);
-
-  if (final_pass) av1_set_coeff_buffer(cpi, x, mi_row, mi_col);
-
-  encode_superblock(cpi, tile_data, td, tp, dry_run, mi_row, mi_col, bsize,
-                    rate);
-
-  // Everything below only applies to the real (non-dry) encode.
-  if (!final_pass) return;
-
-  x->cb_offset += block_size_wide[bsize] * block_size_high[bsize];
-
-  // A skipped block of full superblock size takes its delta loop filter
-  // values from xd when delta LF is present.
-  if (bsize == cm->seq_params.sb_size && mbmi->skip == 1 &&
-      cm->delta_lf_present_flag) {
-    int frame_lf_count = FRAME_LF_COUNT;
-    if (!(av1_num_planes(cm) > 1)) frame_lf_count -= 2;
-    for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
-      mbmi->delta_lf[lf_id] = xd->delta_lf[lf_id];
-    }
-    mbmi->delta_lf_from_base = xd->delta_lf_from_base;
-  }
-
-  if (has_second_ref(mbmi)) {
-    const int in_group_zero =
-        mbmi->compound_idx == 0 ||
-        mbmi->interinter_comp.type == COMPOUND_AVERAGE;
-    mbmi->comp_group_idx = in_group_zero ? 0 : 1;
-  }
-
-  update_stats(cm, tile_data, td, mi_row, mi_col);
-}
-
-// Encodes the block at (mi_row, mi_col) of size `bsize` following the
-// partitioning stored in `pc_tree`: encode_b() is called for every leaf block
-// and encode_sb() recurses for PARTITION_SPLIT. Blocks whose origin lies
-// outside the frame are ignored. When `dry_run` is zero and both the lower
-// and right halves of the block start inside the frame, the partition counts
-// (CONFIG_ENTROPY_STATS only) and, if allowed, the partition CDF are updated.
-// The partition context is refreshed at the end in all cases.
-static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
-                      TileDataEnc *tile_data, TOKENEXTRA **tp, int mi_row,
-                      int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize,
-                      PC_TREE *pc_tree, int *rate) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCK *const x = &td->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  // Half of the block width in mi units; used as offset to the second half.
-  const int hbs = mi_size_wide[bsize] / 2;
-  const int is_partition_root = bsize >= BLOCK_8X8;
-  // -1 marks "no partition context" for blocks smaller than 8x8.
-  const int ctx = is_partition_root
-                      ? partition_plane_context(xd, mi_row, mi_col, bsize)
-                      : -1;
-  const PARTITION_TYPE partition = pc_tree->partitioning;
-  const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
-  // Step between the four strips of PARTITION_HORZ_4 / PARTITION_VERT_4.
-  int quarter_step = mi_size_wide[bsize] / 4;
-  int i;
-  // Size of a quarter block, used by the A/B extended partition types.
-  BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
-
-  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
-
-  if (!dry_run && ctx >= 0) {
-    const int has_rows = (mi_row + hbs) < cm->mi_rows;
-    const int has_cols = (mi_col + hbs) < cm->mi_cols;
-
-    if (has_rows && has_cols) {
-#if CONFIG_ENTROPY_STATS
-      td->counts->partition[ctx][partition]++;
-#endif
-
-      if (tile_data->allow_update_cdf) {
-        FRAME_CONTEXT *fc = xd->tile_ctx;
-        update_cdf(fc->partition_cdf[ctx], partition,
-                   partition_cdf_length(bsize));
-      }
-    }
-  }
-
-  switch (partition) {
-    case PARTITION_NONE:
-      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
-               partition, &pc_tree->none, rate);
-      break;
-    case PARTITION_VERT:
-      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
-               partition, &pc_tree->vertical[0], rate);
-      // The right half is only coded when it starts inside the frame.
-      if (mi_col + hbs < cm->mi_cols) {
-        encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize,
-                 partition, &pc_tree->vertical[1], rate);
-      }
-      break;
-    case PARTITION_HORZ:
-      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
-               partition, &pc_tree->horizontal[0], rate);
-      // The bottom half is only coded when it starts inside the frame.
-      if (mi_row + hbs < cm->mi_rows) {
-        encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize,
-                 partition, &pc_tree->horizontal[1], rate);
-      }
-      break;
-    case PARTITION_SPLIT:
-      // Recurse into the four quadrants: top-left, top-right, bottom-left,
-      // bottom-right. Out-of-frame quadrants return immediately.
-      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize,
-                pc_tree->split[0], rate);
-      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + hbs, dry_run, subsize,
-                pc_tree->split[1], rate);
-      encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col, dry_run, subsize,
-                pc_tree->split[2], rate);
-      encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col + hbs, dry_run,
-                subsize, pc_tree->split[3], rate);
-      break;
-
-    case PARTITION_HORZ_A:
-      // Two quarter blocks on top, one `subsize` block below.
-      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2,
-               partition, &pc_tree->horizontala[0], rate);
-      encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2,
-               partition, &pc_tree->horizontala[1], rate);
-      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize,
-               partition, &pc_tree->horizontala[2], rate);
-      break;
-    case PARTITION_HORZ_B:
-      // One `subsize` block on top, two quarter blocks below.
-      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
-               partition, &pc_tree->horizontalb[0], rate);
-      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2,
-               partition, &pc_tree->horizontalb[1], rate);
-      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run,
-               bsize2, partition, &pc_tree->horizontalb[2], rate);
-      break;
-    case PARTITION_VERT_A:
-      // Two quarter blocks on the left, one `subsize` block on the right.
-      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2,
-               partition, &pc_tree->verticala[0], rate);
-      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2,
-               partition, &pc_tree->verticala[1], rate);
-      encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize,
-               partition, &pc_tree->verticala[2], rate);
-
-      break;
-    case PARTITION_VERT_B:
-      // One `subsize` block on the left, two quarter blocks on the right.
-      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
-               partition, &pc_tree->verticalb[0], rate);
-      encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2,
-               partition, &pc_tree->verticalb[1], rate);
-      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run,
-               bsize2, partition, &pc_tree->verticalb[2], rate);
-      break;
-    case PARTITION_HORZ_4:
-      for (i = 0; i < 4; ++i) {
-        int this_mi_row = mi_row + i * quarter_step;
-        // Stop at the first strip (other than the first) that starts below
-        // the bottom of the frame.
-        if (i > 0 && this_mi_row >= cm->mi_rows) break;
-
-        encode_b(cpi, tile_data, td, tp, this_mi_row, mi_col, dry_run, subsize,
-                 partition, &pc_tree->horizontal4[i], rate);
-      }
-      break;
-    case PARTITION_VERT_4:
-      for (i = 0; i < 4; ++i) {
-        int this_mi_col = mi_col + i * quarter_step;
-        // Stop at the first strip (other than the first) that starts right
-        // of the frame.
-        if (i > 0 && this_mi_col >= cm->mi_cols) break;
-
-        encode_b(cpi, tile_data, td, tp, mi_row, this_mi_col, dry_run, subsize,
-                 partition, &pc_tree->vertical4[i], rate);
-      }
-      break;
-    default: assert(0 && "Invalid partition type."); break;
-  }
-
-  update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
-}
-
-// Returns the largest block size, starting from `bsize`, that fits inside
-// the `rows_left` x `cols_left` mi units still available in the image.
-// Candidates are tried by repeatedly subtracting 3 from the BLOCK_SIZE value
-// (presumably the next smaller size of the same shape in the enum ordering --
-// confirm against the BLOCK_SIZE definition). *bh and *bw receive the mi
-// height and width of the last candidate examined; they are left untouched
-// when no rows or columns remain, in which case the result is capped at
-// BLOCK_8X8.
-static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left,
-                                      int cols_left, int *bh, int *bw) {
-  if (rows_left <= 0 || cols_left <= 0) return AOMMIN(bsize, BLOCK_8X8);
-
-  while (bsize > 0) {
-    *bh = mi_size_high[bsize];
-    *bw = mi_size_wide[bsize];
-    if (*bh <= rows_left && *bw <= cols_left) return bsize;
-    bsize -= 3;
-  }
-  return bsize;
-}
-
-// Fills the mode info grid `mib` of a superblock that is only partly inside
-// the tile. Each visited entry is pointed at its slot in `mi` and given the
-// size returned by find_partition_size() for the rows/columns remaining at
-// that position. The row and column steps start at bh_in / bw_in and follow
-// whatever find_partition_size() writes back.
-static void set_partial_sb_partition(const AV1_COMMON *const cm,
-                                     MB_MODE_INFO *mi, int bh_in, int bw_in,
-                                     int mi_rows_remaining,
-                                     int mi_cols_remaining, BLOCK_SIZE bsize,
-                                     MB_MODE_INFO **mib) {
-  const int sb_mi = cm->seq_params.mib_size;
-  int row_step = bh_in;
-
-  for (int row = 0; row < sb_mi; row += row_step) {
-    // The column step restarts from the requested width on every row.
-    int col_step = bw_in;
-    for (int col = 0; col < sb_mi; col += col_step) {
-      const int offset = row * cm->mi_stride + col;
-      MB_MODE_INFO *const entry = mi + offset;
-      mib[offset] = entry;
-      entry->sb_type =
-          find_partition_size(bsize, mi_rows_remaining - row,
-                              mi_cols_remaining - col, &row_step, &col_step);
-    }
-  }
-}
-
-// Assigns one fixed block size to every mode info entry of the superblock at
-// (mi_row, mi_col). A superblock that lies fully inside the tile gets `bsize`
-// everywhere; one that crosses the bottom or right tile boundary is handed
-// to set_partial_sb_partition(), which picks the largest size that still
-// fits at each position.
-static void set_fixed_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
-                                   MB_MODE_INFO **mib, int mi_row, int mi_col,
-                                   BLOCK_SIZE bsize) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int sb_mi = cm->seq_params.mib_size;
-  const int mi_rows_remaining = tile->mi_row_end - mi_row;
-  const int mi_cols_remaining = tile->mi_col_end - mi_col;
-  MB_MODE_INFO *const mi_upper_left = cm->mi + mi_row * cm->mi_stride + mi_col;
-  const int bh = mi_size_high[bsize];
-  const int bw = mi_size_wide[bsize];
-
-  assert((mi_rows_remaining > 0) && (mi_cols_remaining > 0));
-
-  // Partial superblock: let the helper choose sizes that fit.
-  if (mi_cols_remaining < sb_mi || mi_rows_remaining < sb_mi) {
-    set_partial_sb_partition(cm, mi_upper_left, bh, bw, mi_rows_remaining,
-                             mi_cols_remaining, bsize, mib);
-    return;
-  }
-
-  // Whole superblock is "in image": apply the requested size on a regular
-  // grid.
-  for (int row = 0; row < sb_mi; row += bh) {
-    for (int col = 0; col < sb_mi; col += bw) {
-      const int offset = row * cm->mi_stride + col;
-      MB_MODE_INFO *const entry = mi_upper_left + offset;
-      mib[offset] = entry;
-      entry->sb_type = bsize;
-    }
-  }
-}
-
-// Evaluates the partitioning already recorded for this block (as reported by
-// get_partition()) and, when the speed features enable it, compares it with
-// PARTITION_NONE and with a one-level PARTITION_SPLIT, keeping whichever has
-// the lowest RD cost in pc_tree->partitioning. The rate and distortion of the
-// chosen option are written to *rate and *dist. When `do_recon` is set the
-// block is finally encoded with encode_sb(): OUTPUT_ENABLED for a block of
-// superblock size, DRY_RUN_NORMAL otherwise.
-// Only NONE, HORZ, VERT and SPLIT are handled; the extended partition types
-// trigger an assertion.
-static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
-                             TileDataEnc *tile_data, MB_MODE_INFO **mib,
-                             TOKENEXTRA **tp, int mi_row, int mi_col,
-                             BLOCK_SIZE bsize, int *rate, int64_t *dist,
-                             int do_recon, PC_TREE *pc_tree) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  TileInfo *const tile_info = &tile_data->tile_info;
-  MACROBLOCK *const x = &td->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  // Block width and half width in mi units.
-  const int bs = mi_size_wide[bsize];
-  const int hbs = bs / 2;
-  int i;
-  const int pl = (bsize >= BLOCK_8X8)
-                     ? partition_plane_context(xd, mi_row, mi_col, bsize)
-                     : 0;
-  const PARTITION_TYPE partition =
-      (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
-                           : PARTITION_NONE;
-  const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
-  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
-  // last_part_rdc: cost of the incoming partitioning; none_rdc: cost of
-  // PARTITION_NONE; chosen_rdc: cost of the trial split, later the winner.
-  RD_STATS last_part_rdc, none_rdc, chosen_rdc;
-  BLOCK_SIZE sub_subsize = BLOCK_4X4;
-  int splits_below = 0;
-  // Remembered so mib[0]->sb_type can be put back after the NONE trial.
-  BLOCK_SIZE bs_type = mib[0]->sb_type;
-  int do_partition_search = 1;
-  PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
-
-  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
-
-  assert(mi_size_wide[bsize] == mi_size_high[bsize]);
-
-  av1_invalid_rd_stats(&last_part_rdc);
-  av1_invalid_rd_stats(&none_rdc);
-  av1_invalid_rd_stats(&chosen_rdc);
-
-  pc_tree->partitioning = partition;
-
-  xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
-  xd->left_txfm_context =
-      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
-  // Snapshot the entropy/segment/txfm contexts so each trial starts clean.
-  save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-
-  if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
-    set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
-    x->mb_energy = av1_log_block_var(cpi, x, bsize);
-  }
-
-  if (do_partition_search &&
-      cpi->sf.partition_search_type == SEARCH_PARTITION &&
-      cpi->sf.adjust_partitioning_from_last_frame) {
-    // Check if any of the sub blocks are further split.
-    if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
-      sub_subsize = get_partition_subsize(subsize, PARTITION_SPLIT);
-      splits_below = 1;
-      for (i = 0; i < 4; i++) {
-        int jj = i >> 1, ii = i & 0x01;
-        MB_MODE_INFO *this_mi = mib[jj * hbs * cm->mi_stride + ii * hbs];
-        if (this_mi && this_mi->sb_type >= sub_subsize) {
-          splits_below = 0;
-        }
-      }
-    }
-
-    // If partition is not none try none unless each of the 4 splits are split
-    // even further..
-    if (partition != PARTITION_NONE && !splits_below &&
-        mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) {
-      pc_tree->partitioning = PARTITION_NONE;
-      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc,
-                       PARTITION_NONE, bsize, ctx_none, INT64_MAX);
-
-      if (none_rdc.rate < INT_MAX) {
-        none_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
-        none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
-      }
-
-      // Undo the side effects of the NONE trial before evaluating the
-      // incoming partitioning.
-      restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-      mib[0]->sb_type = bs_type;
-      pc_tree->partitioning = partition;
-    }
-  }
-  // Clear the reference-frame skip masks of all rectangular and extended
-  // partition contexts.
-  for (int b = 0; b < 2; ++b) {
-    pc_tree->horizontal[b].skip_ref_frame_mask = 0;
-    pc_tree->vertical[b].skip_ref_frame_mask = 0;
-  }
-  for (int b = 0; b < 3; ++b) {
-    pc_tree->horizontala[b].skip_ref_frame_mask = 0;
-    pc_tree->horizontalb[b].skip_ref_frame_mask = 0;
-    pc_tree->verticala[b].skip_ref_frame_mask = 0;
-    pc_tree->verticalb[b].skip_ref_frame_mask = 0;
-  }
-  for (int b = 0; b < 4; ++b) {
-    pc_tree->horizontal4[b].skip_ref_frame_mask = 0;
-    pc_tree->vertical4[b].skip_ref_frame_mask = 0;
-  }
-  // Cost the incoming partitioning into last_part_rdc.
-  switch (partition) {
-    case PARTITION_NONE:
-      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
-                       PARTITION_NONE, bsize, ctx_none, INT64_MAX);
-      break;
-    case PARTITION_HORZ:
-      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
-                       PARTITION_HORZ, subsize, &pc_tree->horizontal[0],
-                       INT64_MAX);
-      if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
-          mi_row + hbs < cm->mi_rows) {
-        RD_STATS tmp_rdc;
-        const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
-        av1_init_rd_stats(&tmp_rdc);
-        // Commit and dry-run encode the first half before the second half
-        // is searched.
-        update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1);
-        encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
-                          mi_col, subsize, NULL);
-        rd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
-                         PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
-                         INT64_MAX);
-        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
-          av1_invalid_rd_stats(&last_part_rdc);
-          break;
-        }
-        last_part_rdc.rate += tmp_rdc.rate;
-        last_part_rdc.dist += tmp_rdc.dist;
-        last_part_rdc.rdcost += tmp_rdc.rdcost;
-      }
-      break;
-    case PARTITION_VERT:
-      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
-                       PARTITION_VERT, subsize, &pc_tree->vertical[0],
-                       INT64_MAX);
-      if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
-          mi_col + hbs < cm->mi_cols) {
-        RD_STATS tmp_rdc;
-        const PICK_MODE_CONTEXT *const ctx_v = &pc_tree->vertical[0];
-        av1_init_rd_stats(&tmp_rdc);
-        // Commit and dry-run encode the first half before the second half
-        // is searched.
-        update_state(cpi, tile_data, td, ctx_v, mi_row, mi_col, subsize, 1);
-        encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
-                          mi_col, subsize, NULL);
-        // NOTE(review): the context index is `bsize > BLOCK_8X8` here while
-        // the PARTITION_HORZ case uses a literal 1 -- confirm this asymmetry
-        // is intended.
-        rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
-                         PARTITION_VERT, subsize,
-                         &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX);
-        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
-          av1_invalid_rd_stats(&last_part_rdc);
-          break;
-        }
-        last_part_rdc.rate += tmp_rdc.rate;
-        last_part_rdc.dist += tmp_rdc.dist;
-        last_part_rdc.rdcost += tmp_rdc.rdcost;
-      }
-      break;
-    case PARTITION_SPLIT:
-      last_part_rdc.rate = 0;
-      last_part_rdc.dist = 0;
-      last_part_rdc.rdcost = 0;
-      for (i = 0; i < 4; i++) {
-        int x_idx = (i & 1) * hbs;
-        int y_idx = (i >> 1) * hbs;
-        int jj = i >> 1, ii = i & 0x01;
-        RD_STATS tmp_rdc;
-        // Quadrants starting outside the frame contribute nothing.
-        if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
-          continue;
-
-        av1_init_rd_stats(&tmp_rdc);
-        // Recurse; every quadrant except the last is reconstructed (i != 3).
-        rd_use_partition(cpi, td, tile_data,
-                         mib + jj * hbs * cm->mi_stride + ii * hbs, tp,
-                         mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate,
-                         &tmp_rdc.dist, i != 3, pc_tree->split[i]);
-        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
-          av1_invalid_rd_stats(&last_part_rdc);
-          break;
-        }
-        last_part_rdc.rate += tmp_rdc.rate;
-        last_part_rdc.dist += tmp_rdc.dist;
-      }
-      break;
-    case PARTITION_VERT_A:
-    case PARTITION_VERT_B:
-    case PARTITION_HORZ_A:
-    case PARTITION_HORZ_B:
-    case PARTITION_HORZ_4:
-    case PARTITION_VERT_4:
-      assert(0 && "Cannot handle extended partition types");
-      // No break: execution continues into the default case.
-    default: assert(0); break;
-  }
-
-  if (last_part_rdc.rate < INT_MAX) {
-    last_part_rdc.rate += x->partition_cost[pl][partition];
-    last_part_rdc.rdcost =
-        RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist);
-  }
-
-  // Optionally cost a one-level split (four PARTITION_NONE quadrants) into
-  // chosen_rdc as an alternative to the incoming partitioning.
-  if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame &&
-      cpi->sf.partition_search_type == SEARCH_PARTITION &&
-      partition != PARTITION_SPLIT && bsize > BLOCK_8X8 &&
-      (mi_row + bs < cm->mi_rows || mi_row + hbs == cm->mi_rows) &&
-      (mi_col + bs < cm->mi_cols || mi_col + hbs == cm->mi_cols)) {
-    BLOCK_SIZE split_subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
-    chosen_rdc.rate = 0;
-    chosen_rdc.dist = 0;
-
-    restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-    pc_tree->partitioning = PARTITION_SPLIT;
-
-    // Split partition.
-    for (i = 0; i < 4; i++) {
-      int x_idx = (i & 1) * hbs;
-      int y_idx = (i >> 1) * hbs;
-      RD_STATS tmp_rdc;
-
-      if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
-        continue;
-
-      save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-      pc_tree->split[i]->partitioning = PARTITION_NONE;
-      rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
-                       &tmp_rdc, PARTITION_SPLIT, split_subsize,
-                       &pc_tree->split[i]->none, INT64_MAX);
-
-      restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-      if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
-        av1_invalid_rd_stats(&chosen_rdc);
-        break;
-      }
-
-      chosen_rdc.rate += tmp_rdc.rate;
-      chosen_rdc.dist += tmp_rdc.dist;
-
-      // All quadrants but the last are encoded right after being costed.
-      if (i != 3)
-        encode_sb(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx,
-                  OUTPUT_ENABLED, split_subsize, pc_tree->split[i], NULL);
-
-      chosen_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
-    }
-    if (chosen_rdc.rate < INT_MAX) {
-      chosen_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT];
-      chosen_rdc.rdcost = RDCOST(x->rdmult, chosen_rdc.rate, chosen_rdc.dist);
-    }
-  }
-
-  // If last_part is better set the partitioning to that.
-  if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
-    mib[0]->sb_type = bsize;
-    if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition;
-    chosen_rdc = last_part_rdc;
-  }
-  // If none was better set the partitioning to that.
-  if (none_rdc.rdcost < chosen_rdc.rdcost) {
-    if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
-    chosen_rdc = none_rdc;
-  }
-
-  restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-
-  // We must have chosen a partitioning and encoding or we'll fail later on.
-  // No other opportunities for success.
-  if (bsize == cm->seq_params.sb_size)
-    assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
-
-  if (do_recon) {
-    if (bsize == cm->seq_params.sb_size) {
-      // NOTE: To get estimate for rate due to the tokens, use:
-      // int rate_coeffs = 0;
-      // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS,
-      //           bsize, pc_tree, &rate_coeffs);
-      x->cb_offset = 0;
-      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
-                pc_tree, NULL);
-    } else {
-      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
-                pc_tree, NULL);
-    }
-  }
-
-  *rate = chosen_rdc.rate;
-  *dist = chosen_rdc.dist;
-}
-
-/* clang-format off */
-// Relaxed lower bound for the partition search, indexed by an observed
-// block size (the trailing comment on each row names the indices it covers).
-// Every entry is a square size no larger than its index.
-static const BLOCK_SIZE min_partition_size[BLOCK_SIZES_ALL] = {
-                            BLOCK_4X4,    //                     4x4
-  BLOCK_4X4,   BLOCK_4X4,   BLOCK_4X4,    //    4x8,    8x4,     8x8
-  BLOCK_4X4,   BLOCK_4X4,   BLOCK_8X8,    //   8x16,   16x8,   16x16
-  BLOCK_8X8,   BLOCK_8X8,   BLOCK_16X16,  //  16x32,  32x16,   32x32
-  BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,  //  32x64,  64x32,   64x64
-  BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,  // 64x128, 128x64, 128x128
-  BLOCK_4X4,   BLOCK_4X4,   BLOCK_8X8,    //   4x16,   16x4,    8x32
-  BLOCK_8X8,   BLOCK_16X16, BLOCK_16X16,  //   32x8,  16x64,   64x16
-};
-
-// Relaxed upper bound for the partition search, indexed by an observed
-// block size (the trailing comment on each row names the indices it covers).
-// Every entry is a square size, or BLOCK_LARGEST for the biggest inputs.
-static const BLOCK_SIZE max_partition_size[BLOCK_SIZES_ALL] = {
-                                  BLOCK_8X8,    //                     4x4
-  BLOCK_16X16,   BLOCK_16X16,   BLOCK_16X16,    //    4x8,    8x4,     8x8
-  BLOCK_32X32,   BLOCK_32X32,   BLOCK_32X32,    //   8x16,   16x8,   16x16
-  BLOCK_64X64,   BLOCK_64X64,   BLOCK_64X64,    //  16x32,  32x16,   32x32
-  BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST,  //  32x64,  64x32,   64x64
-  BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST,  // 64x128, 128x64, 128x128
-  BLOCK_16X16,   BLOCK_16X16,   BLOCK_32X32,    //   4x16,   16x4,    8x32
-  BLOCK_32X32,   BLOCK_LARGEST, BLOCK_LARGEST,  //   32x8,  16x64,   64x16
-};
-
-// Largest square block size that is less than or equal to the given block
-// size, indexed by block size (the trailing comment on each row names the
-// indices it covers). Square sizes map to themselves.
-static const BLOCK_SIZE next_square_size[BLOCK_SIZES_ALL] = {
-                              BLOCK_4X4,    //                     4x4
-  BLOCK_4X4,   BLOCK_4X4,     BLOCK_8X8,    //    4x8,    8x4,     8x8
-  BLOCK_8X8,   BLOCK_8X8,     BLOCK_16X16,  //   8x16,   16x8,   16x16
-  BLOCK_16X16, BLOCK_16X16,   BLOCK_32X32,  //  16x32,  32x16,   32x32
-  BLOCK_32X32, BLOCK_32X32,   BLOCK_64X64,  //  32x64,  64x32,   64x64
-  BLOCK_64X64, BLOCK_64X64, BLOCK_128X128,  // 64x128, 128x64, 128x128
-  BLOCK_4X4,   BLOCK_4X4,   BLOCK_8X8,      //   4x16,   16x4,    8x32
-  BLOCK_8X8,   BLOCK_16X16, BLOCK_16X16,    //   32x8,  16x64,   64x16
-};
-/* clang-format on */
-
-// Scans the mode info entries of one superblock-sized region starting at
-// `mib` and widens [*min_block_size, *max_block_size] so that it covers every
-// sb_type found there. Missing (NULL) entries count as BLOCK_4X4.
-//
-// The caller must initialize both bounds beforehand; because they are only
-// ever widened, successive calls accumulate a range over several
-// superblocks. The region size is currently fixed to the superblock size
-// but could be turned into a parameter.
-static void get_sb_partition_size_range(const AV1_COMMON *const cm,
-                                        MACROBLOCKD *xd, MB_MODE_INFO **mib,
-                                        BLOCK_SIZE *min_block_size,
-                                        BLOCK_SIZE *max_block_size) {
-  const int sb_mi = cm->seq_params.mib_size;
-
-  for (int row = 0; row < sb_mi; ++row) {
-    const int row_start = row * xd->mi_stride;
-    for (int col = 0; col < sb_mi; ++col) {
-      const MB_MODE_INFO *const entry = mib[row_start + col];
-      BLOCK_SIZE sb_type = BLOCK_4X4;
-      if (entry) sb_type = entry->sb_type;
-      *min_block_size = AOMMIN(*min_block_size, sb_type);
-      *max_block_size = AOMMAX(*max_block_size, sb_type);
-    }
-  }
-}
-
-// Returns 1 when the top or bottom edge of the active image falls within the
-// mi rows [mi_row, mi_row + mi_step), and 0 otherwise. The active image is
-// normally the whole frame; in two-pass mode the rows reported as inactive
-// (e.g. formatting bars) are excluded at both ends.
-static int active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
-  const int range_end = mi_row + mi_step;
-  int top_edge = 0;
-  int bottom_edge = cpi->common.mi_rows;
-
-  if (cpi->oxcf.pass == 2) {
-    const TWO_PASS *const twopass = &cpi->twopass;
-    // The inactive zone is counted in MBs, hence the factor of two to get
-    // mi units. The image edge is in the following MB row.
-    const int inactive_mi =
-        (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
-
-    top_edge += inactive_mi;
-    bottom_edge -= inactive_mi;
-    bottom_edge = AOMMAX(top_edge, bottom_edge);
-  }
-
-  const int top_inside = (top_edge >= mi_row) && (top_edge < range_end);
-  const int bottom_inside =
-      (bottom_edge >= mi_row) && (bottom_edge < range_end);
-  return top_inside || bottom_inside;
-}
-
-// Returns 1 when the left or right edge of the active image falls within the
-// mi columns [mi_col, mi_col + mi_step), and 0 otherwise. The active image is
-// normally the whole frame; in two-pass mode the columns reported as inactive
-// (e.g. formatting bars) are excluded at both sides.
-static int active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
-  const int range_end = mi_col + mi_step;
-  int left_edge = 0;
-  int right_edge = cpi->common.mi_cols;
-
-  if (cpi->oxcf.pass == 2) {
-    const TWO_PASS *const twopass = &cpi->twopass;
-    // The inactive zone is counted in MBs, hence the factor of two to get
-    // mi units. The image edge is in the following MB column.
-    const int inactive_mi =
-        (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
-
-    left_edge += inactive_mi;
-    right_edge -= inactive_mi;
-    right_edge = AOMMAX(left_edge, right_edge);
-  }
-
-  const int left_inside = (left_edge >= mi_col) && (left_edge < range_end);
-  const int right_inside = (right_edge >= mi_col) && (right_edge < range_end);
-  return left_inside || right_inside;
-}
-
-// Returns non-zero when the superblock at (mi_row, mi_col) contains a
-// horizontal or vertical edge of the active image, as determined by
-// active_h_edge() and active_v_edge() over one superblock worth of mi units.
-static int active_edge_sb(const AV1_COMP *cpi, int mi_row, int mi_col) {
-  const int sb_mi = cpi->common.seq_params.mib_size;
-
-  if (active_h_edge(cpi, mi_row, sb_mi)) return 1;
-  return active_v_edge(cpi, mi_col, sb_mi) ? 1 : 0;
-}
-
-// Look at neighboring blocks and set a min and max partition size based on
-// what they chose.
-// The range is gathered from the co-located superblock of the previous frame
-// (non-key frames only) and from the left and above superblocks when they
-// are available, then clamped so that it is legal at the tile border and
-// never exceeds the sequence superblock size. Results are written to
-// *min_block_size and *max_block_size.
-static void rd_auto_partition_range(AV1_COMP *cpi, const TileInfo *const tile,
-                                    MACROBLOCKD *const xd, int mi_row,
-                                    int mi_col, BLOCK_SIZE *min_block_size,
-                                    BLOCK_SIZE *max_block_size) {
-  AV1_COMMON *const cm = &cpi->common;
-  MB_MODE_INFO **mi = xd->mi;
-  const int left_in_image = xd->left_available && mi[-1];
-  const int above_in_image = xd->up_available && mi[-xd->mi_stride];
-  const int mi_rows_remaining = tile->mi_row_end - mi_row;
-  const int mi_cols_remaining = tile->mi_col_end - mi_col;
-  // Only needed as out-parameters of find_partition_size(); not read here.
-  int bh, bw;
-  // With no prediction available the full range is allowed.
-  BLOCK_SIZE min_size = BLOCK_4X4;
-  BLOCK_SIZE max_size = BLOCK_LARGEST;
-
-  // Trap case where we do not have a prediction.
-  if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) {
-    // Default "min to max" and "max to min"
-    min_size = BLOCK_LARGEST;
-    max_size = BLOCK_4X4;
-
-    // NOTE: each call to get_sb_partition_size_range() uses the previous
-    // passed in values for min and max as a starting point.
-    // Find the min and max partition used in previous frame at this location
-    if (cm->frame_type != KEY_FRAME) {
-      MB_MODE_INFO **prev_mi =
-          &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col];
-      get_sb_partition_size_range(cm, xd, prev_mi, &min_size, &max_size);
-    }
-    // Find the min and max partition sizes used in the left superblock
-    if (left_in_image) {
-      MB_MODE_INFO **left_sb_mi = &mi[-cm->seq_params.mib_size];
-      get_sb_partition_size_range(cm, xd, left_sb_mi, &min_size, &max_size);
-    }
-    // Find the min and max partition sizes used in the above superblock.
-    if (above_in_image) {
-      MB_MODE_INFO **above_sb_mi =
-          &mi[-xd->mi_stride * cm->seq_params.mib_size];
-      get_sb_partition_size_range(cm, xd, above_sb_mi, &min_size, &max_size);
-    }
-
-    // Adjust observed min and max for "relaxed" auto partition case.
-    if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
-      min_size = min_partition_size[min_size];
-      max_size = max_partition_size[max_size];
-    }
-  }
-
-  // Check border cases where max and min from neighbors may not be legal.
-  max_size = find_partition_size(max_size, mi_rows_remaining, mi_cols_remaining,
-                                 &bh, &bw);
-  min_size = AOMMIN(min_size, max_size);
-
-  // Test for blocks at the edge of the active image.
-  // This may be the actual edge of the image or where there are formatting
-  // bars.
-  if (active_edge_sb(cpi, mi_row, mi_col)) {
-    min_size = BLOCK_4X4;
-  } else {
-    min_size = AOMMIN(cpi->sf.rd_auto_partition_min_limit, min_size);
-  }
-
-  // When use_square_partition_only is true, make sure at least one square
-  // partition is allowed by selecting the next smaller square size as
-  // *min_block_size.
-  if (min_size >= cpi->sf.use_square_partition_only_threshold) {
-    min_size = AOMMIN(min_size, next_square_size[max_size]);
-  }
-
-  // Neither bound may exceed the sequence superblock size.
-  *min_block_size = AOMMIN(min_size, cm->seq_params.sb_size);
-  *max_block_size = AOMMIN(max_size, cm->seq_params.sb_size);
-}
-
-// TODO(jingning) refactor functions setting partition search range
-//
-// Derives a partition size search range for the block at (mi_row, mi_col)
-// from the block sizes found in the co-located area of the previous frame,
-// in the column of entries directly to the left and in the row of entries
-// directly above. Missing (NULL) entries count as `bsize`. If every observed
-// size is the same, the range is widened through the min/max partition size
-// tables. Both results are capped at the sequence superblock size.
-static void set_partition_range(const AV1_COMMON *const cm,
-                                const MACROBLOCKD *const xd, int mi_row,
-                                int mi_col, BLOCK_SIZE bsize,
-                                BLOCK_SIZE *const min_bs,
-                                BLOCK_SIZE *const max_bs) {
-  const int cols = mi_size_wide[bsize];
-  const int rows = mi_size_high[bsize];
-  const int stride = cm->mi_stride;
-  MB_MODE_INFO **const prev_mi =
-      &cm->prev_mi_grid_visible[stride * mi_row + mi_col];
-  // Start from an inverted range so the first observation sets both bounds.
-  BLOCK_SIZE smallest = cm->seq_params.sb_size;
-  BLOCK_SIZE largest = BLOCK_4X4;
-
-  // Co-located block in the previous frame.
-  if (prev_mi) {
-    for (int r = 0; r < rows; ++r) {
-      for (int c = 0; c < cols; ++c) {
-        const MB_MODE_INFO *const entry = prev_mi[r * stride + c];
-        const BLOCK_SIZE seen = entry ? entry->sb_type : bsize;
-        smallest = AOMMIN(smallest, seen);
-        largest = AOMMAX(largest, seen);
-      }
-    }
-  }
-
-  // Column of entries immediately to the left of the block.
-  if (xd->left_available) {
-    for (int r = 0; r < rows; ++r) {
-      const MB_MODE_INFO *const entry = xd->mi[r * stride - 1];
-      const BLOCK_SIZE seen = entry ? entry->sb_type : bsize;
-      smallest = AOMMIN(smallest, seen);
-      largest = AOMMAX(largest, seen);
-    }
-  }
-
-  // Row of entries immediately above the block.
-  if (xd->up_available) {
-    for (int c = 0; c < cols; ++c) {
-      const MB_MODE_INFO *const entry = xd->mi[c - stride];
-      const BLOCK_SIZE seen = entry ? entry->sb_type : bsize;
-      smallest = AOMMIN(smallest, seen);
-      largest = AOMMAX(largest, seen);
-    }
-  }
-
-  // A single observed size gives no range; relax it in both directions.
-  if (smallest == largest) {
-    smallest = min_partition_size[smallest];
-    largest = max_partition_size[largest];
-  }
-
-  *min_bs = AOMMIN(smallest, cm->seq_params.sb_size);
-  *max_bs = AOMMIN(largest, cm->seq_params.sb_size);
-}
-
-// Saves the macroblock's predicted motion vectors (x->pred_mv) into `ctx`.
-static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
-  const size_t pred_mv_bytes = sizeof(x->pred_mv);
-  memcpy(ctx->pred_mv, x->pred_mv, pred_mv_bytes);
-}
-
-// Restores the macroblock's predicted motion vectors (x->pred_mv) from the
-// copy held in `ctx`.
-static INLINE void load_pred_mv(MACROBLOCK *x,
-                                const PICK_MODE_CONTEXT *const ctx) {
-  const size_t pred_mv_bytes = sizeof(x->pred_mv);
-  memcpy(x->pred_mv, ctx->pred_mv, pred_mv_bytes);
-}
-
-#if CONFIG_FP_MB_STATS
-// Per-block-size thresholds, one entry per BLOCK_SIZES index. Judging by the
-// name these are q-index limits for a first-pass-stats based skip decision;
-// NOTE(review): confirm against the code that reads this table.
-const int qindex_skip_threshold_lookup[BLOCK_SIZES] = {
-  0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120,
-  // TODO(debargha): What are the correct numbers here?
-  130, 130, 150
-};
-// Per-block-size thresholds, one entry per BLOCK_SIZES index. Judging by the
-// name these are q-index limits for a first-pass-stats based split decision;
-// NOTE(review): confirm against the code that reads this table.
-const int qindex_split_threshold_lookup[BLOCK_SIZES] = {
-  0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120,
-  // TODO(debargha): What are the correct numbers here?
-  160, 160, 240
-};
-const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = {
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6,
-  // TODO(debargha): What are the correct numbers here?
-  8, 8, 10
-};
-
-typedef enum {
-  MV_ZERO = 0,
-  MV_LEFT = 1,
-  MV_UP = 2,
-  MV_RIGHT = 3,
-  MV_DOWN = 4,
-  MV_INVALID
-} MOTION_DIRECTION;
-
-static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) {
-  if (fp_byte & FPMB_MOTION_ZERO_MASK) {
-    return MV_ZERO;
-  } else if (fp_byte & FPMB_MOTION_LEFT_MASK) {
-    return MV_LEFT;
-  } else if (fp_byte & FPMB_MOTION_RIGHT_MASK) {
-    return MV_RIGHT;
-  } else if (fp_byte & FPMB_MOTION_UP_MASK) {
-    return MV_UP;
-  } else {
-    return MV_DOWN;
-  }
-}
-
-static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
-                                           MOTION_DIRECTION that_mv) {
-  if (this_mv == that_mv) {
-    return 0;
-  } else {
-    return abs(this_mv - that_mv) == 2 ? 2 : 1;
-  }
-}
-#endif
-
-// Try searching for an encoding for the given subblock. Returns zero if the
-// rdcost is already too high (to tell the caller not to bother searching for
-// encodings of further subblocks)
-static int rd_try_subblock(AV1_COMP *const cpi, ThreadData *td,
-                           TileDataEnc *tile_data, TOKENEXTRA **tp, int is_last,
-                           int mi_row, int mi_col, BLOCK_SIZE subsize,
-                           RD_STATS *best_rdc, RD_STATS *sum_rdc,
-                           RD_STATS *this_rdc, PARTITION_TYPE partition,
-                           PICK_MODE_CONTEXT *prev_ctx,
-                           PICK_MODE_CONTEXT *this_ctx) {
-#define RTS_X_RATE_NOCOEF_ARG
-#define RTS_MAX_RDCOST best_rdc->rdcost
-
-  MACROBLOCK *const x = &td->mb;
-
-  if (cpi->sf.adaptive_motion_search) load_pred_mv(x, prev_ctx);
-
-  const int64_t rdcost_remaining = best_rdc->rdcost == INT64_MAX
-                                       ? INT64_MAX
-                                       : (best_rdc->rdcost - sum_rdc->rdcost);
-
-  rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, this_rdc,
-                   RTS_X_RATE_NOCOEF_ARG partition, subsize, this_ctx,
-                   rdcost_remaining);
-
-  if (this_rdc->rate == INT_MAX) {
-    sum_rdc->rdcost = INT64_MAX;
-  } else {
-    sum_rdc->rate += this_rdc->rate;
-    sum_rdc->dist += this_rdc->dist;
-    sum_rdc->rdcost += this_rdc->rdcost;
-  }
-
-  if (sum_rdc->rdcost >= RTS_MAX_RDCOST) return 0;
-
-  if (!is_last) {
-    update_state(cpi, tile_data, td, this_ctx, mi_row, mi_col, subsize, 1);
-    encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col,
-                      subsize, NULL);
-  }
-
-  return 1;
-
-#undef RTS_X_RATE_NOCOEF_ARG
-#undef RTS_MAX_RDCOST
-}
-
-static void rd_test_partition3(AV1_COMP *const cpi, ThreadData *td,
-                               TileDataEnc *tile_data, TOKENEXTRA **tp,
-                               PC_TREE *pc_tree, RD_STATS *best_rdc,
-                               PICK_MODE_CONTEXT ctxs[3],
-                               PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
-                               BLOCK_SIZE bsize, PARTITION_TYPE partition,
-                               int mi_row0, int mi_col0, BLOCK_SIZE subsize0,
-                               int mi_row1, int mi_col1, BLOCK_SIZE subsize1,
-                               int mi_row2, int mi_col2, BLOCK_SIZE subsize2) {
-  MACROBLOCK *const x = &td->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  RD_STATS sum_rdc, this_rdc;
-#define RTP_STX_TRY_ARGS
-  int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
-  av1_init_rd_stats(&sum_rdc);
-  sum_rdc.rate = x->partition_cost[pl][partition];
-  sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
-  if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row0, mi_col0, subsize0,
-                       best_rdc, &sum_rdc, &this_rdc,
-                       RTP_STX_TRY_ARGS partition, ctx, &ctxs[0]))
-    return;
-
-  if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row1, mi_col1, subsize1,
-                       best_rdc, &sum_rdc, &this_rdc,
-                       RTP_STX_TRY_ARGS partition, &ctxs[0], &ctxs[1]))
-    return;
-
-  // With the new layout of mixed partitions for PARTITION_HORZ_B and
-  // PARTITION_VERT_B, the last subblock might start past halfway through the
-  // main block, so we might signal it even though the subblock lies strictly
-  // outside the image. In that case, we won't spend any bits coding it and the
-  // difference (obviously) doesn't contribute to the error.
-  const int try_block2 = 1;
-  if (try_block2 &&
-      !rd_try_subblock(cpi, td, tile_data, tp, 1, mi_row2, mi_col2, subsize2,
-                       best_rdc, &sum_rdc, &this_rdc,
-                       RTP_STX_TRY_ARGS partition, &ctxs[1], &ctxs[2]))
-    return;
-
-  if (sum_rdc.rdcost >= best_rdc->rdcost) return;
-
-  sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
-
-  if (sum_rdc.rdcost >= best_rdc->rdcost) return;
-
-  *best_rdc = sum_rdc;
-  pc_tree->partitioning = partition;
-
-#undef RTP_STX_TRY_ARGS
-}
-
-static void reset_partition(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
-  pc_tree->partitioning = PARTITION_NONE;
-  pc_tree->cb_search_range = SEARCH_FULL_PLANE;
-  pc_tree->none.skip = 0;
-
-  if (bsize >= BLOCK_8X8) {
-    BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
-    for (int idx = 0; idx < 4; ++idx)
-      reset_partition(pc_tree->split[idx], subsize);
-  }
-}
-
-static void rd_pick_sqr_partition(AV1_COMP *const cpi, ThreadData *td,
-                                  TileDataEnc *tile_data, TOKENEXTRA **tp,
-                                  int mi_row, int mi_col, BLOCK_SIZE bsize,
-                                  RD_STATS *rd_cost, int64_t best_rd,
-                                  PC_TREE *pc_tree, int64_t *none_rd) {
-  const AV1_COMMON *const cm = &cpi->common;
-  TileInfo *const tile_info = &tile_data->tile_info;
-  MACROBLOCK *const x = &td->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const int mi_step = mi_size_wide[bsize] / 2;
-  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
-  const TOKENEXTRA *const tp_orig = *tp;
-  PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
-  int tmp_partition_cost[PARTITION_TYPES];
-  BLOCK_SIZE subsize;
-  RD_STATS this_rdc, sum_rdc, best_rdc, pn_rdc;
-  const int bsize_at_least_8x8 = (bsize >= BLOCK_8X8);
-  int do_square_split = bsize_at_least_8x8;
-  const int pl = bsize_at_least_8x8
-                     ? partition_plane_context(xd, mi_row, mi_col, bsize)
-                     : 0;
-  const int *partition_cost =
-      pl >= 0 ? x->partition_cost[pl] : x->partition_cost[0];
-  const int num_planes = av1_num_planes(cm);
-
-  int64_t split_rd[4] = { 0, 0, 0, 0 };
-
-  // Override skipping rectangular partition operations for edge blocks
-  const int has_rows = (mi_row + mi_step < cm->mi_rows);
-  const int has_cols = (mi_col + mi_step < cm->mi_cols);
-
-  if (none_rd) *none_rd = 0;
-
-  int partition_none_allowed = has_rows && has_cols;
-
-  (void)*tp_orig;
-  (void)split_rd;
-
-  if (best_rd < 0) {
-    pc_tree->none.rdcost = INT64_MAX;
-    pc_tree->none.skip = 0;
-    av1_invalid_rd_stats(rd_cost);
-    return;
-  }
-  pc_tree->pc_tree_stats.valid = 1;
-
-  // Override partition costs at the edges of the frame in the same
-  // way as in read_partition (see decodeframe.c)
-  if (!(has_rows && has_cols)) {
-    assert(bsize_at_least_8x8 && pl >= 0);
-    const aom_cdf_prob *partition_cdf = cm->fc->partition_cdf[pl];
-    for (int i = 0; i < PARTITION_TYPES; ++i) tmp_partition_cost[i] = INT_MAX;
-    if (has_cols) {
-      // At the bottom, the two possibilities are HORZ and SPLIT
-      aom_cdf_prob bot_cdf[2];
-      partition_gather_vert_alike(bot_cdf, partition_cdf, bsize);
-      static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT };
-      av1_cost_tokens_from_cdf(tmp_partition_cost, bot_cdf, bot_inv_map);
-    } else if (has_rows) {
-      // At the right, the two possibilities are VERT and SPLIT
-      aom_cdf_prob rhs_cdf[2];
-      partition_gather_horz_alike(rhs_cdf, partition_cdf, bsize);
-      static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT };
-      av1_cost_tokens_from_cdf(tmp_partition_cost, rhs_cdf, rhs_inv_map);
-    } else {
-      // At the bottom right, we always split
-      tmp_partition_cost[PARTITION_SPLIT] = 0;
-    }
-
-    partition_cost = tmp_partition_cost;
-  }
-
-#ifndef NDEBUG
-  // Nothing should rely on the default value of this array (which is just
-  // leftover from encoding the previous block. Setting it to fixed pattern
-  // when debugging.
-  // bit 0, 1, 2 are blk_skip of each plane
-  // bit 4, 5, 6 are initialization checking of each plane
-  memset(x->blk_skip, 0x77, sizeof(x->blk_skip));
-#endif  // NDEBUG
-
-  assert(mi_size_wide[bsize] == mi_size_high[bsize]);
-
-  av1_init_rd_stats(&this_rdc);
-  av1_init_rd_stats(&sum_rdc);
-  av1_invalid_rd_stats(&best_rdc);
-  best_rdc.rdcost = best_rd;
-
-  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
-
-  if (bsize == BLOCK_16X16 && cpi->vaq_refresh)
-    x->mb_energy = av1_log_block_var(cpi, x, bsize);
-
-  xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
-  xd->left_txfm_context =
-      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
-  save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-
-#if CONFIG_DIST_8X8
-  if (x->using_dist_8x8) {
-    if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8)
-      do_square_split = 0;
-  }
-#endif
-
-  // PARTITION_NONE
-  if (partition_none_allowed) {
-    int pt_cost = 0;
-    if (bsize_at_least_8x8) {
-      pc_tree->partitioning = PARTITION_NONE;
-      pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
-                    ? partition_cost[PARTITION_NONE]
-                    : 0;
-    }
-    int64_t partition_rd_cost = RDCOST(x->rdmult, pt_cost, 0);
-    int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
-                                     ? INT64_MAX
-                                     : (best_rdc.rdcost - partition_rd_cost);
-    rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
-                     PARTITION_NONE, bsize, ctx_none, best_remain_rdcost);
-
-    pc_tree->pc_tree_stats.rdcost = ctx_none->rdcost;
-    pc_tree->pc_tree_stats.skip = ctx_none->skip;
-
-    if (none_rd) *none_rd = this_rdc.rdcost;
-    if (this_rdc.rate != INT_MAX) {
-      if (bsize_at_least_8x8) {
-        this_rdc.rate += pt_cost;
-        this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
-      }
-
-      if (this_rdc.rdcost < best_rdc.rdcost) {
-        // Adjust dist breakout threshold according to the partition size.
-        const int64_t dist_breakout_thr =
-            cpi->sf.partition_search_breakout_dist_thr >>
-            ((2 * (MAX_SB_SIZE_LOG2 - 2)) -
-             (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]));
-        const int rate_breakout_thr =
-            cpi->sf.partition_search_breakout_rate_thr *
-            num_pels_log2_lookup[bsize];
-
-        best_rdc = this_rdc;
-        if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
-
-        pc_tree->cb_search_range = SEARCH_FULL_PLANE;
-
-        // If all y, u, v transform blocks in this partition are skippable, and
-        // the dist & rate are within the thresholds, the partition search is
-        // terminated for current branch of the partition search tree.
-        // The dist & rate thresholds are set to 0 at speed 0 to disable the
-        // early termination at that speed.
-        if (!x->e_mbd.lossless[xd->mi[0]->segment_id] &&
-            (ctx_none->skippable && best_rdc.dist < dist_breakout_thr &&
-             best_rdc.rate < rate_breakout_thr)) {
-          do_square_split = 0;
-        }
-      }
-    }
-
-    restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-  }
-
-  // store estimated motion vector
-  if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none);
-
-  int64_t temp_best_rdcost = best_rdc.rdcost;
-  pn_rdc = best_rdc;
-
-  // PARTITION_SPLIT
-  if (do_square_split) {
-    int reached_last_index = 0;
-    subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
-    int idx;
-
-    for (idx = 0; idx < 4 && sum_rdc.rdcost < temp_best_rdcost; ++idx) {
-      const int x_idx = (idx & 1) * mi_step;
-      const int y_idx = (idx >> 1) * mi_step;
-
-      if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
-        continue;
-
-      if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
-
-      pc_tree->split[idx]->index = idx;
-      int64_t *p_split_rd = &split_rd[idx];
-      // TODO(Cherma) : Account for partition cost while passing best rd to
-      // rd_pick_sqr_partition()
-      rd_pick_sqr_partition(cpi, td, tile_data, tp, mi_row + y_idx,
-                            mi_col + x_idx, subsize, &this_rdc,
-                            temp_best_rdcost - sum_rdc.rdcost,
-                            pc_tree->split[idx], p_split_rd);
-
-      pc_tree->pc_tree_stats.sub_block_rdcost[idx] = this_rdc.rdcost;
-      pc_tree->pc_tree_stats.sub_block_skip[idx] =
-          pc_tree->split[idx]->none.skip;
-
-      if (this_rdc.rate == INT_MAX) {
-        sum_rdc.rdcost = INT64_MAX;
-        break;
-      } else {
-        sum_rdc.rate += this_rdc.rate;
-        sum_rdc.dist += this_rdc.dist;
-        sum_rdc.rdcost += this_rdc.rdcost;
-      }
-    }
-    reached_last_index = (idx == 4);
-
-    if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
-      sum_rdc.rate += partition_cost[PARTITION_SPLIT];
-      sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
-
-      if (sum_rdc.rdcost < best_rdc.rdcost) {
-        best_rdc = sum_rdc;
-        pc_tree->partitioning = PARTITION_SPLIT;
-      }
-    }
-
-    int has_split = 0;
-    if (pc_tree->partitioning == PARTITION_SPLIT) {
-      for (int cb_idx = 0; cb_idx <= AOMMIN(idx, 3); ++cb_idx) {
-        if (pc_tree->split[cb_idx]->partitioning == PARTITION_SPLIT)
-          ++has_split;
-      }
-
-      if (has_split >= 3 || sum_rdc.rdcost < (pn_rdc.rdcost >> 1)) {
-        pc_tree->cb_search_range = SPLIT_PLANE;
-      }
-    }
-
-    if (pc_tree->partitioning == PARTITION_NONE) {
-      pc_tree->cb_search_range = SEARCH_SAME_PLANE;
-      if (pn_rdc.dist <= sum_rdc.dist)
-        pc_tree->cb_search_range = NONE_PARTITION_PLANE;
-    }
-
-    if (pn_rdc.rate == INT_MAX) pc_tree->cb_search_range = NONE_PARTITION_PLANE;
-
-    restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-  }  // if (do_split)
-
-  pc_tree->pc_tree_stats.split = pc_tree->partitioning == PARTITION_SPLIT;
-  if (do_square_split) {
-    for (int i = 0; i < 4; ++i) {
-      pc_tree->pc_tree_stats.sub_block_split[i] =
-          pc_tree->split[i]->partitioning == PARTITION_SPLIT;
-    }
-  }
-
-  // TODO(jbb): This code added so that we avoid static analysis
-  // warning related to the fact that best_rd isn't used after this
-  // point.  This code should be refactored so that the duplicate
-  // checks occur in some sub function and thus are used...
-  (void)best_rd;
-  *rd_cost = best_rdc;
-
-  if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
-      pc_tree->index != 3) {
-    if (bsize == cm->seq_params.sb_size) {
-      restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-    } else {
-      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
-                pc_tree, NULL);
-    }
-  }
-
-  if (bsize == cm->seq_params.sb_size) {
-    assert(best_rdc.rate < INT_MAX);
-    assert(best_rdc.dist < INT64_MAX);
-  } else {
-    assert(tp_orig == *tp);
-  }
-}
-
-#define FEATURE_SIZE 19
-static const float two_pass_split_partition_weights_128[FEATURE_SIZE + 1] = {
-  2.683936f, -0.193620f, -4.106470f, -0.141320f, -0.282289f,
-  0.125296f, -1.134961f, 0.862757f,  -0.418799f, -0.637666f,
-  0.016232f, 0.345013f,  0.018823f,  -0.393394f, -1.130700f,
-  0.695357f, 0.112569f,  -0.341975f, -0.513882f, 5.7488966f,
-};
-
-static const float two_pass_split_partition_weights_64[FEATURE_SIZE + 1] = {
-  2.990993f,  0.423273f,  -0.926544f, 0.454646f,  -0.292698f,
-  -1.311632f, -0.284432f, 0.717141f,  -0.419257f, -0.574760f,
-  -0.674444f, 0.669047f,  -0.374255f, 0.380624f,  -0.804036f,
-  0.264021f,  0.004163f,  1.896802f,  0.924287f,  0.13490619f,
-};
-
-static const float two_pass_split_partition_weights_32[FEATURE_SIZE + 1] = {
-  2.795181f,  -0.136943f, -0.924842f, 0.405330f,  -0.463505f,
-  -0.584076f, -0.831472f, 0.382985f,  -0.597544f, -0.138915f,
-  -1.354350f, 0.466035f,  -0.553961f, 0.213202f,  -1.166429f,
-  0.010776f,  -0.096236f, 2.335084f,  1.699857f,  -0.58178353f,
-};
-
-static const float two_pass_split_partition_weights_16[FEATURE_SIZE + 1] = {
-  1.987888f,  -0.431100f, -1.687703f, 0.262602f,  -0.425298f,
-  -0.463870f, -1.493457f, 0.470917f,  -0.528457f, -0.087700f,
-  -1.815092f, 0.152883f,  -0.337908f, 0.093679f,  -1.548267f,
-  -0.042387f, -0.000861f, 2.556746f,  1.619192f,  0.03643292f,
-};
-
-static const float two_pass_split_partition_weights_8[FEATURE_SIZE + 1] = {
-  2.188344f,  -0.817528f, -2.119219f, 0.000000f,  -0.348167f,
-  -0.658074f, -1.960362f, 0.000000f,  -0.403080f, 0.282699f,
-  -2.061088f, 0.000000f,  -0.431919f, -0.127960f, -1.099550f,
-  0.000000f,  0.121622f,  2.017455f,  2.058228f,  -0.15475988f,
-};
-
-static const float two_pass_none_partition_weights_128[FEATURE_SIZE + 1] = {
-  -1.006689f, 0.777908f,  4.461072f,  -0.395782f, -0.014610f,
-  -0.853863f, 0.729997f,  -0.420477f, 0.282429f,  -1.194595f,
-  3.181220f,  -0.511416f, 0.117084f,  -1.149348f, 1.507990f,
-  -0.477212f, 0.202963f,  -1.469581f, 0.624461f,  -0.89081228f,
-};
-
-static const float two_pass_none_partition_weights_64[FEATURE_SIZE + 1] = {
-  -1.241117f, 0.844878f,  5.638803f,  -0.489780f, -0.108796f,
-  -4.576821f, 1.540624f,  -0.477519f, 0.227791f,  -1.443968f,
-  1.586911f,  -0.505125f, 0.140764f,  -0.464194f, 1.466658f,
-  -0.641166f, 0.195412f,  1.427905f,  2.080007f,  -1.98272777f,
-};
-
-static const float two_pass_none_partition_weights_32[FEATURE_SIZE + 1] = {
-  -2.130825f, 0.476023f,  5.907343f,  -0.516002f, -0.097471f,
-  -2.662754f, 0.614858f,  -0.576728f, 0.085261f,  -0.031901f,
-  0.727842f,  -0.600034f, 0.079326f,  0.324328f,  0.504502f,
-  -0.547105f, -0.037670f, 0.304995f,  0.369018f,  -2.66299987f,
-};
-
-static const float two_pass_none_partition_weights_16[FEATURE_SIZE + 1] = {
-  -1.626410f, 0.872047f,  5.414965f,  -0.554781f, -0.084514f,
-  -3.020550f, 0.467632f,  -0.382280f, 0.199568f,  0.426220f,
-  0.829426f,  -0.467100f, 0.153098f,  0.662994f,  0.327545f,
-  -0.560106f, -0.141610f, 0.403372f,  0.523991f,  -3.02891231f,
-};
-
-static const float two_pass_none_partition_weights_8[FEATURE_SIZE + 1] = {
-  -1.463349f, 0.375376f,  4.751430f, 0.000000f, -0.184451f,
-  -1.655447f, 0.443214f,  0.000000f, 0.127961f, 0.152435f,
-  0.083288f,  0.000000f,  0.143105f, 0.438012f, 0.073238f,
-  0.000000f,  -0.278137f, 0.186134f, 0.073737f, -1.6494962f,
-};
-
-// split_score indicates confidence of picking split partition;
-// none_score indicates confidence of picking none partition;
-static int ml_prune_2pass_split_partition(const PC_TREE_STATS *pc_tree_stats,
-                                          BLOCK_SIZE bsize, int *split_score,
-                                          int *none_score) {
-  if (!pc_tree_stats->valid) return 0;
-  const float *split_weights = NULL;
-  const float *none_weights = NULL;
-  switch (bsize) {
-    case BLOCK_4X4: break;
-    case BLOCK_8X8:
-      split_weights = two_pass_split_partition_weights_8;
-      none_weights = two_pass_none_partition_weights_8;
-      break;
-    case BLOCK_16X16:
-      split_weights = two_pass_split_partition_weights_16;
-      none_weights = two_pass_none_partition_weights_16;
-      break;
-    case BLOCK_32X32:
-      split_weights = two_pass_split_partition_weights_32;
-      none_weights = two_pass_none_partition_weights_32;
-      break;
-    case BLOCK_64X64:
-      split_weights = two_pass_split_partition_weights_64;
-      none_weights = two_pass_none_partition_weights_64;
-      break;
-    case BLOCK_128X128:
-      split_weights = two_pass_split_partition_weights_128;
-      none_weights = two_pass_none_partition_weights_128;
-      break;
-    default: assert(0 && "Unexpected bsize.");
-  }
-  if (!split_weights || !none_weights) return 0;
-
-  aom_clear_system_state();
-
-  float features[FEATURE_SIZE];
-  int feature_index = 0;
-  features[feature_index++] = (float)pc_tree_stats->split;
-  features[feature_index++] = (float)pc_tree_stats->skip;
-  const int rdcost = (int)AOMMIN(INT_MAX, pc_tree_stats->rdcost);
-  const int rd_valid = rdcost > 0 && rdcost < 1000000000;
-  features[feature_index++] = (float)rd_valid;
-  for (int i = 0; i < 4; ++i) {
-    features[feature_index++] = (float)pc_tree_stats->sub_block_split[i];
-    features[feature_index++] = (float)pc_tree_stats->sub_block_skip[i];
-    const int sub_rdcost =
-        (int)AOMMIN(INT_MAX, pc_tree_stats->sub_block_rdcost[i]);
-    const int sub_rd_valid = sub_rdcost > 0 && sub_rdcost < 1000000000;
-    features[feature_index++] = (float)sub_rd_valid;
-    // Ratio between the sub-block RD and the whole-block RD.
-    float rd_ratio = 1.0f;
-    if (rd_valid && sub_rd_valid && sub_rdcost < rdcost)
-      rd_ratio = (float)sub_rdcost / (float)rdcost;
-    features[feature_index++] = rd_ratio;
-  }
-  assert(feature_index == FEATURE_SIZE);
-
-  float score_1 = split_weights[FEATURE_SIZE];
-  float score_2 = none_weights[FEATURE_SIZE];
-  for (int i = 0; i < FEATURE_SIZE; ++i) {
-    score_1 += features[i] * split_weights[i];
-    score_2 += features[i] * none_weights[i];
-  }
-  *split_score = (int)(score_1 * 100);
-  *none_score = (int)(score_2 * 100);
-  return 1;
-}
-#undef FEATURE_SIZE
-
-static void ml_prune_rect_partition(const AV1_COMP *const cpi,
-                                    const MACROBLOCK *const x, BLOCK_SIZE bsize,
-                                    int64_t best_rd, int64_t none_rd,
-                                    int64_t *split_rd,
-                                    int *const dst_prune_horz,
-                                    int *const dst_prune_vert) {
-  if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return;
-  best_rd = AOMMAX(best_rd, 1);
-  const NN_CONFIG *nn_config = NULL;
-  const float prob_thresholds[5] = { 0.01f, 0.01f, 0.004f, 0.002f, 0.002f };
-  float cur_thresh = 0.0f;
-  switch (bsize) {
-    case BLOCK_8X8:
-      nn_config = &av1_rect_partition_nnconfig_8;
-      cur_thresh = prob_thresholds[0];
-      break;
-    case BLOCK_16X16:
-      nn_config = &av1_rect_partition_nnconfig_16;
-      cur_thresh = prob_thresholds[1];
-      break;
-    case BLOCK_32X32:
-      nn_config = &av1_rect_partition_nnconfig_32;
-      cur_thresh = prob_thresholds[2];
-      break;
-    case BLOCK_64X64:
-      nn_config = &av1_rect_partition_nnconfig_64;
-      cur_thresh = prob_thresholds[3];
-      break;
-    case BLOCK_128X128:
-      nn_config = &av1_rect_partition_nnconfig_128;
-      cur_thresh = prob_thresholds[4];
-      break;
-    default: assert(0 && "Unexpected bsize.");
-  }
-  if (!nn_config) return;
-  aom_clear_system_state();
-
-  // 1. Compute input features
-  float features[9];
-
-  // RD cost ratios
-  for (int i = 0; i < 5; i++) features[i] = 1.0f;
-  if (none_rd > 0 && none_rd < 1000000000)
-    features[0] = (float)none_rd / (float)best_rd;
-  for (int i = 0; i < 4; i++) {
-    if (split_rd[i] > 0 && split_rd[i] < 1000000000)
-      features[1 + i] = (float)split_rd[i] / (float)best_rd;
-  }
-
-  // Variance ratios
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  int whole_block_variance;
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    whole_block_variance = av1_high_get_sby_perpixel_variance(
-        cpi, &x->plane[0].src, bsize, xd->bd);
-  } else {
-    whole_block_variance =
-        av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
-  }
-  whole_block_variance = AOMMAX(whole_block_variance, 1);
-
-  int split_variance[4];
-  const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
-  struct buf_2d buf;
-  buf.stride = x->plane[0].src.stride;
-  const int bw = block_size_wide[bsize];
-  for (int i = 0; i < 4; ++i) {
-    const int x_idx = (i & 1) * bw / 2;
-    const int y_idx = (i >> 1) * bw / 2;
-    buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride;
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      split_variance[i] =
-          av1_high_get_sby_perpixel_variance(cpi, &buf, subsize, xd->bd);
-    } else {
-      split_variance[i] = av1_get_sby_perpixel_variance(cpi, &buf, subsize);
-    }
-  }
-
-  for (int i = 0; i < 4; i++)
-    features[5 + i] = (float)split_variance[i] / (float)whole_block_variance;
-
-  // 2. Do the prediction and prune 0-2 partitions based on their probabilities
-  float raw_scores[3] = { 0.0f };
-  av1_nn_predict(features, nn_config, raw_scores);
-  float probs[3] = { 0.0f };
-  av1_nn_softmax(raw_scores, probs, 3);
-
-  // probs[0] is the probability of the fact that both rectangular partitions
-  // are worse than current best_rd
-  if (probs[1] <= cur_thresh) (*dst_prune_horz) = 1;
-  if (probs[2] <= cur_thresh) (*dst_prune_vert) = 1;
-}
-
-// Use a ML model to predict if horz_a, horz_b, vert_a, and vert_b should be
-// considered.
-static void ml_prune_ab_partition(BLOCK_SIZE bsize, int part_ctx, int var_ctx,
-                                  int64_t best_rd, int64_t horz_rd[2],
-                                  int64_t vert_rd[2], int64_t split_rd[4],
-                                  int *const horza_partition_allowed,
-                                  int *const horzb_partition_allowed,
-                                  int *const verta_partition_allowed,
-                                  int *const vertb_partition_allowed) {
-  if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return;
-  const NN_CONFIG *nn_config = NULL;
-  switch (bsize) {
-    case BLOCK_8X8: nn_config = NULL; break;
-    case BLOCK_16X16: nn_config = &av1_ab_partition_nnconfig_16; break;
-    case BLOCK_32X32: nn_config = &av1_ab_partition_nnconfig_32; break;
-    case BLOCK_64X64: nn_config = &av1_ab_partition_nnconfig_64; break;
-    case BLOCK_128X128: nn_config = &av1_ab_partition_nnconfig_128; break;
-    default: assert(0 && "Unexpected bsize.");
-  }
-  if (!nn_config) return;
-
-  aom_clear_system_state();
-
-  // Generate features.
-  float features[10];
-  int feature_index = 0;
-  features[feature_index++] = (float)part_ctx;
-  features[feature_index++] = (float)var_ctx;
-  const int rdcost = (int)AOMMIN(INT_MAX, best_rd);
-  int sub_block_rdcost[8] = { 0 };
-  int rd_index = 0;
-  for (int i = 0; i < 2; ++i) {
-    if (horz_rd[i] > 0 && horz_rd[i] < 1000000000)
-      sub_block_rdcost[rd_index] = (int)horz_rd[i];
-    ++rd_index;
-  }
-  for (int i = 0; i < 2; ++i) {
-    if (vert_rd[i] > 0 && vert_rd[i] < 1000000000)
-      sub_block_rdcost[rd_index] = (int)vert_rd[i];
-    ++rd_index;
-  }
-  for (int i = 0; i < 4; ++i) {
-    if (split_rd[i] > 0 && split_rd[i] < 1000000000)
-      sub_block_rdcost[rd_index] = (int)split_rd[i];
-    ++rd_index;
-  }
-  for (int i = 0; i < 8; ++i) {
-    // Ratio between the sub-block RD and the whole-block RD.
-    float rd_ratio = 1.0f;
-    if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost)
-      rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost;
-    features[feature_index++] = rd_ratio;
-  }
-  assert(feature_index == 10);
-
-  // Calculate scores using the NN model.
-  float score[16] = { 0.0f };
-  av1_nn_predict(features, nn_config, score);
-  int int_score[16];
-  int max_score = -1000;
-  for (int i = 0; i < 16; ++i) {
-    int_score[i] = (int)(100 * score[i]);
-    max_score = AOMMAX(int_score[i], max_score);
-  }
-
-  // Make decisions based on the model scores.
-  int thresh = max_score;
-  switch (bsize) {
-    case BLOCK_16X16: thresh -= 150; break;
-    case BLOCK_32X32: thresh -= 100; break;
-    default: break;
-  }
-  *horza_partition_allowed = 0;
-  *horzb_partition_allowed = 0;
-  *verta_partition_allowed = 0;
-  *vertb_partition_allowed = 0;
-  for (int i = 0; i < 16; ++i) {
-    if (int_score[i] >= thresh) {
-      if ((i >> 0) & 1) *horza_partition_allowed = 1;
-      if ((i >> 1) & 1) *horzb_partition_allowed = 1;
-      if ((i >> 2) & 1) *verta_partition_allowed = 1;
-      if ((i >> 3) & 1) *vertb_partition_allowed = 1;
-    }
-  }
-}
-
-#define FEATURES 18
-#define LABELS 4
-// Use a ML model to predict if horz4 and vert4 should be considered.
-static void ml_prune_4_partition(const AV1_COMP *const cpi, MACROBLOCK *const x,
-                                 BLOCK_SIZE bsize, int part_ctx,
-                                 int64_t best_rd, int64_t horz_rd[2],
-                                 int64_t vert_rd[2], int64_t split_rd[4],
-                                 int *const partition_horz4_allowed,
-                                 int *const partition_vert4_allowed,
-                                 unsigned int pb_source_variance, int mi_row,
-                                 int mi_col) {
-  if (best_rd >= 1000000000) return;
-  const NN_CONFIG *nn_config = NULL;
-  switch (bsize) {
-    case BLOCK_16X16: nn_config = &av1_4_partition_nnconfig_16; break;
-    case BLOCK_32X32: nn_config = &av1_4_partition_nnconfig_32; break;
-    case BLOCK_64X64: nn_config = &av1_4_partition_nnconfig_64; break;
-    default: assert(0 && "Unexpected bsize.");
-  }
-  if (!nn_config) return;
-
-  aom_clear_system_state();
-
-  // Generate features.
-  float features[FEATURES];
-  int feature_index = 0;
-  features[feature_index++] = (float)part_ctx;
-  features[feature_index++] = (float)get_unsigned_bits(pb_source_variance);
-
-  const int rdcost = (int)AOMMIN(INT_MAX, best_rd);
-  int sub_block_rdcost[8] = { 0 };
-  int rd_index = 0;
-  for (int i = 0; i < 2; ++i) {
-    if (horz_rd[i] > 0 && horz_rd[i] < 1000000000)
-      sub_block_rdcost[rd_index] = (int)horz_rd[i];
-    ++rd_index;
-  }
-  for (int i = 0; i < 2; ++i) {
-    if (vert_rd[i] > 0 && vert_rd[i] < 1000000000)
-      sub_block_rdcost[rd_index] = (int)vert_rd[i];
-    ++rd_index;
-  }
-  for (int i = 0; i < 4; ++i) {
-    if (split_rd[i] > 0 && split_rd[i] < 1000000000)
-      sub_block_rdcost[rd_index] = (int)split_rd[i];
-    ++rd_index;
-  }
-  for (int i = 0; i < 8; ++i) {
-    // Ratio between the sub-block RD and the whole-block RD.
-    float rd_ratio = 1.0f;
-    if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost)
-      rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost;
-    features[feature_index++] = rd_ratio;
-  }
-
-  // Get variance of the 1:4 and 4:1 sub-blocks.
-  unsigned int horz_4_source_var[4] = { 0 };
-  unsigned int vert_4_source_var[4] = { 0 };
-  {
-    BLOCK_SIZE horz_4_bs = get_partition_subsize(bsize, PARTITION_HORZ_4);
-    BLOCK_SIZE vert_4_bs = get_partition_subsize(bsize, PARTITION_VERT_4);
-    av1_setup_src_planes(x, cpi->source, mi_row, mi_col,
-                         av1_num_planes(&cpi->common));
-    const int src_stride = x->plane[0].src.stride;
-    const uint8_t *src = x->plane[0].src.buf;
-    const MACROBLOCKD *const xd = &x->e_mbd;
-    for (int i = 0; i < 4; ++i) {
-      const uint8_t *horz_src =
-          src + i * block_size_high[horz_4_bs] * src_stride;
-      const uint8_t *vert_src = src + i * block_size_wide[vert_4_bs];
-      unsigned int horz_var, vert_var, sse;
-      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-        switch (xd->bd) {
-          case 10:
-            horz_var = cpi->fn_ptr[horz_4_bs].vf(
-                horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10),
-                0, &sse);
-            vert_var = cpi->fn_ptr[vert_4_bs].vf(
-                vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10),
-                0, &sse);
-            break;
-          case 12:
-            horz_var = cpi->fn_ptr[horz_4_bs].vf(
-                horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12),
-                0, &sse);
-            vert_var = cpi->fn_ptr[vert_4_bs].vf(
-                vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12),
-                0, &sse);
-            break;
-          case 8:
-          default:
-            horz_var = cpi->fn_ptr[horz_4_bs].vf(
-                horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8),
-                0, &sse);
-            vert_var = cpi->fn_ptr[vert_4_bs].vf(
-                vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8),
-                0, &sse);
-            break;
-        }
-        horz_4_source_var[i] =
-            ROUND_POWER_OF_TWO(horz_var, num_pels_log2_lookup[horz_4_bs]);
-        vert_4_source_var[i] =
-            ROUND_POWER_OF_TWO(vert_var, num_pels_log2_lookup[vert_4_bs]);
-      } else {
-        horz_var = cpi->fn_ptr[horz_4_bs].vf(horz_src, src_stride, AV1_VAR_OFFS,
-                                             0, &sse);
-        vert_var = cpi->fn_ptr[vert_4_bs].vf(vert_src, src_stride, AV1_VAR_OFFS,
-                                             0, &sse);
-        horz_4_source_var[i] =
-            ROUND_POWER_OF_TWO(horz_var, num_pels_log2_lookup[horz_4_bs]);
-        vert_4_source_var[i] =
-            ROUND_POWER_OF_TWO(vert_var, num_pels_log2_lookup[vert_4_bs]);
-      }
-    }
-  }
-
-  const float denom = (float)(pb_source_variance + 1);
-  const float low_b = 0.1f;
-  const float high_b = 10.0f;
-  for (int i = 0; i < 4; ++i) {
-    // Ratio between the 4:1 sub-block variance and the whole-block variance.
-    float var_ratio = (float)(horz_4_source_var[i] + 1) / denom;
-    if (var_ratio < low_b) var_ratio = low_b;
-    if (var_ratio > high_b) var_ratio = high_b;
-    features[feature_index++] = var_ratio;
-  }
-  for (int i = 0; i < 4; ++i) {
-    // Ratio between the 1:4 sub-block RD and the whole-block RD.
-    float var_ratio = (float)(vert_4_source_var[i] + 1) / denom;
-    if (var_ratio < low_b) var_ratio = low_b;
-    if (var_ratio > high_b) var_ratio = high_b;
-    features[feature_index++] = var_ratio;
-  }
-  assert(feature_index == FEATURES);
-
-  // Calculate scores using the NN model.
-  float score[LABELS] = { 0.0f };
-  av1_nn_predict(features, nn_config, score);
-  int int_score[LABELS];
-  int max_score = -1000;
-  for (int i = 0; i < LABELS; ++i) {
-    int_score[i] = (int)(100 * score[i]);
-    max_score = AOMMAX(int_score[i], max_score);
-  }
-
-  // Make decisions based on the model scores.
-  int thresh = max_score;
-  switch (bsize) {
-    case BLOCK_16X16: thresh -= 500; break;
-    case BLOCK_32X32: thresh -= 500; break;
-    case BLOCK_64X64: thresh -= 200; break;
-    default: break;
-  }
-  *partition_horz4_allowed = 0;
-  *partition_vert4_allowed = 0;
-  for (int i = 0; i < LABELS; ++i) {
-    if (int_score[i] >= thresh) {
-      if ((i >> 0) & 1) *partition_horz4_allowed = 1;
-      if ((i >> 1) & 1) *partition_vert4_allowed = 1;
-    }
-  }
-}
-#undef FEATURES
-#undef LABELS
-
-#define FEATURES 4
-// ML-based partition search breakout.
-static int ml_predict_breakout(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
-                               const MACROBLOCK *const x,
-                               const RD_STATS *const rd_stats,
-                               unsigned int pb_source_variance) {
-  const NN_CONFIG *nn_config = NULL;
-  int thresh = 0;
-  switch (bsize) {
-    case BLOCK_8X8:
-      nn_config = &av1_partition_breakout_nnconfig_8;
-      thresh = cpi->sf.ml_partition_search_breakout_thresh[0];
-      break;
-    case BLOCK_16X16:
-      nn_config = &av1_partition_breakout_nnconfig_16;
-      thresh = cpi->sf.ml_partition_search_breakout_thresh[1];
-      break;
-    case BLOCK_32X32:
-      nn_config = &av1_partition_breakout_nnconfig_32;
-      thresh = cpi->sf.ml_partition_search_breakout_thresh[2];
-      break;
-    case BLOCK_64X64:
-      nn_config = &av1_partition_breakout_nnconfig_64;
-      thresh = cpi->sf.ml_partition_search_breakout_thresh[3];
-      break;
-    case BLOCK_128X128:
-      nn_config = &av1_partition_breakout_nnconfig_128;
-      thresh = cpi->sf.ml_partition_search_breakout_thresh[4];
-      break;
-    default: assert(0 && "Unexpected bsize.");
-  }
-  if (!nn_config || thresh < 0) return 0;
-
-  // Generate feature values.
-  float features[FEATURES];
-  int feature_index = 0;
-  aom_clear_system_state();
-
-  const int num_pels_log2 = num_pels_log2_lookup[bsize];
-  float rate_f = (float)AOMMIN(rd_stats->rate, INT_MAX);
-  rate_f = ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) *
-           rate_f;
-  features[feature_index++] = rate_f;
-
-  const float dist_f =
-      (float)(AOMMIN(rd_stats->dist, INT_MAX) >> num_pels_log2);
-  features[feature_index++] = dist_f;
-
-  features[feature_index++] = (float)pb_source_variance;
-
-  const int dc_q = (int)x->plane[0].dequant_QTX[0];
-  features[feature_index++] = (float)(dc_q * dc_q) / 256.0f;
-  assert(feature_index == FEATURES);
-
-  // Calculate score using the NN model.
-  float score = 0.0f;
-  av1_nn_predict(features, nn_config, &score);
-
-  // Make decision.
-  return (int)(score * 100) >= thresh;
-}
-#undef FEATURES
-
-// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
-// unlikely to be selected depending on previous rate-distortion optimization
-// results, for encoding speed-up.
-static void rd_pick_partition(AV1_COMP *const cpi, ThreadData *td,
-                              TileDataEnc *tile_data, TOKENEXTRA **tp,
-                              int mi_row, int mi_col, BLOCK_SIZE bsize,
-                              RD_STATS *rd_cost, int64_t best_rd,
-                              PC_TREE *pc_tree, int64_t *none_rd) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  TileInfo *const tile_info = &tile_data->tile_info;
-  MACROBLOCK *const x = &td->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const int mi_step = mi_size_wide[bsize] / 2;
-  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
-  const TOKENEXTRA *const tp_orig = *tp;
-  PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
-  int tmp_partition_cost[PARTITION_TYPES];
-  BLOCK_SIZE subsize;
-  RD_STATS this_rdc, sum_rdc, best_rdc;
-  const int bsize_at_least_8x8 = (bsize >= BLOCK_8X8);
-  int do_square_split = bsize_at_least_8x8;
-  const int pl = bsize_at_least_8x8
-                     ? partition_plane_context(xd, mi_row, mi_col, bsize)
-                     : 0;
-  const int *partition_cost =
-      pl >= 0 ? x->partition_cost[pl] : x->partition_cost[0];
-
-  int do_rectangular_split = 1;
-  int64_t cur_none_rd = 0;
-  int64_t split_rd[4] = { 0, 0, 0, 0 };
-  int64_t horz_rd[2] = { 0, 0 };
-  int64_t vert_rd[2] = { 0, 0 };
-
-  int split_ctx_is_ready[2] = { 0, 0 };
-  int horz_ctx_is_ready = 0;
-  int vert_ctx_is_ready = 0;
-  BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
-
-  if (best_rd < 0) {
-    pc_tree->none.rdcost = INT64_MAX;
-    pc_tree->none.skip = 0;
-    av1_invalid_rd_stats(rd_cost);
-    return;
-  }
-  if (bsize == cm->seq_params.sb_size) x->must_find_valid_partition = 0;
-
-  // Override skipping rectangular partition operations for edge blocks
-  const int has_rows = (mi_row + mi_step < cm->mi_rows);
-  const int has_cols = (mi_col + mi_step < cm->mi_cols);
-  const int xss = x->e_mbd.plane[1].subsampling_x;
-  const int yss = x->e_mbd.plane[1].subsampling_y;
-
-  BLOCK_SIZE min_size = x->min_partition_size;
-  BLOCK_SIZE max_size = x->max_partition_size;
-
-  if (none_rd) *none_rd = 0;
-
-#if CONFIG_FP_MB_STATS
-  unsigned int src_diff_var = UINT_MAX;
-  int none_complexity = 0;
-#endif
-
-  int partition_none_allowed = has_rows && has_cols;
-  int partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8;
-  int partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8;
-
-  (void)*tp_orig;
-
-  // Override partition costs at the edges of the frame in the same
-  // way as in read_partition (see decodeframe.c)
-  if (!(has_rows && has_cols)) {
-    assert(bsize_at_least_8x8 && pl >= 0);
-    const aom_cdf_prob *partition_cdf = cm->fc->partition_cdf[pl];
-    for (int i = 0; i < PARTITION_TYPES; ++i) tmp_partition_cost[i] = INT_MAX;
-    if (has_cols) {
-      // At the bottom, the two possibilities are HORZ and SPLIT
-      aom_cdf_prob bot_cdf[2];
-      partition_gather_vert_alike(bot_cdf, partition_cdf, bsize);
-      static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT };
-      av1_cost_tokens_from_cdf(tmp_partition_cost, bot_cdf, bot_inv_map);
-    } else if (has_rows) {
-      // At the right, the two possibilities are VERT and SPLIT
-      aom_cdf_prob rhs_cdf[2];
-      partition_gather_horz_alike(rhs_cdf, partition_cdf, bsize);
-      static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT };
-      av1_cost_tokens_from_cdf(tmp_partition_cost, rhs_cdf, rhs_inv_map);
-    } else {
-      // At the bottom right, we always split
-      tmp_partition_cost[PARTITION_SPLIT] = 0;
-    }
-
-    partition_cost = tmp_partition_cost;
-  }
-
-#ifndef NDEBUG
-  // Nothing should rely on the default value of this array (which is just
-  // leftover from encoding the previous block. Setting it to fixed pattern
-  // when debugging.
-  // bit 0, 1, 2 are blk_skip of each plane
-  // bit 4, 5, 6 are initialization checking of each plane
-  memset(x->blk_skip, 0x77, sizeof(x->blk_skip));
-#endif  // NDEBUG
-
-  assert(mi_size_wide[bsize] == mi_size_high[bsize]);
-
-  av1_init_rd_stats(&this_rdc);
-  av1_invalid_rd_stats(&best_rdc);
-  best_rdc.rdcost = best_rd;
-
-  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
-
-  if (bsize == BLOCK_16X16 && cpi->vaq_refresh)
-    x->mb_energy = av1_log_block_var(cpi, x, bsize);
-
-  if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) {
-    const int cb_partition_search_ctrl =
-        ((pc_tree->index == 0 || pc_tree->index == 3) +
-         get_chessboard_index(cm->current_video_frame)) &
-        0x1;
-
-    if (cb_partition_search_ctrl && bsize > min_size && bsize < max_size)
-      set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size);
-  }
-
-  // Determine partition types in search according to the speed features.
-  // The threshold set here has to be of square block size.
-  if (cpi->sf.auto_min_max_partition_size) {
-    const int no_partition_allowed = (bsize <= max_size && bsize >= min_size);
-    // Note: Further partitioning is NOT allowed when bsize == min_size already.
-    const int partition_allowed = (bsize <= max_size && bsize > min_size);
-    partition_none_allowed &= no_partition_allowed;
-    partition_horz_allowed &= partition_allowed || !has_rows;
-    partition_vert_allowed &= partition_allowed || !has_cols;
-    do_square_split &= bsize > min_size;
-  }
-
-  if (bsize > cpi->sf.use_square_partition_only_threshold) {
-    partition_horz_allowed &= !has_rows;
-    partition_vert_allowed &= !has_cols;
-  }
-
-  if (bsize > BLOCK_4X4 && x->use_cb_search_range &&
-      cpi->sf.auto_min_max_partition_size == 0) {
-    int split_score = 0;
-    int none_score = 0;
-    const int score_valid = ml_prune_2pass_split_partition(
-        &pc_tree->pc_tree_stats, bsize, &split_score, &none_score);
-    if (score_valid) {
-      {
-        const int only_split_thresh = 300;
-        const int no_none_thresh = 250;
-        const int no_split_thresh = 0;
-        if (split_score > only_split_thresh) {
-          partition_none_allowed = 0;
-          partition_horz_allowed = 0;
-          partition_vert_allowed = 0;
-        } else if (split_score > no_none_thresh) {
-          partition_none_allowed = 0;
-        }
-        if (split_score < no_split_thresh) do_square_split = 0;
-      }
-      {
-        const int no_split_thresh = 120;
-        const int no_none_thresh = -120;
-        if (none_score > no_split_thresh && partition_none_allowed)
-          do_square_split = 0;
-        if (none_score < no_none_thresh) partition_none_allowed = 0;
-      }
-    } else {
-      if (pc_tree->cb_search_range == SPLIT_PLANE) {
-        partition_none_allowed = 0;
-        partition_horz_allowed = 0;
-        partition_vert_allowed = 0;
-      }
-      if (pc_tree->cb_search_range == SEARCH_SAME_PLANE) do_square_split = 0;
-      if (pc_tree->cb_search_range == NONE_PARTITION_PLANE) {
-        do_square_split = 0;
-        partition_horz_allowed = 0;
-        partition_vert_allowed = 0;
-      }
-    }
-
-    // Fall back to default values in case all partition modes are rejected.
-    if (partition_none_allowed == 0 && do_square_split == 0 &&
-        partition_horz_allowed == 0 && partition_vert_allowed == 0) {
-      do_square_split = bsize_at_least_8x8;
-      partition_none_allowed = has_rows && has_cols;
-      partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8;
-      partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8;
-    }
-  }
-
-  xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
-  xd->left_txfm_context =
-      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
-  save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-
-#if CONFIG_FP_MB_STATS
-  if (cpi->use_fp_mb_stats) {
-    set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
-    src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row,
-                                                  mi_col, bsize);
-  }
-
-  // Decide whether we shall split directly and skip searching NONE by using
-  // the first pass block statistics
-  if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_square_split &&
-      partition_none_allowed && src_diff_var > 4 &&
-      cm->base_qindex < qindex_split_threshold_lookup[bsize]) {
-    int mb_row = mi_row >> 1;
-    int mb_col = mi_col >> 1;
-    int mb_row_end =
-        AOMMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
-    int mb_col_end =
-        AOMMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
-    int r, c;
-
-    // compute a complexity measure, basically measure inconsistency of motion
-    // vectors obtained from the first pass in the current block
-    for (r = mb_row; r < mb_row_end; r++) {
-      for (c = mb_col; c < mb_col_end; c++) {
-        const int mb_index = r * cm->mb_cols + c;
-
-        MOTION_DIRECTION this_mv;
-        MOTION_DIRECTION right_mv;
-        MOTION_DIRECTION bottom_mv;
-
-        this_mv =
-            get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]);
-
-        // to its right
-        if (c != mb_col_end - 1) {
-          right_mv = get_motion_direction_fp(
-              cpi->twopass.this_frame_mb_stats[mb_index + 1]);
-          none_complexity += get_motion_inconsistency(this_mv, right_mv);
-        }
-
-        // to its bottom
-        if (r != mb_row_end - 1) {
-          bottom_mv = get_motion_direction_fp(
-              cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]);
-          none_complexity += get_motion_inconsistency(this_mv, bottom_mv);
-        }
-
-        // do not count its left and top neighbors to avoid double counting
-      }
-    }
-
-    if (none_complexity > complexity_16x16_blocks_threshold[bsize]) {
-      partition_none_allowed = 0;
-    }
-  }
-#endif
-
-  // Ref frames picked in the [i_th] quarter subblock during square partition
-  // RD search. It may be used to prune ref frame selection of rect partitions.
-  int ref_frames_used[4] = {
-    0,
-  };
-
-BEGIN_PARTITION_SEARCH:
-  if (x->must_find_valid_partition) {
-    partition_none_allowed = has_rows && has_cols;
-    partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8;
-    partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8;
-  }
-
-  // Partition block source pixel variance.
-  unsigned int pb_source_variance = UINT_MAX;
-
-#if CONFIG_DIST_8X8
-  if (x->using_dist_8x8) {
-    if (block_size_high[bsize] <= 8) partition_horz_allowed = 0;
-    if (block_size_wide[bsize] <= 8) partition_vert_allowed = 0;
-    if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8)
-      do_square_split = 0;
-  }
-#endif
-
-  // PARTITION_NONE
-  if (partition_none_allowed) {
-    int pt_cost = 0;
-    if (bsize_at_least_8x8) {
-      pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
-                    ? partition_cost[PARTITION_NONE]
-                    : 0;
-    }
-    int64_t partition_rd_cost = RDCOST(x->rdmult, pt_cost, 0);
-    int64_t best_remain_rdcost = (best_rdc.rdcost == INT64_MAX)
-                                     ? INT64_MAX
-                                     : (best_rdc.rdcost - partition_rd_cost);
-    rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
-                     PARTITION_NONE, bsize, ctx_none, best_remain_rdcost);
-    pb_source_variance = x->source_variance;
-    if (none_rd) *none_rd = this_rdc.rdcost;
-    cur_none_rd = this_rdc.rdcost;
-    if (this_rdc.rate != INT_MAX) {
-      if (cpi->sf.prune_ref_frame_for_rect_partitions) {
-        const int ref_type = av1_ref_frame_type(ctx_none->mic.ref_frame);
-        for (int i = 0; i < 4; ++i) {
-          ref_frames_used[i] |= (1 << ref_type);
-        }
-      }
-      if (bsize_at_least_8x8) {
-        this_rdc.rate += pt_cost;
-        this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
-      }
-
-      if (this_rdc.rdcost < best_rdc.rdcost) {
-        // Adjust dist breakout threshold according to the partition size.
-        const int64_t dist_breakout_thr =
-            cpi->sf.partition_search_breakout_dist_thr >>
-            ((2 * (MAX_SB_SIZE_LOG2 - 2)) -
-             (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]));
-        const int rate_breakout_thr =
-            cpi->sf.partition_search_breakout_rate_thr *
-            num_pels_log2_lookup[bsize];
-
-        best_rdc = this_rdc;
-        if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
-
-        if ((do_square_split || do_rectangular_split) &&
-            !x->e_mbd.lossless[xd->mi[0]->segment_id] && ctx_none->skippable) {
-          const int use_ml_based_breakout =
-              bsize <= cpi->sf.use_square_partition_only_threshold &&
-              bsize > BLOCK_4X4 && xd->bd == 8;
-          if (use_ml_based_breakout) {
-            if (ml_predict_breakout(cpi, bsize, x, &this_rdc,
-                                    pb_source_variance)) {
-              do_square_split = 0;
-              do_rectangular_split = 0;
-            }
-          }
-
-          // If all y, u, v transform blocks in this partition are skippable,
-          // and the dist & rate are within the thresholds, the partition
-          // search is terminated for current branch of the partition search
-          // tree. The dist & rate thresholds are set to 0 at speed 0 to
-          // disable the early termination at that speed.
-          if (best_rdc.dist < dist_breakout_thr &&
-              best_rdc.rate < rate_breakout_thr) {
-            do_square_split = 0;
-            do_rectangular_split = 0;
-          }
-        }
-
-#if CONFIG_FP_MB_STATS
-        // Check if every 16x16 first pass block statistics has zero
-        // motion and the corresponding first pass residue is small enough.
-        // If that is the case, check the difference variance between the
-        // current frame and the last frame. If the variance is small enough,
-        // stop further splitting in RD optimization
-        if (cpi->use_fp_mb_stats && do_square_split &&
-            cm->base_qindex > qindex_skip_threshold_lookup[bsize]) {
-          int mb_row = mi_row >> 1;
-          int mb_col = mi_col >> 1;
-          int mb_row_end =
-              AOMMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
-          int mb_col_end =
-              AOMMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
-          int r, c;
-
-          int skip = 1;
-          for (r = mb_row; r < mb_row_end; r++) {
-            for (c = mb_col; c < mb_col_end; c++) {
-              const int mb_index = r * cm->mb_cols + c;
-              if (!(cpi->twopass.this_frame_mb_stats[mb_index] &
-                    FPMB_MOTION_ZERO_MASK) ||
-                  !(cpi->twopass.this_frame_mb_stats[mb_index] &
-                    FPMB_ERROR_SMALL_MASK)) {
-                skip = 0;
-                break;
-              }
-            }
-            if (skip == 0) {
-              break;
-            }
-          }
-          if (skip) {
-            if (src_diff_var == UINT_MAX) {
-              set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
-              src_diff_var = get_sby_perpixel_diff_variance(
-                  cpi, &x->plane[0].src, mi_row, mi_col, bsize);
-            }
-            if (src_diff_var < 8) {
-              do_square_split = 0;
-              do_rectangular_split = 0;
-            }
-          }
-        }
-#endif
-      }
-    }
-
-    restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-  }
-
-  // store estimated motion vector
-  if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none);
-
-  // PARTITION_SPLIT
-  if (do_square_split) {
-    av1_init_rd_stats(&sum_rdc);
-    subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
-    sum_rdc.rate = partition_cost[PARTITION_SPLIT];
-    sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
-
-    int idx;
-    for (idx = 0; idx < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++idx) {
-      const int x_idx = (idx & 1) * mi_step;
-      const int y_idx = (idx >> 1) * mi_step;
-
-      if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
-        continue;
-
-      if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
-
-      pc_tree->split[idx]->index = idx;
-      int64_t *p_split_rd = &split_rd[idx];
-      int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
-                                       ? INT64_MAX
-                                       : (best_rdc.rdcost - sum_rdc.rdcost);
-      if (cpi->sf.prune_ref_frame_for_rect_partitions)
-        pc_tree->split[idx]->none.rate = INT_MAX;
-      rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx,
-                        subsize, &this_rdc, best_remain_rdcost,
-                        pc_tree->split[idx], p_split_rd);
-
-      if (this_rdc.rate == INT_MAX) {
-        sum_rdc.rdcost = INT64_MAX;
-        break;
-      } else {
-        sum_rdc.rate += this_rdc.rate;
-        sum_rdc.dist += this_rdc.dist;
-        sum_rdc.rdcost += this_rdc.rdcost;
-        if (cpi->sf.prune_ref_frame_for_rect_partitions &&
-            pc_tree->split[idx]->none.rate != INT_MAX) {
-          const int ref_type =
-              av1_ref_frame_type(pc_tree->split[idx]->none.mic.ref_frame);
-          ref_frames_used[idx] |= (1 << ref_type);
-        }
-        if (idx <= 1 && (bsize <= BLOCK_8X8 ||
-                         pc_tree->split[idx]->partitioning == PARTITION_NONE)) {
-          const MB_MODE_INFO *const mbmi = &pc_tree->split[idx]->none.mic;
-          const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-          // Neither palette mode nor cfl predicted
-          if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
-            if (mbmi->uv_mode != UV_CFL_PRED) split_ctx_is_ready[idx] = 1;
-          }
-        }
-      }
-    }
-    const int reached_last_index = (idx == 4);
-
-    if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
-      sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
-
-      if (sum_rdc.rdcost < best_rdc.rdcost) {
-        best_rdc = sum_rdc;
-        pc_tree->partitioning = PARTITION_SPLIT;
-      }
-    } else if (cpi->sf.less_rectangular_check_level > 0) {
-      // skip rectangular partition test when larger block size
-      // gives better rd cost
-      if (cpi->sf.less_rectangular_check_level == 2 || idx <= 2)
-        do_rectangular_split &= !partition_none_allowed;
-    }
-
-    restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-  }  // if (do_split)
-
-  pc_tree->horizontal[0].skip_ref_frame_mask = 0;
-  pc_tree->horizontal[1].skip_ref_frame_mask = 0;
-  pc_tree->vertical[0].skip_ref_frame_mask = 0;
-  pc_tree->vertical[1].skip_ref_frame_mask = 0;
-  if (cpi->sf.prune_ref_frame_for_rect_partitions) {
-    int used_frames;
-    used_frames = ref_frames_used[0] | ref_frames_used[1];
-    if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames;
-    used_frames = ref_frames_used[2] | ref_frames_used[3];
-    if (used_frames) pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames;
-    used_frames = ref_frames_used[0] | ref_frames_used[2];
-    if (used_frames) pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames;
-    used_frames = ref_frames_used[1] | ref_frames_used[3];
-    if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames;
-  }
-
-  int prune_horz = 0;
-  int prune_vert = 0;
-  if (cpi->sf.ml_prune_rect_partition && !frame_is_intra_only(cm) &&
-      (partition_horz_allowed || partition_vert_allowed)) {
-    av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes);
-    ml_prune_rect_partition(cpi, x, bsize, best_rdc.rdcost, cur_none_rd,
-                            split_rd, &prune_horz, &prune_vert);
-  }
-
-  // PARTITION_HORZ
-  if (partition_horz_allowed && !prune_horz &&
-      (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step))) {
-    av1_init_rd_stats(&sum_rdc);
-    subsize = get_partition_subsize(bsize, PARTITION_HORZ);
-    if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
-    if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
-        partition_none_allowed) {
-      pc_tree->horizontal[0].pred_interp_filter =
-          av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
-    }
-    int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
-                                     ? INT64_MAX
-                                     : (best_rdc.rdcost - sum_rdc.rdcost);
-    sum_rdc.rate = partition_cost[PARTITION_HORZ];
-    sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
-    rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
-                     PARTITION_HORZ, subsize, &pc_tree->horizontal[0],
-                     best_remain_rdcost);
-
-    if (this_rdc.rate == INT_MAX) {
-      sum_rdc.rdcost = INT64_MAX;
-    } else {
-      sum_rdc.rate += this_rdc.rate;
-      sum_rdc.dist += this_rdc.dist;
-      sum_rdc.rdcost += this_rdc.rdcost;
-    }
-    horz_rd[0] = this_rdc.rdcost;
-
-    if (sum_rdc.rdcost < best_rdc.rdcost && has_rows) {
-      const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
-      const MB_MODE_INFO *const mbmi = &pc_tree->horizontal[0].mic;
-      const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-      // Neither palette mode nor cfl predicted
-      if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
-        if (mbmi->uv_mode != UV_CFL_PRED) horz_ctx_is_ready = 1;
-      }
-      update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1);
-      encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col,
-                        subsize, NULL);
-
-      if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_h);
-
-      if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
-          partition_none_allowed) {
-        pc_tree->horizontal[1].pred_interp_filter =
-            av1_extract_interp_filter(ctx_h->mic.interp_filters, 0);
-      }
-      rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
-                       PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
-                       best_rdc.rdcost - sum_rdc.rdcost);
-      horz_rd[1] = this_rdc.rdcost;
-
-      if (this_rdc.rate == INT_MAX) {
-        sum_rdc.rdcost = INT64_MAX;
-      } else {
-        sum_rdc.rate += this_rdc.rate;
-        sum_rdc.dist += this_rdc.dist;
-        sum_rdc.rdcost += this_rdc.rdcost;
-      }
-    }
-
-    if (sum_rdc.rdcost < best_rdc.rdcost) {
-      sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
-      if (sum_rdc.rdcost < best_rdc.rdcost) {
-        best_rdc = sum_rdc;
-        pc_tree->partitioning = PARTITION_HORZ;
-      }
-    }
-
-    restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-  }
-
-  // PARTITION_VERT
-  if (partition_vert_allowed && !prune_vert &&
-      (do_rectangular_split || active_v_edge(cpi, mi_col, mi_step))) {
-    av1_init_rd_stats(&sum_rdc);
-    subsize = get_partition_subsize(bsize, PARTITION_VERT);
-
-    if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
-
-    if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
-        partition_none_allowed) {
-      pc_tree->vertical[0].pred_interp_filter =
-          av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
-    }
-    sum_rdc.rate = partition_cost[PARTITION_VERT];
-    sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
-    int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
-                                     ? INT64_MAX
-                                     : (best_rdc.rdcost - sum_rdc.rdcost);
-    rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
-                     PARTITION_VERT, subsize, &pc_tree->vertical[0],
-                     best_remain_rdcost);
-
-    if (this_rdc.rate == INT_MAX) {
-      sum_rdc.rdcost = INT64_MAX;
-    } else {
-      sum_rdc.rate += this_rdc.rate;
-      sum_rdc.dist += this_rdc.dist;
-      sum_rdc.rdcost += this_rdc.rdcost;
-    }
-    vert_rd[0] = this_rdc.rdcost;
-    const int64_t vert_max_rdcost = best_rdc.rdcost;
-    if (sum_rdc.rdcost < vert_max_rdcost && has_cols) {
-      const MB_MODE_INFO *const mbmi = &pc_tree->vertical[0].mic;
-      const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-      // Neither palette mode nor cfl predicted
-      if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
-        if (mbmi->uv_mode != UV_CFL_PRED) vert_ctx_is_ready = 1;
-      }
-      update_state(cpi, tile_data, td, &pc_tree->vertical[0], mi_row, mi_col,
-                   subsize, 1);
-      encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col,
-                        subsize, NULL);
-
-      if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
-
-      if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
-          partition_none_allowed) {
-        pc_tree->vertical[1].pred_interp_filter =
-            av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
-      }
-      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
-                       PARTITION_VERT, subsize, &pc_tree->vertical[1],
-                       best_rdc.rdcost - sum_rdc.rdcost);
-      vert_rd[1] = this_rdc.rdcost;
-
-      if (this_rdc.rate == INT_MAX) {
-        sum_rdc.rdcost = INT64_MAX;
-      } else {
-        sum_rdc.rate += this_rdc.rate;
-        sum_rdc.dist += this_rdc.dist;
-        sum_rdc.rdcost += this_rdc.rdcost;
-      }
-    }
-
-    if (sum_rdc.rdcost < best_rdc.rdcost) {
-      sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
-      if (sum_rdc.rdcost < best_rdc.rdcost) {
-        best_rdc = sum_rdc;
-        pc_tree->partitioning = PARTITION_VERT;
-      }
-    }
-
-    restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-  }
-
-  if (pb_source_variance == UINT_MAX) {
-    av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes);
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      pb_source_variance = av1_high_get_sby_perpixel_variance(
-          cpi, &x->plane[0].src, bsize, xd->bd);
-    } else {
-      pb_source_variance =
-          av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
-    }
-  }
-
-  const int ext_partition_allowed =
-      do_rectangular_split && bsize > BLOCK_8X8 && partition_none_allowed;
-
-  // The standard AB partitions are allowed whenever ext-partition-types are
-  // allowed
-  int horzab_partition_allowed = ext_partition_allowed;
-  int vertab_partition_allowed = ext_partition_allowed;
-
-#if CONFIG_DIST_8X8
-  if (x->using_dist_8x8) {
-    if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8) {
-      horzab_partition_allowed = 0;
-      vertab_partition_allowed = 0;
-    }
-  }
-#endif
-
-  if (cpi->sf.prune_ext_partition_types_search_level) {
-    if (cpi->sf.prune_ext_partition_types_search_level == 1) {
-      // TODO(debargha,huisu@google.com): may need to tune the threshold for
-      // pb_source_variance.
-      horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
-                                   (pc_tree->partitioning == PARTITION_NONE &&
-                                    pb_source_variance < 32) ||
-                                   pc_tree->partitioning == PARTITION_SPLIT);
-      vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
-                                   (pc_tree->partitioning == PARTITION_NONE &&
-                                    pb_source_variance < 32) ||
-                                   pc_tree->partitioning == PARTITION_SPLIT);
-    } else {
-      horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
-                                   pc_tree->partitioning == PARTITION_SPLIT);
-      vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
-                                   pc_tree->partitioning == PARTITION_SPLIT);
-    }
-    horz_rd[0] = (horz_rd[0] < INT64_MAX ? horz_rd[0] : 0);
-    horz_rd[1] = (horz_rd[1] < INT64_MAX ? horz_rd[1] : 0);
-    vert_rd[0] = (vert_rd[0] < INT64_MAX ? vert_rd[0] : 0);
-    vert_rd[1] = (vert_rd[1] < INT64_MAX ? vert_rd[1] : 0);
-    split_rd[0] = (split_rd[0] < INT64_MAX ? split_rd[0] : 0);
-    split_rd[1] = (split_rd[1] < INT64_MAX ? split_rd[1] : 0);
-    split_rd[2] = (split_rd[2] < INT64_MAX ? split_rd[2] : 0);
-    split_rd[3] = (split_rd[3] < INT64_MAX ? split_rd[3] : 0);
-  }
-  int horza_partition_allowed = horzab_partition_allowed;
-  int horzb_partition_allowed = horzab_partition_allowed;
-  if (cpi->sf.prune_ext_partition_types_search_level) {
-    const int64_t horz_a_rd = horz_rd[1] + split_rd[0] + split_rd[1];
-    const int64_t horz_b_rd = horz_rd[0] + split_rd[2] + split_rd[3];
-    switch (cpi->sf.prune_ext_partition_types_search_level) {
-      case 1:
-        horza_partition_allowed &= (horz_a_rd / 16 * 14 < best_rdc.rdcost);
-        horzb_partition_allowed &= (horz_b_rd / 16 * 14 < best_rdc.rdcost);
-        break;
-      case 2:
-      default:
-        horza_partition_allowed &= (horz_a_rd / 16 * 15 < best_rdc.rdcost);
-        horzb_partition_allowed &= (horz_b_rd / 16 * 15 < best_rdc.rdcost);
-        break;
-    }
-  }
-
-  int verta_partition_allowed = vertab_partition_allowed;
-  int vertb_partition_allowed = vertab_partition_allowed;
-  if (cpi->sf.prune_ext_partition_types_search_level) {
-    const int64_t vert_a_rd = vert_rd[1] + split_rd[0] + split_rd[2];
-    const int64_t vert_b_rd = vert_rd[0] + split_rd[1] + split_rd[3];
-    switch (cpi->sf.prune_ext_partition_types_search_level) {
-      case 1:
-        verta_partition_allowed &= (vert_a_rd / 16 * 14 < best_rdc.rdcost);
-        vertb_partition_allowed &= (vert_b_rd / 16 * 14 < best_rdc.rdcost);
-        break;
-      case 2:
-      default:
-        verta_partition_allowed &= (vert_a_rd / 16 * 15 < best_rdc.rdcost);
-        vertb_partition_allowed &= (vert_b_rd / 16 * 15 < best_rdc.rdcost);
-        break;
-    }
-  }
-
-  if (cpi->sf.ml_prune_ab_partition && ext_partition_allowed &&
-      partition_horz_allowed && partition_vert_allowed) {
-    // TODO(huisu@google.com): x->source_variance may not be the current block's
-    // variance. The correct one to use is pb_source_variance.
-    // Need to re-train the model to fix it.
-    ml_prune_ab_partition(bsize, pc_tree->partitioning,
-                          get_unsigned_bits(x->source_variance),
-                          best_rdc.rdcost, horz_rd, vert_rd, split_rd,
-                          &horza_partition_allowed, &horzb_partition_allowed,
-                          &verta_partition_allowed, &vertb_partition_allowed);
-  }
-
-  // PARTITION_HORZ_A
-  if (partition_horz_allowed && horza_partition_allowed) {
-    subsize = get_partition_subsize(bsize, PARTITION_HORZ_A);
-    pc_tree->horizontala[0].rd_mode_is_ready = 0;
-    pc_tree->horizontala[1].rd_mode_is_ready = 0;
-    pc_tree->horizontala[2].rd_mode_is_ready = 0;
-    if (split_ctx_is_ready[0]) {
-      av1_copy_tree_context(&pc_tree->horizontala[0], &pc_tree->split[0]->none);
-      pc_tree->horizontala[0].mic.partition = PARTITION_HORZ_A;
-      pc_tree->horizontala[0].rd_mode_is_ready = 1;
-      if (split_ctx_is_ready[1]) {
-        av1_copy_tree_context(&pc_tree->horizontala[1],
-                              &pc_tree->split[1]->none);
-        pc_tree->horizontala[1].mic.partition = PARTITION_HORZ_A;
-        pc_tree->horizontala[1].rd_mode_is_ready = 1;
-      }
-    }
-    pc_tree->horizontala[0].skip_ref_frame_mask = 0;
-    pc_tree->horizontala[1].skip_ref_frame_mask = 0;
-    pc_tree->horizontala[2].skip_ref_frame_mask = 0;
-    if (cpi->sf.prune_ref_frame_for_rect_partitions) {
-      int used_frames;
-      used_frames = ref_frames_used[0];
-      if (used_frames)
-        pc_tree->horizontala[0].skip_ref_frame_mask = ~used_frames;
-      used_frames = ref_frames_used[1];
-      if (used_frames)
-        pc_tree->horizontala[1].skip_ref_frame_mask = ~used_frames;
-      used_frames = ref_frames_used[2] | ref_frames_used[3];
-      if (used_frames)
-        pc_tree->horizontala[2].skip_ref_frame_mask = ~used_frames;
-    }
-    rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
-                       pc_tree->horizontala, ctx_none, mi_row, mi_col, bsize,
-                       PARTITION_HORZ_A, mi_row, mi_col, bsize2, mi_row,
-                       mi_col + mi_step, bsize2, mi_row + mi_step, mi_col,
-                       subsize);
-    restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-  }
-  // PARTITION_HORZ_B
-  if (partition_horz_allowed && horzb_partition_allowed) {
-    subsize = get_partition_subsize(bsize, PARTITION_HORZ_B);
-    pc_tree->horizontalb[0].rd_mode_is_ready = 0;
-    pc_tree->horizontalb[1].rd_mode_is_ready = 0;
-    pc_tree->horizontalb[2].rd_mode_is_ready = 0;
-    if (horz_ctx_is_ready) {
-      av1_copy_tree_context(&pc_tree->horizontalb[0], &pc_tree->horizontal[0]);
-      pc_tree->horizontalb[0].mic.partition = PARTITION_HORZ_B;
-      pc_tree->horizontalb[0].rd_mode_is_ready = 1;
-    }
-    pc_tree->horizontalb[0].skip_ref_frame_mask = 0;
-    pc_tree->horizontalb[1].skip_ref_frame_mask = 0;
-    pc_tree->horizontalb[2].skip_ref_frame_mask = 0;
-    if (cpi->sf.prune_ref_frame_for_rect_partitions) {
-      int used_frames;
-      used_frames = ref_frames_used[0] | ref_frames_used[1];
-      if (used_frames)
-        pc_tree->horizontalb[0].skip_ref_frame_mask = ~used_frames;
-      used_frames = ref_frames_used[2];
-      if (used_frames)
-        pc_tree->horizontalb[1].skip_ref_frame_mask = ~used_frames;
-      used_frames = ref_frames_used[3];
-      if (used_frames)
-        pc_tree->horizontalb[2].skip_ref_frame_mask = ~used_frames;
-    }
-    rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
-                       pc_tree->horizontalb, ctx_none, mi_row, mi_col, bsize,
-                       PARTITION_HORZ_B, mi_row, mi_col, subsize,
-                       mi_row + mi_step, mi_col, bsize2, mi_row + mi_step,
-                       mi_col + mi_step, bsize2);
-    restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-  }
-
-  // PARTITION_VERT_A
-  if (partition_vert_allowed && verta_partition_allowed) {
-    subsize = get_partition_subsize(bsize, PARTITION_VERT_A);
-    pc_tree->verticala[0].rd_mode_is_ready = 0;
-    pc_tree->verticala[1].rd_mode_is_ready = 0;
-    pc_tree->verticala[2].rd_mode_is_ready = 0;
-    if (split_ctx_is_ready[0]) {
-      av1_copy_tree_context(&pc_tree->verticala[0], &pc_tree->split[0]->none);
-      pc_tree->verticala[0].mic.partition = PARTITION_VERT_A;
-      pc_tree->verticala[0].rd_mode_is_ready = 1;
-    }
-    pc_tree->verticala[0].skip_ref_frame_mask = 0;
-    pc_tree->verticala[1].skip_ref_frame_mask = 0;
-    pc_tree->verticala[2].skip_ref_frame_mask = 0;
-    if (cpi->sf.prune_ref_frame_for_rect_partitions) {
-      int used_frames;
-      used_frames = ref_frames_used[0];
-      if (used_frames) pc_tree->verticala[0].skip_ref_frame_mask = ~used_frames;
-      used_frames = ref_frames_used[2];
-      if (used_frames) pc_tree->verticala[1].skip_ref_frame_mask = ~used_frames;
-      used_frames = ref_frames_used[1] | ref_frames_used[3];
-      if (used_frames) pc_tree->verticala[2].skip_ref_frame_mask = ~used_frames;
-    }
-    rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
-                       pc_tree->verticala, ctx_none, mi_row, mi_col, bsize,
-                       PARTITION_VERT_A, mi_row, mi_col, bsize2,
-                       mi_row + mi_step, mi_col, bsize2, mi_row,
-                       mi_col + mi_step, subsize);
-    restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-  }
-  // PARTITION_VERT_B
-  if (partition_vert_allowed && vertb_partition_allowed) {
-    subsize = get_partition_subsize(bsize, PARTITION_VERT_B);
-    pc_tree->verticalb[0].rd_mode_is_ready = 0;
-    pc_tree->verticalb[1].rd_mode_is_ready = 0;
-    pc_tree->verticalb[2].rd_mode_is_ready = 0;
-    if (vert_ctx_is_ready) {
-      av1_copy_tree_context(&pc_tree->verticalb[0], &pc_tree->vertical[0]);
-      pc_tree->verticalb[0].mic.partition = PARTITION_VERT_B;
-      pc_tree->verticalb[0].rd_mode_is_ready = 1;
-    }
-    pc_tree->verticalb[0].skip_ref_frame_mask = 0;
-    pc_tree->verticalb[1].skip_ref_frame_mask = 0;
-    pc_tree->verticalb[2].skip_ref_frame_mask = 0;
-    if (cpi->sf.prune_ref_frame_for_rect_partitions) {
-      int used_frames;
-      used_frames = ref_frames_used[0] | ref_frames_used[2];
-      if (used_frames) pc_tree->verticalb[0].skip_ref_frame_mask = ~used_frames;
-      used_frames = ref_frames_used[1];
-      if (used_frames) pc_tree->verticalb[1].skip_ref_frame_mask = ~used_frames;
-      used_frames = ref_frames_used[3];
-      if (used_frames) pc_tree->verticalb[2].skip_ref_frame_mask = ~used_frames;
-    }
-    rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
-                       pc_tree->verticalb, ctx_none, mi_row, mi_col, bsize,
-                       PARTITION_VERT_B, mi_row, mi_col, subsize, mi_row,
-                       mi_col + mi_step, bsize2, mi_row + mi_step,
-                       mi_col + mi_step, bsize2);
-    restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-  }
-
-  // partition4_allowed is 1 if we can use a PARTITION_HORZ_4 or
-  // PARTITION_VERT_4 for this block. This is almost the same as
-  // ext_partition_allowed, except that we don't allow 128x32 or 32x128 blocks,
-  // so we require that bsize is not BLOCK_128X128.
-  const int partition4_allowed =
-      ext_partition_allowed && bsize != BLOCK_128X128;
-  int partition_horz4_allowed = partition4_allowed && partition_horz_allowed;
-  int partition_vert4_allowed = partition4_allowed && partition_vert_allowed;
-  if (cpi->sf.prune_ext_partition_types_search_level == 2) {
-    partition_horz4_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
-                                pc_tree->partitioning == PARTITION_HORZ_A ||
-                                pc_tree->partitioning == PARTITION_HORZ_B ||
-                                pc_tree->partitioning == PARTITION_SPLIT ||
-                                pc_tree->partitioning == PARTITION_NONE);
-    partition_vert4_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
-                                pc_tree->partitioning == PARTITION_VERT_A ||
-                                pc_tree->partitioning == PARTITION_VERT_B ||
-                                pc_tree->partitioning == PARTITION_SPLIT ||
-                                pc_tree->partitioning == PARTITION_NONE);
-  }
-  if (cpi->sf.ml_prune_4_partition && partition4_allowed &&
-      partition_horz_allowed && partition_vert_allowed) {
-    ml_prune_4_partition(cpi, x, bsize, pc_tree->partitioning, best_rdc.rdcost,
-                         horz_rd, vert_rd, split_rd, &partition_horz4_allowed,
-                         &partition_vert4_allowed, pb_source_variance, mi_row,
-                         mi_col);
-  }
-
-#if CONFIG_DIST_8X8
-  if (x->using_dist_8x8) {
-    if (block_size_high[bsize] <= 16 || block_size_wide[bsize] <= 16) {
-      partition_horz4_allowed = 0;
-      partition_vert4_allowed = 0;
-    }
-  }
-#endif
-
-  // PARTITION_HORZ_4
-  if (partition_horz4_allowed && has_rows &&
-      (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step))) {
-    av1_init_rd_stats(&sum_rdc);
-    const int quarter_step = mi_size_high[bsize] / 4;
-    PICK_MODE_CONTEXT *ctx_prev = ctx_none;
-
-    subsize = get_partition_subsize(bsize, PARTITION_HORZ_4);
-    sum_rdc.rate = partition_cost[PARTITION_HORZ_4];
-    sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
-
-    for (int i = 0; i < 4; ++i) {
-      const int this_mi_row = mi_row + i * quarter_step;
-
-      if (i > 0 && this_mi_row >= cm->mi_rows) break;
-
-      PICK_MODE_CONTEXT *ctx_this = &pc_tree->horizontal4[i];
-
-      ctx_this->rd_mode_is_ready = 0;
-      ctx_this->skip_ref_frame_mask = 0;
-      if (cpi->sf.prune_ref_frame_for_rect_partitions) {
-        const int used_frames = i <= 1
-                                    ? (ref_frames_used[0] | ref_frames_used[1])
-                                    : (ref_frames_used[2] | ref_frames_used[3]);
-        if (used_frames) ctx_this->skip_ref_frame_mask = ~used_frames;
-      }
-      if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), this_mi_row,
-                           mi_col, subsize, &best_rdc, &sum_rdc, &this_rdc,
-                           PARTITION_HORZ_4, ctx_prev, ctx_this))
-        break;
-
-      ctx_prev = ctx_this;
-    }
-
-    if (sum_rdc.rdcost < best_rdc.rdcost) {
-      sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
-      if (sum_rdc.rdcost < best_rdc.rdcost) {
-        best_rdc = sum_rdc;
-        pc_tree->partitioning = PARTITION_HORZ_4;
-      }
-    }
-    restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-  }
-
-  // PARTITION_VERT_4
-  if (partition_vert4_allowed && has_cols &&
-      (do_rectangular_split || active_v_edge(cpi, mi_row, mi_step))) {
-    av1_init_rd_stats(&sum_rdc);
-    const int quarter_step = mi_size_wide[bsize] / 4;
-    PICK_MODE_CONTEXT *ctx_prev = ctx_none;
-
-    subsize = get_partition_subsize(bsize, PARTITION_VERT_4);
-    sum_rdc.rate = partition_cost[PARTITION_VERT_4];
-    sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
-
-    for (int i = 0; i < 4; ++i) {
-      const int this_mi_col = mi_col + i * quarter_step;
-
-      if (i > 0 && this_mi_col >= cm->mi_cols) break;
-
-      PICK_MODE_CONTEXT *ctx_this = &pc_tree->vertical4[i];
-
-      ctx_this->rd_mode_is_ready = 0;
-      ctx_this->skip_ref_frame_mask = 0;
-      if (cpi->sf.prune_ref_frame_for_rect_partitions) {
-        const int used_frames = i <= 1
-                                    ? (ref_frames_used[0] | ref_frames_used[2])
-                                    : (ref_frames_used[1] | ref_frames_used[3]);
-        if (used_frames) ctx_this->skip_ref_frame_mask = ~used_frames;
-      }
-      if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), mi_row,
-                           this_mi_col, subsize, &best_rdc, &sum_rdc, &this_rdc,
-                           PARTITION_VERT_4, ctx_prev, ctx_this))
-        break;
-
-      ctx_prev = ctx_this;
-    }
-
-    if (sum_rdc.rdcost < best_rdc.rdcost) {
-      sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
-      if (sum_rdc.rdcost < best_rdc.rdcost) {
-        best_rdc = sum_rdc;
-        pc_tree->partitioning = PARTITION_VERT_4;
-      }
-    }
-    restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-  }
-
-  if (bsize == cm->seq_params.sb_size && best_rdc.rate == INT_MAX) {
-    // Did not find a valid partition, go back and search again, with less
-    // constraint on which partition types to search.
-    x->must_find_valid_partition = 1;
-    goto BEGIN_PARTITION_SEARCH;
-  }
-
-  // TODO(jbb): This code added so that we avoid static analysis
-  // warning related to the fact that best_rd isn't used after this
-  // point.  This code should be refactored so that the duplicate
-  // checks occur in some sub function and thus are used...
-  (void)best_rd;
-  *rd_cost = best_rdc;
-
-  if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
-      pc_tree->index != 3) {
-    if (bsize == cm->seq_params.sb_size) {
-      x->cb_offset = 0;
-      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
-                pc_tree, NULL);
-    } else {
-      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
-                pc_tree, NULL);
-    }
-  }
-
-  if (bsize == cm->seq_params.sb_size) {
-    assert(best_rdc.rate < INT_MAX);
-    assert(best_rdc.dist < INT64_MAX);
-  } else {
-    assert(tp_orig == *tp);
-  }
-}
-
-// Set all the counters as max.
-static void init_first_partition_pass_stats_tables(
-    FIRST_PARTITION_PASS_STATS *stats) {
-  for (int i = 0; i < FIRST_PARTITION_PASS_STATS_TABLES; ++i) {
-    memset(stats[i].ref0_counts, 0xff, sizeof(stats[i].ref0_counts));
-    memset(stats[i].ref1_counts, 0xff, sizeof(stats[i].ref1_counts));
-    stats[i].sample_counts = INT_MAX;
-  }
-}
-
-// clear pc_tree_stats
-static INLINE void clear_pc_tree_stats(PC_TREE *pt) {
-  if (pt == NULL) return;
-  pt->pc_tree_stats.valid = 0;
-  for (int i = 0; i < 4; ++i) {
-    clear_pc_tree_stats(pt->split[i]);
-  }
-}
-
-// Minimum number of samples to trigger the
-// mode_pruning_based_on_two_pass_partition_search feature.
-#define FIRST_PARTITION_PASS_MIN_SAMPLES 16
-
-static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
-                             TileDataEnc *tile_data, int mi_row,
-                             TOKENEXTRA **tp) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  const TileInfo *const tile_info = &tile_data->tile_info;
-  MACROBLOCK *const x = &td->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  SPEED_FEATURES *const sf = &cpi->sf;
-  const int leaf_nodes = 256;
-
-  // Initialize the left context for the new SB row
-  av1_zero_left_context(xd);
-
-  // Reset delta for every tile
-  if (mi_row == tile_info->mi_row_start) {
-    if (cm->delta_q_present_flag) xd->current_qindex = cm->base_qindex;
-    if (cm->delta_lf_present_flag) {
-      av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
-    }
-  }
-
-  PC_TREE *const pc_root =
-      td->pc_root[cm->seq_params.mib_size_log2 - MIN_MIB_SIZE_LOG2];
-  // Code each SB in the row
-  for (int mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
-       mi_col += cm->seq_params.mib_size) {
-    av1_fill_coeff_costs(&td->mb, xd->tile_ctx, num_planes);
-    av1_fill_mode_rates(cm, x, xd->tile_ctx);
-
-    if (sf->adaptive_pred_interp_filter) {
-      for (int i = 0; i < leaf_nodes; ++i) {
-        td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
-        td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
-        td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
-        td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
-      }
-    }
-
-    x->mb_rd_record.num = x->mb_rd_record.index_start = 0;
-
-    av1_zero(x->txb_rd_record_8X8);
-    av1_zero(x->txb_rd_record_16X16);
-    av1_zero(x->txb_rd_record_32X32);
-    av1_zero(x->txb_rd_record_64X64);
-    av1_zero(x->txb_rd_record_intra);
-
-    av1_zero(x->pred_mv);
-    pc_root->index = 0;
-
-    const struct segmentation *const seg = &cm->seg;
-    int seg_skip = 0;
-    if (seg->enabled) {
-      const uint8_t *const map =
-          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
-      const int segment_id =
-          map ? get_segment_id(cm, map, cm->seq_params.sb_size, mi_row, mi_col)
-              : 0;
-      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
-    }
-    xd->cur_frame_force_integer_mv = cm->cur_frame_force_integer_mv;
-
-    x->sb_energy_level = 0;
-    if (cm->delta_q_present_flag) {
-      // Delta-q modulation based on variance
-      av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes);
-
-      int offset_qindex;
-      if (DELTAQ_MODULATION == 1) {
-        const int block_wavelet_energy_level =
-            av1_block_wavelet_energy_level(cpi, x, cm->seq_params.sb_size);
-        x->sb_energy_level = block_wavelet_energy_level;
-        offset_qindex = av1_compute_deltaq_from_energy_level(
-            cpi, block_wavelet_energy_level);
-      } else {
-        const int block_var_level =
-            av1_log_block_var(cpi, x, cm->seq_params.sb_size);
-        x->sb_energy_level = block_var_level;
-        offset_qindex =
-            av1_compute_deltaq_from_energy_level(cpi, block_var_level);
-      }
-      const int qmask = ~(cm->delta_q_res - 1);
-      int current_qindex = clamp(cm->base_qindex + offset_qindex,
-                                 cm->delta_q_res, 256 - cm->delta_q_res);
-      current_qindex =
-          ((current_qindex - cm->base_qindex + cm->delta_q_res / 2) & qmask) +
-          cm->base_qindex;
-      assert(current_qindex > 0);
-
-      xd->delta_qindex = current_qindex - cm->base_qindex;
-      set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->seq_params.sb_size);
-      xd->mi[0]->current_qindex = current_qindex;
-      av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id);
-      if (cpi->oxcf.deltaq_mode == DELTA_Q_LF) {
-        const int lfmask = ~(cm->delta_lf_res - 1);
-        const int delta_lf_from_base =
-            ((offset_qindex / 2 + cm->delta_lf_res / 2) & lfmask);
-
-        // pre-set the delta lf for loop filter. Note that this value is set
-        // before mi is assigned for each block in current superblock
-        for (int j = 0;
-             j < AOMMIN(cm->seq_params.mib_size, cm->mi_rows - mi_row); j++) {
-          for (int k = 0;
-               k < AOMMIN(cm->seq_params.mib_size, cm->mi_cols - mi_col); k++) {
-            cm->mi[(mi_row + j) * cm->mi_stride + (mi_col + k)]
-                .delta_lf_from_base =
-                clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
-            const int frame_lf_count =
-                av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
-            for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
-              cm->mi[(mi_row + j) * cm->mi_stride + (mi_col + k)]
-                  .delta_lf[lf_id] =
-                  clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
-            }
-          }
-        }
-      }
-    }
-
-    int dummy_rate;
-    int64_t dummy_dist;
-    RD_STATS dummy_rdc;
-    const int idx_str = cm->mi_stride * mi_row + mi_col;
-    MB_MODE_INFO **mi = cm->mi_grid_visible + idx_str;
-    x->source_variance = UINT_MAX;
-    if (sf->partition_search_type == FIXED_PARTITION || seg_skip) {
-      set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->seq_params.sb_size);
-      const BLOCK_SIZE bsize =
-          seg_skip ? cm->seq_params.sb_size : sf->always_this_block_size;
-      set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
-      rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
-                       cm->seq_params.sb_size, &dummy_rate, &dummy_dist, 1,
-                       pc_root);
-    } else if (cpi->partition_search_skippable_frame) {
-      set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->seq_params.sb_size);
-      const BLOCK_SIZE bsize =
-          get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
-      set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
-      rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
-                       cm->seq_params.sb_size, &dummy_rate, &dummy_dist, 1,
-                       pc_root);
-    } else {
-      // If required set upper and lower partition size limits
-      if (sf->auto_min_max_partition_size) {
-        set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->seq_params.sb_size);
-        rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
-                                &x->min_partition_size, &x->max_partition_size);
-      }
-
-      reset_partition(pc_root, cm->seq_params.sb_size);
-      x->use_cb_search_range = 0;
-      init_first_partition_pass_stats_tables(x->first_partition_pass_stats);
-      // Do the first pass if we need two pass partition search
-      if (cpi->sf.two_pass_partition_search &&
-          cpi->sf.use_square_partition_only_threshold > BLOCK_4X4 &&
-          mi_row + mi_size_high[cm->seq_params.sb_size] < cm->mi_rows &&
-          mi_col + mi_size_wide[cm->seq_params.sb_size] < cm->mi_cols &&
-          cm->frame_type != KEY_FRAME) {
-        x->cb_partition_scan = 1;
-        // Reset the stats tables.
-        if (sf->mode_pruning_based_on_two_pass_partition_search)
-          av1_zero(x->first_partition_pass_stats);
-        clear_pc_tree_stats(pc_root);
-        rd_pick_sqr_partition(cpi, td, tile_data, tp, mi_row, mi_col,
-                              cm->seq_params.sb_size, &dummy_rdc, INT64_MAX,
-                              pc_root, NULL);
-        x->cb_partition_scan = 0;
-
-        x->source_variance = UINT_MAX;
-        if (sf->adaptive_pred_interp_filter) {
-          for (int i = 0; i < leaf_nodes; ++i) {
-            td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
-            td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
-            td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
-            td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
-          }
-        }
-
-        x->mb_rd_record.num = x->mb_rd_record.index_start = 0;
-        av1_zero(x->txb_rd_record_8X8);
-        av1_zero(x->txb_rd_record_16X16);
-        av1_zero(x->txb_rd_record_32X32);
-        av1_zero(x->txb_rd_record_64X64);
-        av1_zero(x->txb_rd_record_intra);
-        av1_zero(x->pred_mv);
-        pc_root->index = 0;
-
-        for (int idy = 0; idy < mi_size_high[cm->seq_params.sb_size]; ++idy) {
-          for (int idx = 0; idx < mi_size_wide[cm->seq_params.sb_size]; ++idx) {
-            const int offset = cm->mi_stride * (mi_row + idy) + (mi_col + idx);
-            cm->mi_grid_visible[offset] = 0;
-          }
-        }
-
-        x->use_cb_search_range = 1;
-
-        if (sf->mode_pruning_based_on_two_pass_partition_search) {
-          for (int i = 0; i < FIRST_PARTITION_PASS_STATS_TABLES; ++i) {
-            FIRST_PARTITION_PASS_STATS *const stat =
-                &x->first_partition_pass_stats[i];
-            if (stat->sample_counts < FIRST_PARTITION_PASS_MIN_SAMPLES) {
-              // If there are not enough samples collected, make all available.
-              memset(stat->ref0_counts, 0xff, sizeof(stat->ref0_counts));
-              memset(stat->ref1_counts, 0xff, sizeof(stat->ref1_counts));
-            } else if (sf->selective_ref_frame < 2) {
-              // ALTREF2_FRAME and BWDREF_FRAME may be skipped during the
-              // initial partition scan, so we don't eliminate them.
-              stat->ref0_counts[ALTREF2_FRAME] = 0xff;
-              stat->ref1_counts[ALTREF2_FRAME] = 0xff;
-              stat->ref0_counts[BWDREF_FRAME] = 0xff;
-              stat->ref1_counts[BWDREF_FRAME] = 0xff;
-            }
-          }
-        }
-      }
-
-      rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
-                        cm->seq_params.sb_size, &dummy_rdc, INT64_MAX, pc_root,
-                        NULL);
-    }
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-    // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
-    if (cpi->sf.inter_mode_rd_model_estimation && cm->tile_cols == 1 &&
-        cm->tile_rows == 1) {
-      av1_inter_mode_data_fit(tile_data, x->rdmult);
-    }
-#endif
-  }
-}
-
-static void init_encode_frame_mb_context(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-
-  // Copy data over into macro block data structures.
-  av1_setup_src_planes(x, cpi->source, 0, 0, num_planes);
-
-  av1_setup_block_planes(xd, cm->seq_params.subsampling_x,
-                         cm->seq_params.subsampling_y, num_planes);
-}
-
-static MV_REFERENCE_FRAME get_frame_type(const AV1_COMP *cpi) {
-  if (frame_is_intra_only(&cpi->common)) return INTRA_FRAME;
-  // We will not update the golden frame with an internal overlay frame
-  else if ((cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame) ||
-           cpi->rc.is_src_frame_ext_arf)
-    return ALTREF_FRAME;
-  else if (cpi->refresh_golden_frame || cpi->refresh_alt2_ref_frame ||
-           cpi->refresh_alt_ref_frame)
-    return GOLDEN_FRAME;
-  else
-    // TODO(zoeliu): To investigate whether a frame_type other than
-    // INTRA/ALTREF/GOLDEN/LAST needs to be specified seperately.
-    return LAST_FRAME;
-}
-
-static TX_MODE select_tx_mode(const AV1_COMP *cpi) {
-  if (cpi->common.coded_lossless) return ONLY_4X4;
-  if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
-    return TX_MODE_LARGEST;
-  else if (cpi->sf.tx_size_search_method == USE_FULL_RD ||
-           cpi->sf.tx_size_search_method == USE_FAST_RD)
-    return TX_MODE_SELECT;
-  else
-    return cpi->common.tx_mode;
-}
-
-void av1_alloc_tile_data(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int tile_cols = cm->tile_cols;
-  const int tile_rows = cm->tile_rows;
-  int tile_col, tile_row;
-
-  if (cpi->tile_data != NULL) aom_free(cpi->tile_data);
-  CHECK_MEM_ERROR(
-      cm, cpi->tile_data,
-      aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));
-  cpi->allocated_tiles = tile_cols * tile_rows;
-
-  for (tile_row = 0; tile_row < tile_rows; ++tile_row)
-    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
-      TileDataEnc *const tile_data =
-          &cpi->tile_data[tile_row * tile_cols + tile_col];
-      int i, j;
-      for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
-        for (j = 0; j < MAX_MODES; ++j) {
-          tile_data->thresh_freq_fact[i][j] = 32;
-          tile_data->mode_map[i][j] = j;
-        }
-      }
-    }
-}
-
-void av1_init_tile_data(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  const int tile_cols = cm->tile_cols;
-  const int tile_rows = cm->tile_rows;
-  int tile_col, tile_row;
-  TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
-  TOKENLIST *tplist = cpi->tplist[0][0];
-  unsigned int tile_tok = 0;
-  int tplist_count = 0;
-
-  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
-    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
-      TileDataEnc *const tile_data =
-          &cpi->tile_data[tile_row * tile_cols + tile_col];
-      TileInfo *const tile_info = &tile_data->tile_info;
-      av1_tile_init(tile_info, cm, tile_row, tile_col);
-
-      cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
-      pre_tok = cpi->tile_tok[tile_row][tile_col];
-      tile_tok = allocated_tokens(
-          *tile_info, cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes);
-      cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
-      tplist = cpi->tplist[tile_row][tile_col];
-      tplist_count = av1_get_sb_rows_in_tile(cm, tile_data->tile_info);
-      tile_data->allow_update_cdf = !cm->large_scale_tile;
-      tile_data->allow_update_cdf =
-          tile_data->allow_update_cdf && !cm->disable_cdf_update;
-    }
-  }
-}
-
-void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
-                       int tile_col, int mi_row) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  const int tile_cols = cm->tile_cols;
-  TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
-  const TileInfo *const tile_info = &this_tile->tile_info;
-  TOKENEXTRA *tok = NULL;
-  int sb_row_in_tile;
-  int tile_mb_cols = (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
-
-  int num_mb_rows_in_sb =
-      ((1 << (cm->seq_params.mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
-
-  sb_row_in_tile =
-      (mi_row - tile_info->mi_row_start) >> cm->seq_params.mib_size_log2;
-
-  get_start_tok(cpi, tile_row, tile_col, mi_row, &tok,
-                cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes);
-  cpi->tplist[tile_row][tile_col][sb_row_in_tile].start = tok;
-
-  encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
-
-  cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop = tok;
-  cpi->tplist[tile_row][tile_col][sb_row_in_tile].count =
-      (unsigned int)(cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop -
-                     cpi->tplist[tile_row][tile_col][sb_row_in_tile].start);
-
-  assert(
-      (unsigned int)(tok -
-                     cpi->tplist[tile_row][tile_col][sb_row_in_tile].start) <=
-      get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
-                      cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes));
-
-  (void)tile_mb_cols;
-  (void)num_mb_rows_in_sb;
-}
-
-void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
-                     int tile_col) {
-  AV1_COMMON *const cm = &cpi->common;
-  TileDataEnc *const this_tile =
-      &cpi->tile_data[tile_row * cm->tile_cols + tile_col];
-  const TileInfo *const tile_info = &this_tile->tile_info;
-  int mi_row;
-
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-  av1_inter_mode_data_init(this_tile);
-#endif
-
-  av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
-                         tile_info->mi_col_end, tile_row);
-  av1_init_above_context(cm, &td->mb.e_mbd, tile_row);
-
-  // Set up pointers to per thread motion search counters.
-  this_tile->m_search_count = 0;   // Count of motion search hits.
-  this_tile->ex_search_count = 0;  // Exhaustive mesh search hits.
-  td->mb.m_search_count_ptr = &this_tile->m_search_count;
-  td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
-  this_tile->tctx = *cm->fc;
-  td->mb.e_mbd.tile_ctx = &this_tile->tctx;
-
-  cfl_init(&td->mb.e_mbd.cfl, &cm->seq_params);
-
-  av1_crc32c_calculator_init(&td->mb.mb_rd_record.crc_calculator);
-
-  td->intrabc_used_this_tile = 0;
-
-  for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
-       mi_row += cm->seq_params.mib_size) {
-    av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
-  }
-}
-
-static void encode_tiles(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int tile_cols = cm->tile_cols;
-  const int tile_rows = cm->tile_rows;
-  int tile_col, tile_row;
-
-  if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows)
-    av1_alloc_tile_data(cpi);
-
-  av1_init_tile_data(cpi);
-
-  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
-    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
-      av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
-      cpi->intrabc_used |= cpi->td.intrabc_used_this_tile;
-    }
-  }
-}
-
-#if CONFIG_FP_MB_STATS
-static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats,
-                            AV1_COMMON *cm, uint8_t **this_frame_mb_stats) {
-  uint8_t *mb_stats_in = firstpass_mb_stats->mb_stats_start +
-                         cm->current_video_frame * cm->MBs * sizeof(uint8_t);
-
-  if (mb_stats_in > firstpass_mb_stats->mb_stats_end) return EOF;
-
-  *this_frame_mb_stats = mb_stats_in;
-
-  return 1;
-}
-#endif
-
-#define GLOBAL_TRANS_TYPES_ENC 3  // highest motion model to search
-static int gm_get_params_cost(const WarpedMotionParams *gm,
-                              const WarpedMotionParams *ref_gm, int allow_hp) {
-  int params_cost = 0;
-  int trans_bits, trans_prec_diff;
-  switch (gm->wmtype) {
-    case AFFINE:
-    case ROTZOOM:
-      params_cost += aom_count_signed_primitive_refsubexpfin(
-          GM_ALPHA_MAX + 1, SUBEXPFIN_K,
-          (ref_gm->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS),
-          (gm->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
-      params_cost += aom_count_signed_primitive_refsubexpfin(
-          GM_ALPHA_MAX + 1, SUBEXPFIN_K,
-          (ref_gm->wmmat[3] >> GM_ALPHA_PREC_DIFF),
-          (gm->wmmat[3] >> GM_ALPHA_PREC_DIFF));
-      if (gm->wmtype >= AFFINE) {
-        params_cost += aom_count_signed_primitive_refsubexpfin(
-            GM_ALPHA_MAX + 1, SUBEXPFIN_K,
-            (ref_gm->wmmat[4] >> GM_ALPHA_PREC_DIFF),
-            (gm->wmmat[4] >> GM_ALPHA_PREC_DIFF));
-        params_cost += aom_count_signed_primitive_refsubexpfin(
-            GM_ALPHA_MAX + 1, SUBEXPFIN_K,
-            (ref_gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) -
-                (1 << GM_ALPHA_PREC_BITS),
-            (gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
-      }
-      AOM_FALLTHROUGH_INTENDED;
-    case TRANSLATION:
-      trans_bits = (gm->wmtype == TRANSLATION)
-                       ? GM_ABS_TRANS_ONLY_BITS - !allow_hp
-                       : GM_ABS_TRANS_BITS;
-      trans_prec_diff = (gm->wmtype == TRANSLATION)
-                            ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp
-                            : GM_TRANS_PREC_DIFF;
-      params_cost += aom_count_signed_primitive_refsubexpfin(
-          (1 << trans_bits) + 1, SUBEXPFIN_K,
-          (ref_gm->wmmat[0] >> trans_prec_diff),
-          (gm->wmmat[0] >> trans_prec_diff));
-      params_cost += aom_count_signed_primitive_refsubexpfin(
-          (1 << trans_bits) + 1, SUBEXPFIN_K,
-          (ref_gm->wmmat[1] >> trans_prec_diff),
-          (gm->wmmat[1] >> trans_prec_diff));
-      AOM_FALLTHROUGH_INTENDED;
-    case IDENTITY: break;
-    default: assert(0);
-  }
-  return (params_cost << AV1_PROB_COST_SHIFT);
-}
-
-static int do_gm_search_logic(SPEED_FEATURES *const sf, int num_refs_using_gm,
-                              int frame) {
-  (void)num_refs_using_gm;
-  (void)frame;
-  switch (sf->gm_search_type) {
-    case GM_FULL_SEARCH: return 1;
-    case GM_REDUCED_REF_SEARCH:
-      return !(frame == LAST2_FRAME || frame == LAST3_FRAME);
-    case GM_DISABLE_SEARCH: return 0;
-    default: assert(0);
-  }
-  return 1;
-}
-
-// Estimate if the source frame is screen content, based on the portion of
-// blocks that have no more than 4 (experimentally selected) luma colors.
-static int is_screen_content(const uint8_t *src, int use_hbd, int bd,
-                             int stride, int width, int height) {
-  assert(src != NULL);
-  int counts = 0;
-  const int blk_w = 16;
-  const int blk_h = 16;
-  const int limit = 4;
-  for (int r = 0; r + blk_h <= height; r += blk_h) {
-    for (int c = 0; c + blk_w <= width; c += blk_w) {
-      int count_buf[1 << 12];  // Maximum (1 << 12) color levels.
-      const int n_colors =
-          use_hbd ? av1_count_colors_highbd(src + r * stride + c, stride, blk_w,
-                                            blk_h, bd, count_buf)
-                  : av1_count_colors(src + r * stride + c, stride, blk_w, blk_h,
-                                     count_buf);
-      if (n_colors > 1 && n_colors <= limit) counts++;
-    }
-  }
-  // The threshold is 10%.
-  return counts * blk_h * blk_w * 10 > width * height;
-}
-
-static const uint8_t ref_frame_flag_list[REF_FRAMES] = { 0,
-                                                         AOM_LAST_FLAG,
-                                                         AOM_LAST2_FLAG,
-                                                         AOM_LAST3_FLAG,
-                                                         AOM_GOLD_FLAG,
-                                                         AOM_BWD_FLAG,
-                                                         AOM_ALT2_FLAG,
-                                                         AOM_ALT_FLAG };
-
-// Enforce the number of references for each arbitrary frame limited to
-// (INTER_REFS_PER_FRAME - 1)
-static void enforce_max_ref_frames(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  MV_REFERENCE_FRAME ref_frame;
-  int total_valid_refs = 0;
-  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-    if (cpi->ref_frame_flags & ref_frame_flag_list[ref_frame])
-      total_valid_refs++;
-  }
-
-  // NOTE(zoeliu): When all the possible reference frames are availble, we
-  // reduce the number of reference frames by 1, following the rules of:
-  // (1) Retain GOLDEN_FARME/ALTEF_FRAME;
-  // (2) Check the earliest 2 remaining reference frames, and remove the one
-  //     with the lower quality factor, otherwise if both have been coded at
-  //     the same quality level, remove the earliest reference frame.
-
-  if (total_valid_refs == INTER_REFS_PER_FRAME) {
-    unsigned int min_ref_offset = UINT_MAX;
-    unsigned int second_min_ref_offset = UINT_MAX;
-    MV_REFERENCE_FRAME earliest_ref_frames[2] = { LAST3_FRAME, LAST2_FRAME };
-    int earliest_buf_idxes[2] = { 0 };
-
-    // Locate the earliest two reference frames except GOLDEN/ALTREF.
-    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-      // Retain GOLDEN/ALTERF
-      if (ref_frame == GOLDEN_FRAME || ref_frame == ALTREF_FRAME) continue;
-
-      const int buf_idx = cm->frame_refs[ref_frame - LAST_FRAME].idx;
-      if (buf_idx >= 0) {
-        const unsigned int ref_offset =
-            cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset;
-
-        if (min_ref_offset == UINT_MAX) {
-          min_ref_offset = ref_offset;
-          earliest_ref_frames[0] = ref_frame;
-          earliest_buf_idxes[0] = buf_idx;
-        } else {
-          if (get_relative_dist(cm, ref_offset, min_ref_offset) < 0) {
-            second_min_ref_offset = min_ref_offset;
-            earliest_ref_frames[1] = earliest_ref_frames[0];
-            earliest_buf_idxes[1] = earliest_buf_idxes[0];
-
-            min_ref_offset = ref_offset;
-            earliest_ref_frames[0] = ref_frame;
-            earliest_buf_idxes[0] = buf_idx;
-          } else if (second_min_ref_offset == UINT_MAX ||
-                     get_relative_dist(cm, ref_offset, second_min_ref_offset) <
-                         0) {
-            second_min_ref_offset = ref_offset;
-            earliest_ref_frames[1] = ref_frame;
-            earliest_buf_idxes[1] = buf_idx;
-          }
-        }
-      }
-    }
-    // Check the coding quality factors of the two earliest reference frames.
-    RATE_FACTOR_LEVEL ref_rf_level[2];
-    double ref_rf_deltas[2];
-    for (int i = 0; i < 2; ++i) {
-      ref_rf_level[i] = cpi->frame_rf_level[earliest_buf_idxes[i]];
-      ref_rf_deltas[i] = rate_factor_deltas[ref_rf_level[i]];
-    }
-    (void)ref_rf_level;
-    (void)ref_rf_deltas;
-
-#define USE_RF_LEVEL_TO_ENFORCE 1
-#if USE_RF_LEVEL_TO_ENFORCE
-    // If both earliest two reference frames are coded using the same rate-
-    // factor, disable the earliest reference frame; Otherwise disable the
-    // reference frame that uses a lower rate-factor delta.
-    const MV_REFERENCE_FRAME ref_frame_to_disable =
-        (ref_rf_deltas[0] <= ref_rf_deltas[1]) ? earliest_ref_frames[0]
-                                               : earliest_ref_frames[1];
-#else
-    // Always disable the earliest reference frame
-    const MV_REFERENCE_FRAME ref_frame_to_disable = earliest_ref_frames[0];
-#endif  // USE_RF_LEVEL_TO_ENFORCE
-#undef USE_RF_LEVEL_TO_ENFORCE
-
-    switch (ref_frame_to_disable) {
-      case LAST_FRAME: cpi->ref_frame_flags &= ~AOM_LAST_FLAG; break;
-      case LAST2_FRAME: cpi->ref_frame_flags &= ~AOM_LAST2_FLAG; break;
-      case LAST3_FRAME: cpi->ref_frame_flags &= ~AOM_LAST3_FLAG; break;
-      case BWDREF_FRAME: cpi->ref_frame_flags &= ~AOM_BWD_FLAG; break;
-      case ALTREF2_FRAME: cpi->ref_frame_flags &= ~AOM_ALT2_FLAG; break;
-      default: break;
-    }
-  }
-}
-
-static INLINE int av1_refs_are_one_sided(const AV1_COMMON *cm) {
-  assert(!frame_is_intra_only(cm));
-
-  int one_sided_refs = 1;
-  for (int ref = 0; ref < INTER_REFS_PER_FRAME; ++ref) {
-    const int buf_idx = cm->frame_refs[ref].idx;
-    if (buf_idx == INVALID_IDX) continue;
-
-    const int ref_offset =
-        cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset;
-    if (get_relative_dist(cm, ref_offset, (int)cm->frame_offset) > 0) {
-      one_sided_refs = 0;  // bwd reference
-      break;
-    }
-  }
-  return one_sided_refs;
-}
-
-static INLINE void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
-                                             int ref_offset[2]) {
-  ref_offset[0] = ref_offset[1] = 0;
-  if (!cm->is_skip_mode_allowed) return;
-
-  const int buf_idx_0 = cm->frame_refs[cm->ref_frame_idx_0].idx;
-  const int buf_idx_1 = cm->frame_refs[cm->ref_frame_idx_1].idx;
-  assert(buf_idx_0 != INVALID_IDX && buf_idx_1 != INVALID_IDX);
-
-  ref_offset[0] = cm->buffer_pool->frame_bufs[buf_idx_0].cur_frame_offset;
-  ref_offset[1] = cm->buffer_pool->frame_bufs[buf_idx_1].cur_frame_offset;
-}
-
-static int check_skip_mode_enabled(AV1_COMP *const cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-
-  av1_setup_skip_mode_allowed(cm);
-  if (!cm->is_skip_mode_allowed) return 0;
-
-  // Turn off skip mode if the temporal distances of the reference pair to the
-  // current frame are different by more than 1 frame.
-  const int cur_offset = (int)cm->frame_offset;
-  int ref_offset[2];
-  get_skip_mode_ref_offsets(cm, ref_offset);
-  const int cur_to_ref0 = get_relative_dist(cm, cur_offset, ref_offset[0]);
-  const int cur_to_ref1 = abs(get_relative_dist(cm, cur_offset, ref_offset[1]));
-  if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;
-
-  // High Latency: Turn off skip mode if all refs are fwd.
-  if (cpi->all_one_sided_refs && cpi->oxcf.lag_in_frames > 0) return 0;
-
-  static const int flag_list[REF_FRAMES] = { 0,
-                                             AOM_LAST_FLAG,
-                                             AOM_LAST2_FLAG,
-                                             AOM_LAST3_FLAG,
-                                             AOM_GOLD_FLAG,
-                                             AOM_BWD_FLAG,
-                                             AOM_ALT2_FLAG,
-                                             AOM_ALT_FLAG };
-  const int ref_frame[2] = { cm->ref_frame_idx_0 + LAST_FRAME,
-                             cm->ref_frame_idx_1 + LAST_FRAME };
-  if (!(cpi->ref_frame_flags & flag_list[ref_frame[0]]) ||
-      !(cpi->ref_frame_flags & flag_list[ref_frame[1]]))
-    return 0;
-
-  return 1;
-}
-
-// Function to decide if we can skip the global motion parameter computation
-// for a particular ref frame
-static INLINE int skip_gm_frame(AV1_COMMON *const cm, int ref_frame) {
-  if ((ref_frame == LAST3_FRAME || ref_frame == LAST2_FRAME) &&
-      cm->global_motion[GOLDEN_FRAME].wmtype != IDENTITY) {
-    return get_relative_dist(
-               cm, cm->cur_frame->ref_frame_offset[ref_frame - LAST_FRAME],
-               cm->cur_frame->ref_frame_offset[GOLDEN_FRAME - LAST_FRAME]) <= 0;
-  }
-  return 0;
-}
-
-static void set_default_interp_skip_flags(AV1_COMP *cpi) {
-  const int num_planes = av1_num_planes(&cpi->common);
-  cpi->default_interp_skip_flags = (num_planes == 1)
-                                       ? DEFAULT_LUMA_INTERP_SKIP_FLAG
-                                       : DEFAULT_INTERP_SKIP_FLAG;
-}
-
-static void encode_frame_internal(AV1_COMP *cpi) {
-  ThreadData *const td = &cpi->td;
-  MACROBLOCK *const x = &td->mb;
-  AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  RD_COUNTS *const rdc = &cpi->td.rd_counts;
-  int i;
-
-  x->min_partition_size = AOMMIN(x->min_partition_size, cm->seq_params.sb_size);
-  x->max_partition_size = AOMMIN(x->max_partition_size, cm->seq_params.sb_size);
-#if CONFIG_DIST_8X8
-  x->using_dist_8x8 = cpi->oxcf.using_dist_8x8;
-  x->tune_metric = cpi->oxcf.tuning;
-#endif
-  cm->setup_mi(cm);
-
-  xd->mi = cm->mi_grid_visible;
-  xd->mi[0] = cm->mi;
-
-  av1_zero(*td->counts);
-  av1_zero(rdc->comp_pred_diff);
-
-  if (frame_is_intra_only(cm)) {
-    if (cm->seq_params.force_screen_content_tools == 2) {
-      cm->allow_screen_content_tools =
-          cpi->oxcf.content == AOM_CONTENT_SCREEN ||
-          is_screen_content(cpi->source->y_buffer,
-                            cpi->source->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
-                            cpi->source->y_stride, cpi->source->y_width,
-                            cpi->source->y_height);
-    } else {
-      cm->allow_screen_content_tools =
-          cm->seq_params.force_screen_content_tools;
-    }
-  }
-
-  // Allow intrabc when screen content tools are enabled.
-  cm->allow_intrabc = cm->allow_screen_content_tools;
-  // Reset the flag.
-  cpi->intrabc_used = 0;
-  // Need to disable intrabc when superres is selected
-  if (av1_superres_scaled(cm)) {
-    cm->allow_intrabc = 0;
-  }
-
-  if (cpi->oxcf.pass != 1 && av1_use_hash_me(cm)) {
-    // add to hash table
-    const int pic_width = cpi->source->y_crop_width;
-    const int pic_height = cpi->source->y_crop_height;
-    uint32_t *block_hash_values[2][2];
-    int8_t *is_block_same[2][3];
-    int k, j;
-
-    for (k = 0; k < 2; k++) {
-      for (j = 0; j < 2; j++) {
-        CHECK_MEM_ERROR(cm, block_hash_values[k][j],
-                        aom_malloc(sizeof(uint32_t) * pic_width * pic_height));
-      }
-
-      for (j = 0; j < 3; j++) {
-        CHECK_MEM_ERROR(cm, is_block_same[k][j],
-                        aom_malloc(sizeof(int8_t) * pic_width * pic_height));
-      }
-    }
-
-    av1_hash_table_create(&cm->cur_frame->hash_table);
-    av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0],
-                                      is_block_same[0], &cpi->td.mb);
-    av1_generate_block_hash_value(cpi->source, 4, block_hash_values[0],
-                                  block_hash_values[1], is_block_same[0],
-                                  is_block_same[1], &cpi->td.mb);
-    av1_add_to_hash_map_by_row_with_precal_data(
-        &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
-        pic_width, pic_height, 4);
-    av1_generate_block_hash_value(cpi->source, 8, block_hash_values[1],
-                                  block_hash_values[0], is_block_same[1],
-                                  is_block_same[0], &cpi->td.mb);
-    av1_add_to_hash_map_by_row_with_precal_data(
-        &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
-        pic_width, pic_height, 8);
-    av1_generate_block_hash_value(cpi->source, 16, block_hash_values[0],
-                                  block_hash_values[1], is_block_same[0],
-                                  is_block_same[1], &cpi->td.mb);
-    av1_add_to_hash_map_by_row_with_precal_data(
-        &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
-        pic_width, pic_height, 16);
-    av1_generate_block_hash_value(cpi->source, 32, block_hash_values[1],
-                                  block_hash_values[0], is_block_same[1],
-                                  is_block_same[0], &cpi->td.mb);
-    av1_add_to_hash_map_by_row_with_precal_data(
-        &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
-        pic_width, pic_height, 32);
-    av1_generate_block_hash_value(cpi->source, 64, block_hash_values[0],
-                                  block_hash_values[1], is_block_same[0],
-                                  is_block_same[1], &cpi->td.mb);
-    av1_add_to_hash_map_by_row_with_precal_data(
-        &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
-        pic_width, pic_height, 64);
-
-    av1_generate_block_hash_value(cpi->source, 128, block_hash_values[1],
-                                  block_hash_values[0], is_block_same[1],
-                                  is_block_same[0], &cpi->td.mb);
-    av1_add_to_hash_map_by_row_with_precal_data(
-        &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
-        pic_width, pic_height, 128);
-
-    for (k = 0; k < 2; k++) {
-      for (j = 0; j < 2; j++) {
-        aom_free(block_hash_values[k][j]);
-      }
-
-      for (j = 0; j < 3; j++) {
-        aom_free(is_block_same[k][j]);
-      }
-    }
-  }
-
-  for (i = 0; i < MAX_SEGMENTS; ++i) {
-    const int qindex = cm->seg.enabled
-                           ? av1_get_qindex(&cm->seg, i, cm->base_qindex)
-                           : cm->base_qindex;
-    xd->lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 &&
-                      cm->u_dc_delta_q == 0 && cm->u_ac_delta_q == 0 &&
-                      cm->v_dc_delta_q == 0 && cm->v_ac_delta_q == 0;
-    if (xd->lossless[i]) cpi->has_lossless_segment = 1;
-    xd->qindex[i] = qindex;
-    if (xd->lossless[i]) {
-      cpi->optimize_seg_arr[i] = 0;
-    } else {
-      cpi->optimize_seg_arr[i] = cpi->optimize_speed_feature;
-    }
-  }
-  cm->coded_lossless = is_coded_lossless(cm, xd);
-  cm->all_lossless = cm->coded_lossless && !av1_superres_scaled(cm);
-
-  cm->tx_mode = select_tx_mode(cpi);
-
-  // Fix delta q resolution for the moment
-  cm->delta_q_res = DEFAULT_DELTA_Q_RES;
-  // Set delta_q_present_flag before it is used for the first time
-  cm->delta_lf_res = DEFAULT_DELTA_LF_RES;
-  cm->delta_q_present_flag = cpi->oxcf.deltaq_mode != NO_DELTA_Q;
-  cm->delta_lf_present_flag = cpi->oxcf.deltaq_mode == DELTA_Q_LF;
-  cm->delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
-  // update delta_q_present_flag and delta_lf_present_flag based on base_qindex
-  cm->delta_q_present_flag &= cm->base_qindex > 0;
-  cm->delta_lf_present_flag &= cm->base_qindex > 0;
-
-  av1_frame_init_quantizer(cpi);
-
-  av1_initialize_rd_consts(cpi);
-  av1_initialize_me_consts(cpi, x, cm->base_qindex);
-  init_encode_frame_mb_context(cpi);
-  set_default_interp_skip_flags(cpi);
-  if (cm->prev_frame)
-    cm->last_frame_seg_map = cm->prev_frame->seg_map;
-  else
-    cm->last_frame_seg_map = NULL;
-  cm->current_frame_seg_map = cm->cur_frame->seg_map;
-  if (cm->allow_intrabc || cm->coded_lossless) {
-    av1_set_default_ref_deltas(cm->lf.ref_deltas);
-    av1_set_default_mode_deltas(cm->lf.mode_deltas);
-  } else if (cm->prev_frame) {
-    memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
-    memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
-  }
-  memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
-  memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
-
-  // Special case: set prev_mi to NULL when the previous mode info
-  // context cannot be used.
-  cm->prev_mi = cm->allow_ref_frame_mvs ? cm->prev_mip : NULL;
-
-  x->txb_split_count = 0;
-
-  av1_zero(rdc->global_motion_used);
-  av1_zero(cpi->gmparams_cost);
-#if !CONFIG_GLOBAL_MOTION_SEARCH
-  cpi->global_motion_search_done = 1;
-#endif  // !CONFIG_GLOBAL_MOTION_SEARCH
-  if (cpi->common.frame_type == INTER_FRAME && cpi->source &&
-      !cpi->global_motion_search_done) {
-    YV12_BUFFER_CONFIG *ref_buf[REF_FRAMES];
-    int frame;
-    double params_by_motion[RANSAC_NUM_MOTIONS * (MAX_PARAMDIM - 1)];
-    const double *params_this_motion;
-    int inliers_by_motion[RANSAC_NUM_MOTIONS];
-    WarpedMotionParams tmp_wm_params;
-    static const double kIdentityParams[MAX_PARAMDIM - 1] = {
-      0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0
-    };
-    int num_refs_using_gm = 0;
-
-    for (frame = ALTREF_FRAME; frame >= LAST_FRAME; --frame) {
-      ref_buf[frame] = get_ref_frame_buffer(cpi, frame);
-      int pframe;
-      cm->global_motion[frame] = default_warp_params;
-      const WarpedMotionParams *ref_params =
-          cm->prev_frame ? &cm->prev_frame->global_motion[frame]
-                         : &default_warp_params;
-      // check for duplicate buffer
-      for (pframe = ALTREF_FRAME; pframe > frame; --pframe) {
-        if (ref_buf[frame] == ref_buf[pframe]) break;
-      }
-      if (pframe > frame) {
-        memcpy(&cm->global_motion[frame], &cm->global_motion[pframe],
-               sizeof(WarpedMotionParams));
-      } else if (ref_buf[frame] &&
-                 ref_buf[frame]->y_crop_width == cpi->source->y_crop_width &&
-                 ref_buf[frame]->y_crop_height == cpi->source->y_crop_height &&
-                 do_gm_search_logic(&cpi->sf, num_refs_using_gm, frame) &&
-                 !(cpi->sf.selective_ref_gm && skip_gm_frame(cm, frame))) {
-        TransformationType model;
-        const int64_t ref_frame_error =
-            av1_frame_error(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
-                            ref_buf[frame]->y_buffer, ref_buf[frame]->y_stride,
-                            cpi->source->y_buffer, cpi->source->y_width,
-                            cpi->source->y_height, cpi->source->y_stride);
-
-        if (ref_frame_error == 0) continue;
-
-        aom_clear_system_state();
-        for (model = ROTZOOM; model < GLOBAL_TRANS_TYPES_ENC; ++model) {
-          int64_t best_warp_error = INT64_MAX;
-          // Initially set all params to identity.
-          for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) {
-            memcpy(params_by_motion + (MAX_PARAMDIM - 1) * i, kIdentityParams,
-                   (MAX_PARAMDIM - 1) * sizeof(*params_by_motion));
-          }
-
-          compute_global_motion_feature_based(
-              model, cpi->source, ref_buf[frame],
-              cpi->common.seq_params.bit_depth, inliers_by_motion,
-              params_by_motion, RANSAC_NUM_MOTIONS);
-
-          for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) {
-            if (inliers_by_motion[i] == 0) continue;
-
-            params_this_motion = params_by_motion + (MAX_PARAMDIM - 1) * i;
-            convert_model_to_params(params_this_motion, &tmp_wm_params);
-
-            if (tmp_wm_params.wmtype != IDENTITY) {
-              const int64_t warp_error = refine_integerized_param(
-                  &tmp_wm_params, tmp_wm_params.wmtype,
-                  xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
-                  ref_buf[frame]->y_buffer, ref_buf[frame]->y_width,
-                  ref_buf[frame]->y_height, ref_buf[frame]->y_stride,
-                  cpi->source->y_buffer, cpi->source->y_width,
-                  cpi->source->y_height, cpi->source->y_stride, 5,
-                  best_warp_error);
-              if (warp_error < best_warp_error) {
-                best_warp_error = warp_error;
-                // Save the wm_params modified by refine_integerized_param()
-                // rather than motion index to avoid rerunning refine() below.
-                memcpy(&(cm->global_motion[frame]), &tmp_wm_params,
-                       sizeof(WarpedMotionParams));
-              }
-            }
-          }
-          if (cm->global_motion[frame].wmtype <= AFFINE)
-            if (!get_shear_params(&cm->global_motion[frame]))
-              cm->global_motion[frame] = default_warp_params;
-
-          if (cm->global_motion[frame].wmtype == TRANSLATION) {
-            cm->global_motion[frame].wmmat[0] =
-                convert_to_trans_prec(cm->allow_high_precision_mv,
-                                      cm->global_motion[frame].wmmat[0]) *
-                GM_TRANS_ONLY_DECODE_FACTOR;
-            cm->global_motion[frame].wmmat[1] =
-                convert_to_trans_prec(cm->allow_high_precision_mv,
-                                      cm->global_motion[frame].wmmat[1]) *
-                GM_TRANS_ONLY_DECODE_FACTOR;
-          }
-
-          // If the best error advantage found doesn't meet the threshold for
-          // this motion type, revert to IDENTITY.
-          if (!is_enough_erroradvantage(
-                  (double)best_warp_error / ref_frame_error,
-                  gm_get_params_cost(&cm->global_motion[frame], ref_params,
-                                     cm->allow_high_precision_mv),
-                  cpi->sf.gm_erroradv_type)) {
-            cm->global_motion[frame] = default_warp_params;
-          }
-          if (cm->global_motion[frame].wmtype != IDENTITY) break;
-        }
-        aom_clear_system_state();
-      }
-      if (cm->global_motion[frame].wmtype != IDENTITY) num_refs_using_gm++;
-      cpi->gmparams_cost[frame] =
-          gm_get_params_cost(&cm->global_motion[frame], ref_params,
-                             cm->allow_high_precision_mv) +
-          cpi->gmtype_cost[cm->global_motion[frame].wmtype] -
-          cpi->gmtype_cost[IDENTITY];
-    }
-    // clear disabled ref_frames
-    for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
-      const int ref_disabled =
-          !(cpi->ref_frame_flags & ref_frame_flag_list[frame]);
-      if (ref_disabled && cpi->sf.recode_loop != DISALLOW_RECODE) {
-        cpi->gmparams_cost[frame] = 0;
-        cm->global_motion[frame] = default_warp_params;
-      }
-    }
-    cpi->global_motion_search_done = 1;
-  }
-  memcpy(cm->cur_frame->global_motion, cm->global_motion,
-         REF_FRAMES * sizeof(WarpedMotionParams));
-
-  av1_setup_motion_field(cm);
-
-  cpi->all_one_sided_refs =
-      frame_is_intra_only(cm) ? 0 : av1_refs_are_one_sided(cm);
-
-  cm->skip_mode_flag = check_skip_mode_enabled(cpi);
-
-  {
-    struct aom_usec_timer emr_timer;
-    aom_usec_timer_start(&emr_timer);
-
-#if CONFIG_FP_MB_STATS
-    if (cpi->use_fp_mb_stats) {
-      input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm,
-                       &cpi->twopass.this_frame_mb_stats);
-    }
-#endif
-
-    if (cpi->row_mt && (cpi->oxcf.max_threads > 1))
-      av1_encode_tiles_mt(cpi);
-    else if (AOMMIN(cpi->oxcf.max_threads, cm->tile_cols * cm->tile_rows) > 1)
-      av1_encode_tiles_mt(cpi);
-    else
-      encode_tiles(cpi);
-
-    aom_usec_timer_mark(&emr_timer);
-    cpi->time_encode_sb_row += aom_usec_timer_elapsed(&emr_timer);
-  }
-
-  // If intrabc is allowed but never selected, reset the allow_intrabc flag.
-  if (cm->allow_intrabc && !cpi->intrabc_used) cm->allow_intrabc = 0;
-  if (cm->allow_intrabc) cm->delta_lf_present_flag = 0;
-}
-
-// Frame-level encode entry point. Derives cm->frame_offset, clamps the
-// segmentation map to cm->seg.last_active_segid, calls the reference-buffer
-// and sign-bias setup helpers and then runs encode_frame_internal(). When
-// cpi->sf.frame_parameter_update is set it additionally selects the frame
-// reference mode / interpolation filter before the encode pass and, after it,
-// updates the prediction-type thresholds and downgrades reference mode, skip
-// mode and tx_mode according to what the pass actually used.
-void av1_encode_frame(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  // Indicates whether or not to use a default reduced set for ext-tx
-  // rather than the potential full set of 16 transforms
-  cm->reduced_tx_set_used = 0;
-
-  // For frames that are not shown, the order offset is the current frame
-  // number plus the (clamped) ARF and BRF source offsets of the current
-  // gf_group entry; shown frames use the current frame number directly.
-  if (cm->show_frame == 0) {
-    int arf_offset = AOMMIN(
-        (MAX_GF_INTERVAL - 1),
-        cpi->twopass.gf_group.arf_src_offset[cpi->twopass.gf_group.index]);
-    int brf_offset =
-        cpi->twopass.gf_group.brf_src_offset[cpi->twopass.gf_group.index];
-    arf_offset = AOMMIN((MAX_GF_INTERVAL - 1), arf_offset + brf_offset);
-    cm->frame_offset = cm->current_video_frame + arf_offset;
-  } else {
-    cm->frame_offset = cm->current_video_frame;
-  }
-  // Wrap the offset to the number of order-hint bits signalled in the
-  // sequence parameters.
-  cm->frame_offset %= (1 << (cm->seq_params.order_hint_bits_minus_1 + 1));
-
-  // Make sure segment_id is no larger than last_active_segid.
-  if (cm->seg.enabled && cm->seg.update_map) {
-    const int mi_rows = cm->mi_rows;
-    const int mi_cols = cm->mi_cols;
-    const int last_active_segid = cm->seg.last_active_segid;
-    uint8_t *map = cpi->segmentation_map;
-    // The map is walked row by row with a stride of mi_cols entries.
-    for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
-      for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
-        map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
-      }
-      map += mi_cols;
-    }
-  }
-
-  av1_setup_frame_buf_refs(cm);
-  if (cpi->sf.selective_ref_frame >= 2) enforce_max_ref_frames(cpi);
-  av1_setup_frame_sign_bias(cm);
-
-#if CONFIG_MISMATCH_DEBUG
-  mismatch_reset_frame(num_planes);
-#else
-  (void)num_planes;
-#endif
-
-  // Compound inter prediction is only considered for non-intra-only frames.
-  cpi->allow_comp_inter_inter = !frame_is_intra_only(cm);
-
-  if (cpi->sf.frame_parameter_update) {
-    int i;
-    RD_OPT *const rd_opt = &cpi->rd;
-    RD_COUNTS *const rdc = &cpi->td.rd_counts;
-
-    // This code does a single RD pass over the whole frame assuming
-    // either compound, single or hybrid prediction as per whatever has
-    // worked best for that type of frame in the past.
-    // It also predicts whether another coding mode would have worked
-    // better than this coding mode. If that is the case, it remembers
-    // that for subsequent frames.
-    // It does the same analysis for transform size selection also.
-    //
-    // TODO(zoeliu): To investigate whether a frame_type other than
-    // INTRA/ALTREF/GOLDEN/LAST needs to be specified separately.
-    const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
-    int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type];
-    const int is_alt_ref = frame_type == ALTREF_FRAME;
-
-    /* prediction (compound, single or hybrid) mode selection */
-    // NOTE: "is_alt_ref" is true only for OVERLAY/INTNL_OVERLAY frames
-    if (is_alt_ref || !cpi->allow_comp_inter_inter)
-      cm->reference_mode = SINGLE_REFERENCE;
-    else
-      cm->reference_mode = REFERENCE_MODE_SELECT;
-
-    // Large-scale-tile streams are pinned to the regular 8-tap filter;
-    // everything else may switch filters per block.
-    cm->interp_filter = SWITCHABLE;
-    if (cm->large_scale_tile) cm->interp_filter = EIGHTTAP_REGULAR;
-
-    cm->switchable_motion_mode = 1;
-
-    // Usage flags are cleared here and inspected after the encode pass.
-    rdc->compound_ref_used_flag = 0;
-    rdc->skip_mode_used_flag = 0;
-
-    encode_frame_internal(cpi);
-
-    // Blend each stored threshold with this frame's per-MB average difference.
-    for (i = 0; i < REFERENCE_MODES; ++i)
-      mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2;
-
-    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
-      // Use a flag that includes 4x4 blocks
-      if (rdc->compound_ref_used_flag == 0) {
-        cm->reference_mode = SINGLE_REFERENCE;
-#if CONFIG_ENTROPY_STATS
-        av1_zero(cpi->td.counts->comp_inter);
-#endif  // CONFIG_ENTROPY_STATS
-      }
-    }
-    // Re-check on the skip mode status as reference mode may have been changed.
-    if (frame_is_intra_only(cm) || cm->reference_mode == SINGLE_REFERENCE) {
-      cm->is_skip_mode_allowed = 0;
-      cm->skip_mode_flag = 0;
-    }
-    // Skip mode was enabled but no block used it: turn the frame flag off.
-    if (cm->skip_mode_flag && rdc->skip_mode_used_flag == 0)
-      cm->skip_mode_flag = 0;
-
-    // No transform block was split anywhere, so TX_MODE_SELECT can be
-    // replaced by TX_MODE_LARGEST (not done for large-scale-tile streams).
-    if (!cm->large_scale_tile) {
-      if (cm->tx_mode == TX_MODE_SELECT && cpi->td.mb.txb_split_count == 0)
-        cm->tx_mode = TX_MODE_LARGEST;
-    }
-  } else {
-    encode_frame_internal(cpi);
-  }
-}
-
-// Recursively walks the inter transform partition of the current block
-// (xd->mi[0]) starting at (blk_row, blk_col) with candidate size tx_size.
-// When tx_size matches the size stored in mbmi->inter_tx_size for this
-// position a "no split" symbol (0) is recorded; otherwise a "split" symbol (1)
-// is recorded, x->txb_split_count is incremented and the function recurses
-// into the sub_tx_size_map[tx_size] children (unless that sub size is
-// TX_4X4). CDFs are adapted only when allow_update_cdf is nonzero and the
-// counts are only touched under CONFIG_ENTROPY_STATS. The above/left
-// transform partition contexts are updated as a side effect.
-static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
-                              FRAME_COUNTS *counts, TX_SIZE tx_size, int depth,
-                              int blk_row, int blk_col,
-                              uint8_t allow_update_cdf) {
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  const BLOCK_SIZE bsize = mbmi->sb_type;
-  const int max_blocks_high = max_block_high(xd, bsize, 0);
-  const int max_blocks_wide = max_block_wide(xd, bsize, 0);
-  int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
-                                   xd->left_txfm_context + blk_row,
-                                   mbmi->sb_type, tx_size);
-  const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col);
-  const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index];
-
-  // Positions outside the max_block_high/wide limits contribute nothing.
-  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
-  assert(tx_size > TX_4X4);
-
-  if (depth == MAX_VARTX_DEPTH) {
-    // Don't add to counts in this case
-    mbmi->tx_size = tx_size;
-    txfm_partition_update(xd->above_txfm_context + blk_col,
-                          xd->left_txfm_context + blk_row, tx_size, tx_size);
-    return;
-  }
-
-  if (tx_size == plane_tx_size) {
-    // Leaf: the candidate size is the coded size (symbol 0 = no split).
-#if CONFIG_ENTROPY_STATS
-    ++counts->txfm_partition[ctx][0];
-#endif
-    if (allow_update_cdf)
-      update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 0, 2);
-    mbmi->tx_size = tx_size;
-    txfm_partition_update(xd->above_txfm_context + blk_col,
-                          xd->left_txfm_context + blk_row, tx_size, tx_size);
-  } else {
-    // Split: record symbol 1 and descend into the next smaller size.
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
-    const int bsw = tx_size_wide_unit[sub_txs];
-    const int bsh = tx_size_high_unit[sub_txs];
-
-#if CONFIG_ENTROPY_STATS
-    ++counts->txfm_partition[ctx][1];
-#endif
-    if (allow_update_cdf)
-      update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 1, 2);
-    ++x->txb_split_count;
-
-    // A split down to 4x4 is terminal: store it and stop recursing.
-    if (sub_txs == TX_4X4) {
-      mbmi->inter_tx_size[txb_size_index] = TX_4X4;
-      mbmi->tx_size = TX_4X4;
-      txfm_partition_update(xd->above_txfm_context + blk_col,
-                            xd->left_txfm_context + blk_row, TX_4X4, tx_size);
-      return;
-    }
-
-    // Visit every sub-transform of size sub_txs inside this tx_size area.
-    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
-      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
-        int offsetr = row;
-        int offsetc = col;
-
-        update_txfm_count(x, xd, counts, sub_txs, depth + 1, blk_row + offsetr,
-                          blk_col + offsetc, allow_update_cdf);
-      }
-    }
-  }
-}
-
-// Points the above/left transform partition contexts at the position of the
-// block at (mi_row, mi_col) and runs update_txfm_count() on every
-// largest-transform-sized unit that tiles a block of size plane_bsize.
-static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x,
-                                      BLOCK_SIZE plane_bsize, int mi_row,
-                                      int mi_col, FRAME_COUNTS *td_counts,
-                                      uint8_t allow_update_cdf) {
-  MACROBLOCKD *xd = &x->e_mbd;
-  // Block extent expressed in 4x4 units.
-  const int units_wide = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-  const int units_high = block_size_high[plane_bsize] >> tx_size_high_log2[0];
-  const TX_SIZE root_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
-  const int step_rows = tx_size_high_unit[root_tx_size];
-  const int step_cols = tx_size_wide_unit[root_tx_size];
-
-  xd->above_txfm_context = cm->above_txfm_context[xd->tile.tile_row] + mi_col;
-  xd->left_txfm_context =
-      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
-
-  for (int unit_row = 0; unit_row < units_high; unit_row += step_rows) {
-    for (int unit_col = 0; unit_col < units_wide; unit_col += step_cols) {
-      update_txfm_count(x, xd, td_counts, root_tx_size, 0, unit_row, unit_col,
-                        allow_update_cdf);
-    }
-  }
-}
-
-// Recursively replays the inter transform partition of the current block
-// (xd->mi[0]) from (blk_row, blk_col) with candidate size tx_size, updating
-// mbmi->tx_size and the above/left transform partition contexts. No counts or
-// CDFs are touched here.
-static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row,
-                             int blk_col) {
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  const BLOCK_SIZE bsize = mbmi->sb_type;
-  const int row_limit = max_block_high(xd, bsize, 0);
-  const int col_limit = max_block_wide(xd, bsize, 0);
-  const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col);
-  const TX_SIZE coded_tx_size = mbmi->inter_tx_size[txb_size_index];
-
-  if (blk_row >= row_limit || blk_col >= col_limit) return;
-
-  // Leaf: the candidate size is the coded size.
-  if (tx_size == coded_tx_size) {
-    mbmi->tx_size = tx_size;
-    txfm_partition_update(xd->above_txfm_context + blk_col,
-                          xd->left_txfm_context + blk_row, tx_size, tx_size);
-    return;
-  }
-
-  // An 8x8 that is not a leaf can only be split down to 4x4.
-  if (tx_size == TX_8X8) {
-    mbmi->inter_tx_size[txb_size_index] = TX_4X4;
-    mbmi->tx_size = TX_4X4;
-    txfm_partition_update(xd->above_txfm_context + blk_col,
-                          xd->left_txfm_context + blk_row, TX_4X4, tx_size);
-    return;
-  }
-
-  // Otherwise descend into every sub-transform that lies inside the limits.
-  const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
-  const int step_cols = tx_size_wide_unit[sub_txs];
-  const int step_rows = tx_size_high_unit[sub_txs];
-  for (int row = 0; row < tx_size_high_unit[tx_size]; row += step_rows) {
-    const int child_row = blk_row + row;
-    for (int col = 0; col < tx_size_wide_unit[tx_size]; col += step_cols) {
-      const int child_col = blk_col + col;
-      if (child_row < row_limit && child_col < col_limit) {
-        set_txfm_context(xd, sub_txs, child_row, child_col);
-      }
-    }
-  }
-}
-
-// Points the above/left transform partition contexts at the position of the
-// block at (mi_row, mi_col) and runs set_txfm_context() on every
-// largest-transform-sized unit that tiles a block of size plane_bsize.
-static void tx_partition_set_contexts(const AV1_COMMON *const cm,
-                                      MACROBLOCKD *xd, BLOCK_SIZE plane_bsize,
-                                      int mi_row, int mi_col) {
-  // Block extent expressed in 4x4 units.
-  const int units_wide = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-  const int units_high = block_size_high[plane_bsize] >> tx_size_high_log2[0];
-  const TX_SIZE root_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
-  const int step_rows = tx_size_high_unit[root_tx_size];
-  const int step_cols = tx_size_wide_unit[root_tx_size];
-
-  xd->above_txfm_context = cm->above_txfm_context[xd->tile.tile_row] + mi_col;
-  xd->left_txfm_context =
-      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
-
-  for (int unit_row = 0; unit_row < units_high; unit_row += step_rows) {
-    for (int unit_col = 0; unit_col < units_wide; unit_col += step_cols) {
-      set_txfm_context(xd, root_tx_size, unit_row, unit_col);
-    }
-  }
-}
-
-// Encodes the block of size bsize at (mi_row, mi_col) using the mode info in
-// xd->mi[0].
-//
-// Intra blocks are encoded plane by plane and, when a palette is in use, the
-// colour map is either tokenized (dry_run == 0) or its cost is added to *rate
-// (dry_run == DRY_RUN_COSTCOEFFS). Inter blocks have their predictors built,
-// then are transformed/quantized and tokenized.
-//
-// On a real (non dry-run) pass the transform-size statistics, CDFs and
-// x->txb_split_count are updated. In all cases the transform-size contexts are
-// brought up to date, and luma is stored for CfL when an inter block is not a
-// chroma reference.
-//
-// `rate` is an accumulator owned by the caller; it is dereferenced here when
-// dry_run == DRY_RUN_COSTCOEFFS and a palette is used, so it must be valid in
-// that mode.
-static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
-                              ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
-                              int mi_row, int mi_col, BLOCK_SIZE bsize,
-                              int *rate) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCK *const x = &td->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO **mi_4x4 = xd->mi;
-  MB_MODE_INFO *mbmi = mi_4x4[0];
-  const int seg_skip =
-      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
-  const int mis = cm->mi_stride;
-  const int mi_width = mi_size_wide[bsize];
-  const int mi_height = mi_size_high[bsize];
-  const int is_inter = is_inter_block(mbmi);
-
-  if (cpi->sf.mode_pruning_based_on_two_pass_partition_search &&
-      x->cb_partition_scan) {
-    // Rows span the block height and columns span the block width (the
-    // bounds were previously swapped, which mis-sampled rectangular blocks).
-    for (int row = mi_row; row < mi_row + mi_height;
-         row += FIRST_PARTITION_PASS_SAMPLE_REGION) {
-      for (int col = mi_col; col < mi_col + mi_width;
-           col += FIRST_PARTITION_PASS_SAMPLE_REGION) {
-        const int index = av1_first_partition_pass_stats_index(row, col);
-        FIRST_PARTITION_PASS_STATS *const stats =
-            &x->first_partition_pass_stats[index];
-        // Increase the counter of data samples.
-        ++stats->sample_counts;
-        // Increase the counter for ref_frame[0] and ref_frame[1]; both
-        // counters saturate at 255.
-        if (stats->ref0_counts[mbmi->ref_frame[0]] < 255)
-          ++stats->ref0_counts[mbmi->ref_frame[0]];
-        // The saturation guard must test the same entry that is incremented
-        // (it previously tested the ref_frame[0] slot of ref1_counts).
-        if (mbmi->ref_frame[1] >= 0 &&
-            stats->ref1_counts[mbmi->ref_frame[1]] < 255)
-          ++stats->ref1_counts[mbmi->ref_frame[1]];
-      }
-    }
-  }
-
-  if (!is_inter) {
-    xd->cfl.is_chroma_reference =
-        is_chroma_reference(mi_row, mi_col, bsize, cm->seq_params.subsampling_x,
-                            cm->seq_params.subsampling_y);
-    xd->cfl.store_y = store_cfl_required(cm, xd);
-    // Start from "skip" before the per-plane intra encode runs.
-    mbmi->skip = 1;
-    for (int plane = 0; plane < num_planes; ++plane) {
-      av1_encode_intra_block_plane(cpi, x, bsize, plane,
-                                   cpi->optimize_seg_arr[mbmi->segment_id],
-                                   mi_row, mi_col);
-    }
-
-    // If there is at least one lossless segment, force the skip for intra
-    // block to be 0, in order to avoid the segment_id to be changed by in
-    // write_segment_id().
-    if (!cpi->common.seg.segid_preskip && cpi->common.seg.update_map &&
-        cpi->has_lossless_segment)
-      mbmi->skip = 0;
-
-    xd->cfl.store_y = 0;
-    if (av1_allow_palette(cm->allow_screen_content_tools, bsize)) {
-      for (int plane = 0; plane < AOMMIN(2, num_planes); ++plane) {
-        if (mbmi->palette_mode_info.palette_size[plane] > 0) {
-          if (!dry_run) {
-            av1_tokenize_color_map(x, plane, t, bsize, mbmi->tx_size,
-                                   PALETTE_MAP, tile_data->allow_update_cdf,
-                                   td->counts);
-          } else if (dry_run == DRY_RUN_COSTCOEFFS) {
-            // Accumulate into the caller's counter. The previous
-            // `rate += ...` advanced the pointer itself, dropping the cost
-            // and handing a shifted pointer to av1_update_txb_context().
-            *rate +=
-                av1_cost_color_map(x, plane, bsize, mbmi->tx_size, PALETTE_MAP);
-          }
-        }
-      }
-    }
-
-    av1_update_txb_context(cpi, td, dry_run, bsize, rate, mi_row, mi_col,
-                           tile_data->allow_update_cdf);
-  } else {
-    int ref;
-    const int is_compound = has_second_ref(mbmi);
-
-    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
-    for (ref = 0; ref < 1 + is_compound; ++ref) {
-      YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[ref]);
-      assert(IMPLIES(!is_intrabc_block(mbmi), cfg));
-      av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
-                           &xd->block_refs[ref]->sf, num_planes);
-    }
-
-    av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
-    if (mbmi->motion_mode == OBMC_CAUSAL)
-      av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
-
-#if CONFIG_MISMATCH_DEBUG
-    if (dry_run == OUTPUT_ENABLED) {
-      for (int plane = 0; plane < num_planes; ++plane) {
-        const struct macroblockd_plane *pd = &xd->plane[plane];
-        int pixel_c, pixel_r;
-        mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0,
-                        pd->subsampling_x, pd->subsampling_y);
-        if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
-                                 pd->subsampling_y))
-          continue;
-        mismatch_record_block_pre(pd->dst.buf, pd->dst.stride, cm->frame_offset,
-                                  plane, pixel_c, pixel_r, pd->width,
-                                  pd->height,
-                                  xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
-      }
-    }
-#else
-    (void)num_planes;
-#endif
-
-    av1_encode_sb(cpi, x, bsize, mi_row, mi_col, dry_run);
-    av1_tokenize_sb_vartx(cpi, td, t, dry_run, mi_row, mi_col, bsize, rate,
-                          tile_data->allow_update_cdf);
-  }
-
-  // Statistics, CDF adaptation and split counting only happen on a real pass.
-  if (!dry_run) {
-    if (av1_allow_intrabc(cm) && is_intrabc_block(mbmi))
-      td->intrabc_used_this_tile = 1;
-    if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id] &&
-        mbmi->sb_type > BLOCK_4X4 && !(is_inter && (mbmi->skip || seg_skip))) {
-      if (is_inter) {
-        tx_partition_count_update(cm, x, bsize, mi_row, mi_col, td->counts,
-                                  tile_data->allow_update_cdf);
-      } else {
-        if (mbmi->tx_size != max_txsize_rect_lookup[bsize])
-          ++x->txb_split_count;
-        if (block_signals_txsize(bsize)) {
-          const int tx_size_ctx = get_tx_size_context(xd);
-          const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize);
-          const int depth = tx_size_to_depth(mbmi->tx_size, bsize);
-          const int max_depths = bsize_to_max_depth(bsize);
-
-          if (tile_data->allow_update_cdf)
-            update_cdf(xd->tile_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx],
-                       depth, max_depths + 1);
-#if CONFIG_ENTROPY_STATS
-          ++td->counts->intra_tx_size[tx_size_cat][tx_size_ctx][depth];
-#endif
-        }
-      }
-      assert(IMPLIES(is_rect_tx(mbmi->tx_size), is_rect_tx_allowed(xd, mbmi)));
-    } else {
-      int i, j;
-      TX_SIZE intra_tx_size;
-      // The new intra coding scheme requires no change of transform size
-      if (is_inter) {
-        if (xd->lossless[mbmi->segment_id]) {
-          intra_tx_size = TX_4X4;
-        } else {
-          intra_tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode);
-        }
-      } else {
-        intra_tx_size = mbmi->tx_size;
-      }
-
-      // Propagate the size to every mode-info entry covered by the block
-      // that lies inside the frame.
-      for (j = 0; j < mi_height; j++)
-        for (i = 0; i < mi_width; i++)
-          if (mi_col + i < cm->mi_cols && mi_row + j < cm->mi_rows)
-            mi_4x4[mis * j + i]->tx_size = intra_tx_size;
-
-      if (intra_tx_size != max_txsize_rect_lookup[bsize]) ++x->txb_split_count;
-    }
-  }
-
-  // Bring the transform-size contexts up to date. For inter blocks with a
-  // signalled partition this is only done on dry runs; otherwise a single
-  // transform size is written to the contexts.
-  if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type) &&
-      is_inter && !(mbmi->skip || seg_skip) &&
-      !xd->lossless[mbmi->segment_id]) {
-    if (dry_run) tx_partition_set_contexts(cm, xd, bsize, mi_row, mi_col);
-  } else {
-    TX_SIZE tx_size = mbmi->tx_size;
-    // The new intra coding scheme requires no change of transform size
-    if (is_inter) {
-      if (xd->lossless[mbmi->segment_id]) {
-        tx_size = TX_4X4;
-      } else {
-        tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode);
-      }
-    } else {
-      tx_size = (bsize > BLOCK_4X4) ? tx_size : TX_4X4;
-    }
-    mbmi->tx_size = tx_size;
-    set_txfm_ctxs(tx_size, xd->n4_w, xd->n4_h,
-                  (mbmi->skip || seg_skip) && is_inter_block(mbmi), xd);
-  }
-  // Inter blocks that are not a chroma reference still store their luma for
-  // CfL when CfL is allowed.
-  CFL_CTX *const cfl = &xd->cfl;
-  if (is_inter_block(mbmi) &&
-      !is_chroma_reference(mi_row, mi_col, bsize, cfl->subsampling_x,
-                           cfl->subsampling_y) &&
-      is_cfl_allowed(xd)) {
-    cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size);
-  }
-}
diff --git a/third_party/aom/av1/encoder/encodeframe.h b/third_party/aom/av1/encoder/encodeframe.h
deleted file mode 100644
index e8cf9b468..000000000
--- a/third_party/aom/av1/encoder/encodeframe.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_ENCODEFRAME_H_
-#define AOM_AV1_ENCODER_ENCODEFRAME_H_
-
-#include "aom/aom_integer.h"
-#include "av1/common/blockd.h"
-#include "av1/common/enums.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define DELTAQ_MODULATION 1  // 0: variance based, 1: wavelet AC energy based
-
-struct macroblock;
-struct yv12_buffer_config;
-struct AV1_COMP;
-struct ThreadData;
-
-void av1_setup_src_planes(struct macroblock *x,
-                          const struct yv12_buffer_config *src, int mi_row,
-                          int mi_col, const int num_planes);
-
-void av1_encode_frame(struct AV1_COMP *cpi);
-
-void av1_alloc_tile_data(struct AV1_COMP *cpi);
-void av1_init_tile_data(struct AV1_COMP *cpi);
-void av1_encode_tile(struct AV1_COMP *cpi, struct ThreadData *td, int tile_row,
-                     int tile_col);
-void av1_encode_sb_row(struct AV1_COMP *cpi, struct ThreadData *td,
-                       int tile_row, int tile_col, int mi_row);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_ENCODEFRAME_H_
diff --git a/third_party/aom/av1/encoder/encodemb.c b/third_party/aom/av1/encoder/encodemb.c
deleted file mode 100644
index ad12577e6..000000000
--- a/third_party/aom/av1/encoder/encodemb.c
+++ /dev/null
@@ -1,649 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/bitwriter.h"
-#include "aom_dsp/quantize.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-
-#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
-#include "aom_util/debug_util.h"
-#endif  // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
-
-#include "av1/common/cfl.h"
-#include "av1/common/idct.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/scan.h"
-
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/encodemb.h"
-#include "av1/encoder/encodetxb.h"
-#include "av1/encoder/hybrid_fwd_txfm.h"
-#include "av1/encoder/rd.h"
-#include "av1/encoder/rdopt.h"
-
-// Returns 1 when either dimension is smaller than 4, in which case the C
-// version of the subtraction must be used; returns 0 otherwise.
-static int check_subtract_block_size(int w, int h) {
-  if (w < 4) return 1;
-  return h < 4;
-}
-
-// Writes the residual of src8 against pred8 into diff for a rows x cols block.
-// Blocks flagged by check_subtract_block_size() use the *_c kernels, all
-// others use the non-_c entry points. Frames flagged with
-// YV12_FLAG_HIGHBITDEPTH use the highbd variants, which also receive xd->bd.
-static void subtract_block(const MACROBLOCKD *xd, int rows, int cols,
-                           int16_t *diff, ptrdiff_t diff_stride,
-                           const uint8_t *src8, ptrdiff_t src_stride,
-                           const uint8_t *pred8, ptrdiff_t pred_stride) {
-  const int force_c = check_subtract_block_size(rows, cols);
-  const int is_highbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
-
-  if (force_c && is_highbd) {
-    aom_highbd_subtract_block_c(rows, cols, diff, diff_stride, src8,
-                                src_stride, pred8, pred_stride, xd->bd);
-  } else if (force_c) {
-    aom_subtract_block_c(rows, cols, diff, diff_stride, src8, src_stride, pred8,
-                         pred_stride);
-  } else if (is_highbd) {
-    aom_highbd_subtract_block(rows, cols, diff, diff_stride, src8, src_stride,
-                              pred8, pred_stride, xd->bd);
-  } else {
-    aom_subtract_block(rows, cols, diff, diff_stride, src8, src_stride, pred8,
-                       pred_stride);
-  }
-}
-
-// Computes the residual (source minus destination/prediction buffer) for one
-// transform block of size tx_size located at (blk_row, blk_col) inside a
-// plane block of size plane_bsize, and stores it in the plane's src_diff
-// buffer. blk_row / blk_col are scaled by tx_size_wide_log2[0] to obtain
-// sample offsets.
-void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize,
-                      int blk_col, int blk_row, TX_SIZE tx_size) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  struct macroblock_plane *const src_plane = &x->plane[plane];
-  const struct macroblockd_plane *const dst_plane = &x->e_mbd.plane[plane];
-
-  // The residual buffer is laid out with the plane block width as stride.
-  const int diff_stride = block_size_wide[plane_bsize];
-  const int src_stride = src_plane->src.stride;
-  const int dst_stride = dst_plane->dst.stride;
-
-  const int dst_offset = (blk_row * dst_stride + blk_col)
-                         << tx_size_wide_log2[0];
-  const int src_offset = (blk_row * src_stride + blk_col)
-                         << tx_size_wide_log2[0];
-  const int diff_offset = (blk_row * diff_stride + blk_col)
-                          << tx_size_wide_log2[0];
-
-  uint8_t *dst = &dst_plane->dst.buf[dst_offset];
-  uint8_t *src = &src_plane->src.buf[src_offset];
-  int16_t *src_diff = &src_plane->src_diff[diff_offset];
-
-  subtract_block(xd, tx_size_high[tx_size], tx_size_wide[tx_size], src_diff,
-                 diff_stride, src, src_stride, dst, dst_stride);
-}
-
-// Computes the residual for the whole of one plane of a block: the plane
-// block size is derived from bsize and the plane's subsampling, and the
-// result is written to the plane's src_diff buffer with the plane block width
-// as stride.
-void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
-  const MACROBLOCKD *xd = &x->e_mbd;
-  struct macroblock_plane *const src_plane = &x->plane[plane];
-  const struct macroblockd_plane *const dst_plane = &x->e_mbd.plane[plane];
-  const BLOCK_SIZE plane_bsize = get_plane_block_size(
-      bsize, dst_plane->subsampling_x, dst_plane->subsampling_y);
-  const int plane_width = block_size_wide[plane_bsize];
-  const int plane_height = block_size_high[plane_bsize];
-
-  subtract_block(xd, plane_height, plane_width, src_plane->src_diff,
-                 plane_width, src_plane->src.buf, src_plane->src.stride,
-                 dst_plane->dst.buf, dst_plane->dst.stride);
-}
-
-// Runs coefficient optimization (av1_optimize_txb_new) on one transform
-// block and returns the value that call yields. When the block has no
-// coefficients (eob == 0), optimization is disabled for the segment, or the
-// segment is lossless, nothing is optimized: *rate_cost is set from
-// av1_cost_skip_txb() and the unchanged eob is returned. fast_mode is unused.
-int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane,
-                   int block, TX_SIZE tx_size, TX_TYPE tx_type,
-                   const TXB_CTX *const txb_ctx, int fast_mode,
-                   int *rate_cost) {
-  (void)fast_mode;
-
-  MACROBLOCKD *const xd = &mb->e_mbd;
-  struct macroblock_plane *const p = &mb->plane[plane];
-  const int segment_id = xd->mi[0]->segment_id;
-  const int eob = p->eobs[block];
-
-  const int can_optimize = eob != 0 && cpi->optimize_seg_arr[segment_id] &&
-                           !xd->lossless[segment_id];
-  if (can_optimize) {
-    return av1_optimize_txb_new(cpi, mb, plane, block, tx_size, tx_type,
-                                txb_ctx, rate_cost, cpi->oxcf.sharpness);
-  }
-
-  *rate_cost = av1_cost_skip_txb(mb, txb_ctx, plane, tx_size);
-  return eob;
-}
-
-// Column index into quant_func_list: selects the low or high bit-depth
-// quantizer facade. QUANT_FUNC_TYPES is the number of columns.
-typedef enum QUANT_FUNC {
-  QUANT_FUNC_LOWBD = 0,
-  QUANT_FUNC_HIGHBD = 1,
-  QUANT_FUNC_TYPES = 2
-} QUANT_FUNC;
-
-// Quantizer dispatch table, indexed as [AV1_XFORM_QUANT value][QUANT_FUNC].
-// NOTE(review): the row order (fp, b, dc, then a NULL row) presumably mirrors
-// the AV1_XFORM_QUANT enum, with the NULL row being the skip-quant entry that
-// av1_xform_quant() never dispatches through -- confirm against the enum.
-static AV1_QUANT_FACADE
-    quant_func_list[AV1_XFORM_QUANT_TYPES][QUANT_FUNC_TYPES] = {
-      { av1_quantize_fp_facade, av1_highbd_quantize_fp_facade },
-      { av1_quantize_b_facade, av1_highbd_quantize_b_facade },
-      { av1_quantize_dc_facade, av1_highbd_quantize_dc_facade },
-      { NULL, NULL }
-    };
-
-// Forward-transforms the residual of one transform block and, unless
-// xform_quant_idx is AV1_XFORM_QUANT_SKIP_QUANT, quantizes the result.
-//
-// The residual is read from the plane's src_diff buffer at (blk_row, blk_col);
-// transform coefficients go to p->coeff, quantized and dequantized
-// coefficients to p->qcoeff / pd->dqcoeff, and the end-of-block position to
-// p->eobs[block]. p->txb_entropy_ctx[block] is also refreshed (see the note
-// near the end of the function).
-void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
-                     int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
-                     TX_SIZE tx_size, TX_TYPE tx_type,
-                     AV1_XFORM_QUANT xform_quant_idx) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const struct macroblock_plane *const p = &x->plane[plane];
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
-
-  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
-  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
-  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-  uint16_t *const eob = &p->eobs[block];
-  const int diff_stride = block_size_wide[plane_bsize];
-  int seg_id = mbmi->segment_id;
-  const TX_SIZE qm_tx_size = av1_get_adjusted_tx_size(tx_size);
-  // Use a flat matrix (i.e. no weighting) for 1D and Identity transforms
-  const qm_val_t *qmatrix =
-      IS_2D_TRANSFORM(tx_type) ? pd->seg_qmatrix[seg_id][qm_tx_size]
-                               : cm->gqmatrix[NUM_QM_LEVELS - 1][0][qm_tx_size];
-  const qm_val_t *iqmatrix =
-      IS_2D_TRANSFORM(tx_type)
-          ? pd->seg_iqmatrix[seg_id][qm_tx_size]
-          : cm->giqmatrix[NUM_QM_LEVELS - 1][0][qm_tx_size];
-
-  // Block position is given in transform units; scale to a sample offset.
-  const int src_offset = (blk_row * diff_stride + blk_col);
-  const int16_t *src_diff = &p->src_diff[src_offset << tx_size_wide_log2[0]];
-  QUANT_PARAM qparam;
-  qparam.log_scale = av1_get_tx_scale(tx_size);
-  qparam.tx_size = tx_size;
-  qparam.qmatrix = qmatrix;
-  qparam.iqmatrix = iqmatrix;
-  TxfmParam txfm_param;
-  txfm_param.tx_type = tx_type;
-  txfm_param.tx_size = tx_size;
-  txfm_param.lossless = xd->lossless[mbmi->segment_id];
-  txfm_param.tx_set_type = av1_get_ext_tx_set_type(
-      txfm_param.tx_size, is_inter_block(mbmi), cm->reduced_tx_set_used);
-
-  txfm_param.bd = xd->bd;
-  txfm_param.is_hbd = get_bitdepth_data_path_index(xd);
-
-  av1_fwd_txfm(src_diff, coeff, diff_stride, &txfm_param);
-
-  if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
-    const int n_coeffs = av1_get_max_eob(tx_size);
-    if (LIKELY(!x->skip_block)) {
-      // Dispatch on quantizer kind and on the low/high bit-depth data path.
-      quant_func_list[xform_quant_idx][txfm_param.is_hbd](
-          coeff, n_coeffs, p, qcoeff, dqcoeff, eob, scan_order, &qparam);
-    } else {
-      av1_quantize_skip(n_coeffs, qcoeff, dqcoeff, eob);
-    }
-  }
-  // NOTE: optimize_b_following is true means av1_optimize_b will be called
-  // When the condition of doing optimize_b is changed,
-  // this flag need update simultaneously
-  const int optimize_b_following =
-      (xform_quant_idx != AV1_XFORM_QUANT_FP) || (txfm_param.lossless);
-  if (optimize_b_following) {
-    p->txb_entropy_ctx[block] =
-        (uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob);
-  } else {
-    p->txb_entropy_ctx[block] = 0;
-  }
-  return;
-}
-
-// Encodes one transform block: transform + quantization (followed by
-// av1_optimize_b() when args->enable_optimize_b is set), entropy-context
-// update, and reconstruction into the destination buffer through the inverse
-// transform when the block has coefficients. `arg` is a
-// struct encode_b_args *. mi_row / mi_col / dry_run are only used by the
-// CONFIG_MISMATCH_DEBUG code at the end.
-static void encode_block(int plane, int block, int blk_row, int blk_col,
-                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg,
-                         int mi_row, int mi_col, RUN_TYPE dry_run) {
-  (void)mi_row;
-  (void)mi_col;
-  (void)dry_run;
-  struct encode_b_args *const args = arg;
-  const AV1_COMMON *const cm = &args->cpi->common;
-  MACROBLOCK *const x = args->x;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  struct macroblock_plane *const p = &x->plane[plane];
-  struct macroblockd_plane *const pd = &xd->plane[plane];
-  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-  uint8_t *dst;
-  ENTROPY_CONTEXT *a, *l;
-  int dummy_rate_cost = 0;
-
-  // Plane block width in transform units, used to index the blk_skip map.
-  const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-  dst = &pd->dst
-             .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
-
-  // Above / left entropy contexts for this transform block position.
-  a = &args->ta[blk_col];
-  l = &args->tl[blk_row];
-
-  if (!is_blk_skip(x, plane, blk_row * bw + blk_col) && !mbmi->skip_mode) {
-    TX_TYPE tx_type = av1_get_tx_type(pd->plane_type, xd, blk_row, blk_col,
-                                      tx_size, cm->reduced_tx_set_used);
-    if (args->enable_optimize_b) {
-      av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
-                      tx_size, tx_type, AV1_XFORM_QUANT_FP);
-      TXB_CTX txb_ctx;
-      get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
-      // The rate returned by the optimizer is not needed here.
-      av1_optimize_b(args->cpi, x, plane, block, tx_size, tx_type, &txb_ctx, 1,
-                     &dummy_rate_cost);
-    } else {
-      av1_xform_quant(
-          cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, tx_type,
-          USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
-    }
-  } else {
-    // Skipped block: no coefficients.
-    p->eobs[block] = 0;
-    p->txb_entropy_ctx[block] = 0;
-  }
-
-  av1_set_txb_context(x, plane, block, tx_size, a, l);
-
-  if (p->eobs[block]) {
-    // At least one coefficient survived: the block as a whole is not skipped.
-    *(args->skip) = 0;
-
-    TX_TYPE tx_type = av1_get_tx_type(pd->plane_type, xd, blk_row, blk_col,
-                                      tx_size, cm->reduced_tx_set_used);
-    av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, dst,
-                                pd->dst.stride, p->eobs[block],
-                                cm->reduced_tx_set_used);
-  }
-
-  // For luma blocks without coefficients the stored transform type is reset
-  // to DCT_DCT.
-  if (p->eobs[block] == 0 && plane == 0) {
-  // TODO(debargha, jingning): Temporarily disable txk_type check for eob=0
-  // case. It is possible that certain collision in hash index would cause
-  // the assertion failure. To further optimize the rate-distortion
-  // performance, we need to re-visit this part and enable this assert
-  // again.
-#if 0
-    if (args->cpi->oxcf.aq_mode == NO_AQ &&
-        args->cpi->oxcf.deltaq_mode == NO_DELTA_Q) {
-      // TODO(jingning,angiebird,huisu@google.com): enable txk_check when
-      // enable_optimize_b is true to detect potential RD bug.
-      const uint8_t disable_txk_check = args->enable_optimize_b;
-      if (!disable_txk_check) {
-        assert(mbmi->txk_type[av1_get_txk_type_index(plane_bsize, blk_row,
-                                                     blk_col)] == DCT_DCT);
-      }
-    }
-#endif
-    update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
-                     DCT_DCT);
-  }
-
-#if CONFIG_MISMATCH_DEBUG
-  // Record the reconstructed block for encoder/decoder mismatch checking.
-  if (dry_run == OUTPUT_ENABLED) {
-    int pixel_c, pixel_r;
-    BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
-    int blk_w = block_size_wide[bsize];
-    int blk_h = block_size_high[bsize];
-    mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, blk_col, blk_row,
-                    pd->subsampling_x, pd->subsampling_y);
-    mismatch_record_block_tx(dst, pd->dst.stride, cm->frame_offset, plane,
-                             pixel_c, pixel_r, blk_w, blk_h,
-                             xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
-  }
-#endif
-}
-
-// Recursive driver that encodes an inter transform block tree. For luma the
-// tree is followed down to the size stored in mbmi->inter_tx_size for the
-// position; chroma planes are encoded directly at the size they are called
-// with. Leaves are handed to encode_block(). `block` is the running
-// coefficient-block index and advances by the number of 4x4 units of each
-// visited sub-transform.
-static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
-                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
-                               void *arg, int mi_row, int mi_col,
-                               RUN_TYPE dry_run) {
-  (void)mi_row;
-  (void)mi_col;
-  struct encode_b_args *const args = arg;
-  MACROBLOCK *const x = args->x;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
-  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
-
-  // Nothing to encode outside the max_block_high/wide limits.
-  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
-
-  // Chroma uses the maximum UV transform size for the block; luma uses the
-  // per-position size recorded in the mode info.
-  const TX_SIZE plane_tx_size =
-      plane ? av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x,
-                                    pd->subsampling_y)
-            : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row,
-                                                         blk_col)];
-  if (!plane) {
-    assert(tx_size_wide[tx_size] >= tx_size_wide[plane_tx_size] &&
-           tx_size_high[tx_size] >= tx_size_high[plane_tx_size]);
-  }
-
-  if (tx_size == plane_tx_size || plane) {
-    encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg,
-                 mi_row, mi_col, dry_run);
-  } else {
-    assert(tx_size < TX_SIZES_ALL);
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
-    assert(IMPLIES(tx_size <= TX_4X4, sub_txs == tx_size));
-    assert(IMPLIES(tx_size > TX_4X4, sub_txs < tx_size));
-    // This is the square transform block partition entry point.
-    const int bsw = tx_size_wide_unit[sub_txs];
-    const int bsh = tx_size_high_unit[sub_txs];
-    // Number of 4x4 units covered by one sub-transform.
-    const int step = bsh * bsw;
-    assert(bsw > 0 && bsh > 0);
-
-    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
-      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
-        const int offsetr = blk_row + row;
-        const int offsetc = blk_col + col;
-
-        // Children outside the limits are skipped and do not advance `block`.
-        if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
-
-        encode_block_inter(plane, block, offsetr, offsetc, plane_bsize, sub_txs,
-                           arg, mi_row, mi_col, dry_run);
-        block += step;
-      }
-    }
-  }
-}
-
-void av1_foreach_transformed_block_in_plane(
-    const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
-    foreach_transformed_block_visitor visit, void *arg) {
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  // block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
-  // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
-  // transform size varies per plane, look it up in a common way.
-  const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
-  const BLOCK_SIZE plane_bsize =
-      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
-  const uint8_t txw_unit = tx_size_wide_unit[tx_size];
-  const uint8_t txh_unit = tx_size_high_unit[tx_size];
-  const int step = txw_unit * txh_unit;
-  int i = 0, r, c;
-
-  // If mb_to_right_edge is < 0 we are in a situation in which
-  // the current block size extends into the UMV and we won't
-  // visit the sub blocks that are wholly within the UMV.
-  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
-  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
-
-  int blk_row, blk_col;
-
-  const BLOCK_SIZE max_unit_bsize =
-      get_plane_block_size(BLOCK_64X64, pd->subsampling_x, pd->subsampling_y);
-  int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
-  int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
-  mu_blocks_wide = AOMMIN(max_blocks_wide, mu_blocks_wide);
-  mu_blocks_high = AOMMIN(max_blocks_high, mu_blocks_high);
-
-  // Keep track of the row and column of the blocks we use so that we know
-  // if we are in the unrestricted motion border.
-  for (r = 0; r < max_blocks_high; r += mu_blocks_high) {
-    const int unit_height = AOMMIN(mu_blocks_high + r, max_blocks_high);
-    // Skip visiting the sub blocks that are wholly within the UMV.
-    for (c = 0; c < max_blocks_wide; c += mu_blocks_wide) {
-      const int unit_width = AOMMIN(mu_blocks_wide + c, max_blocks_wide);
-      for (blk_row = r; blk_row < unit_height; blk_row += txh_unit) {
-        for (blk_col = c; blk_col < unit_width; blk_col += txw_unit) {
-          visit(plane, i, blk_row, blk_col, plane_bsize, tx_size, arg);
-          i += step;
-        }
-      }
-    }
-  }
-}
-
-void av1_foreach_transformed_block(const MACROBLOCKD *const xd,
-                                   BLOCK_SIZE bsize, int mi_row, int mi_col,
-                                   foreach_transformed_block_visitor visit,
-                                   void *arg, const int num_planes) {
-  for (int plane = 0; plane < num_planes; ++plane) {
-    if (!is_chroma_reference(mi_row, mi_col, bsize,
-                             xd->plane[plane].subsampling_x,
-                             xd->plane[plane].subsampling_y))
-      continue;
-    av1_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
-  }
-}
-
-typedef struct encode_block_pass1_args {
-  AV1_COMMON *cm;
-  MACROBLOCK *x;
-} encode_block_pass1_args;
-
-static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
-                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
-                               void *arg) {
-  encode_block_pass1_args *args = (encode_block_pass1_args *)arg;
-  AV1_COMMON *cm = args->cm;
-  MACROBLOCK *const x = args->x;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  struct macroblock_plane *const p = &x->plane[plane];
-  struct macroblockd_plane *const pd = &xd->plane[plane];
-  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-  TxfmParam txfm_param;
-  uint8_t *dst;
-  dst = &pd->dst
-             .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
-  av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
-                  DCT_DCT, AV1_XFORM_QUANT_B);
-
-  if (p->eobs[block] > 0) {
-    txfm_param.bd = xd->bd;
-    txfm_param.is_hbd = get_bitdepth_data_path_index(xd);
-    txfm_param.tx_type = DCT_DCT;
-    txfm_param.tx_size = tx_size;
-    txfm_param.eob = p->eobs[block];
-    txfm_param.lossless = xd->lossless[xd->mi[0]->segment_id];
-    txfm_param.tx_set_type = av1_get_ext_tx_set_type(
-        txfm_param.tx_size, is_inter_block(xd->mi[0]), cm->reduced_tx_set_used);
-    if (txfm_param.is_hbd) {
-      av1_highbd_inv_txfm_add(dqcoeff, dst, pd->dst.stride, &txfm_param);
-      return;
-    }
-    av1_inv_txfm_add(dqcoeff, dst, pd->dst.stride, &txfm_param);
-  }
-}
-
-void av1_encode_sby_pass1(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize) {
-  encode_block_pass1_args args = { cm, x };
-  av1_subtract_plane(x, bsize, 0);
-  av1_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
-                                         encode_block_pass1, &args);
-}
-
-void av1_encode_sb(const struct AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
-                   int mi_row, int mi_col, RUN_TYPE dry_run) {
-  (void)dry_run;
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCKD *const xd = &x->e_mbd;
-  struct optimize_ctx ctx;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  struct encode_b_args arg = { cpi,
-                               x,
-                               &ctx,
-                               &mbmi->skip,
-                               NULL,
-                               NULL,
-                               cpi->optimize_seg_arr[mbmi->segment_id] };
-  int plane;
-
-  mbmi->skip = 1;
-
-  if (x->skip) return;
-
-  for (plane = 0; plane < num_planes; ++plane) {
-    const int subsampling_x = xd->plane[plane].subsampling_x;
-    const int subsampling_y = xd->plane[plane].subsampling_y;
-
-    if (!is_chroma_reference(mi_row, mi_col, bsize, subsampling_x,
-                             subsampling_y))
-      continue;
-
-    const BLOCK_SIZE bsizec =
-        scale_chroma_bsize(bsize, subsampling_x, subsampling_y);
-
-    // TODO(jingning): Clean this up.
-    const struct macroblockd_plane *const pd = &xd->plane[plane];
-    const BLOCK_SIZE plane_bsize =
-        get_plane_block_size(bsizec, pd->subsampling_x, pd->subsampling_y);
-    const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-    const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
-    const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, plane);
-
-    const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
-    const int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0];
-    const int bh = block_size_high[txb_size] >> tx_size_high_log2[0];
-    int idx, idy;
-    int block = 0;
-    int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
-    av1_get_entropy_contexts(bsizec, pd, ctx.ta[plane], ctx.tl[plane]);
-
-    av1_subtract_plane(x, bsizec, plane);
-
-    arg.ta = ctx.ta[plane];
-    arg.tl = ctx.tl[plane];
-
-    const BLOCK_SIZE max_unit_bsize =
-        get_plane_block_size(BLOCK_64X64, pd->subsampling_x, pd->subsampling_y);
-    int mu_blocks_wide =
-        block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
-    int mu_blocks_high =
-        block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
-
-    mu_blocks_wide = AOMMIN(mi_width, mu_blocks_wide);
-    mu_blocks_high = AOMMIN(mi_height, mu_blocks_high);
-
-    for (idy = 0; idy < mi_height; idy += mu_blocks_high) {
-      for (idx = 0; idx < mi_width; idx += mu_blocks_wide) {
-        int blk_row, blk_col;
-        const int unit_height = AOMMIN(mu_blocks_high + idy, mi_height);
-        const int unit_width = AOMMIN(mu_blocks_wide + idx, mi_width);
-        for (blk_row = idy; blk_row < unit_height; blk_row += bh) {
-          for (blk_col = idx; blk_col < unit_width; blk_col += bw) {
-            encode_block_inter(plane, block, blk_row, blk_col, plane_bsize,
-                               max_tx_size, &arg, mi_row, mi_col, dry_run);
-            block += step;
-          }
-        }
-      }
-    }
-  }
-}
-
-static void encode_block_intra_and_set_context(int plane, int block,
-                                               int blk_row, int blk_col,
-                                               BLOCK_SIZE plane_bsize,
-                                               TX_SIZE tx_size, void *arg) {
-  av1_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size,
-                         arg);
-
-  struct encode_b_args *const args = arg;
-  MACROBLOCK *x = args->x;
-  ENTROPY_CONTEXT *a = &args->ta[blk_col];
-  ENTROPY_CONTEXT *l = &args->tl[blk_row];
-  av1_set_txb_context(x, plane, block, tx_size, a, l);
-}
-
-void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
-                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
-                            void *arg) {
-  struct encode_b_args *const args = arg;
-  const AV1_COMMON *const cm = &args->cpi->common;
-  MACROBLOCK *const x = args->x;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  struct macroblock_plane *const p = &x->plane[plane];
-  struct macroblockd_plane *const pd = &xd->plane[plane];
-  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-  PLANE_TYPE plane_type = get_plane_type(plane);
-  const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
-                                          tx_size, cm->reduced_tx_set_used);
-  uint16_t *eob = &p->eobs[block];
-  const int dst_stride = pd->dst.stride;
-  uint8_t *dst =
-      &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
-  int dummy_rate_cost = 0;
-
-  av1_predict_intra_block_facade(cm, xd, plane, blk_col, blk_row, tx_size);
-
-  const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-  if (plane == 0 && is_blk_skip(x, plane, blk_row * bw + blk_col)) {
-    *eob = 0;
-    p->txb_entropy_ctx[block] = 0;
-  } else {
-    av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
-
-    const ENTROPY_CONTEXT *a = &args->ta[blk_col];
-    const ENTROPY_CONTEXT *l = &args->tl[blk_row];
-    if (args->enable_optimize_b) {
-      av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
-                      tx_size, tx_type, AV1_XFORM_QUANT_FP);
-      TXB_CTX txb_ctx;
-      get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
-      av1_optimize_b(args->cpi, x, plane, block, tx_size, tx_type, &txb_ctx, 1,
-                     &dummy_rate_cost);
-    } else {
-      av1_xform_quant(
-          cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, tx_type,
-          USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
-    }
-  }
-
-  if (*eob) {
-    av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, dst,
-                                dst_stride, *eob, cm->reduced_tx_set_used);
-  }
-
-  if (*eob == 0 && plane == 0) {
-  // TODO(jingning): Temporarily disable txk_type check for eob=0 case.
-  // It is possible that certain collision in hash index would cause
-  // the assertion failure. To further optimize the rate-distortion
-  // performance, we need to re-visit this part and enable this assert
-  // again.
-#if 0
-    if (args->cpi->oxcf.aq_mode == NO_AQ
-        && args->cpi->oxcf.deltaq_mode == NO_DELTA_Q) {
-      assert(mbmi->txk_type[av1_get_txk_type_index(plane_bsize, blk_row,
-                                                   blk_col)] == DCT_DCT);
-    }
-#endif
-    update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
-                     DCT_DCT);
-  }
-
-  // For intra mode, skipped blocks are so rare that transmitting skip=1 is
-  // very expensive.
-  *(args->skip) = 0;
-
-  if (plane == AOM_PLANE_Y && xd->cfl.store_y) {
-    cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize);
-  }
-}
-
-void av1_encode_intra_block_plane(const struct AV1_COMP *cpi, MACROBLOCK *x,
-                                  BLOCK_SIZE bsize, int plane,
-                                  int enable_optimize_b, int mi_row,
-                                  int mi_col) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  ENTROPY_CONTEXT ta[MAX_MIB_SIZE] = { 0 };
-  ENTROPY_CONTEXT tl[MAX_MIB_SIZE] = { 0 };
-
-  struct encode_b_args arg = {
-    cpi, x, NULL, &(xd->mi[0]->skip), ta, tl, enable_optimize_b
-  };
-
-  if (!is_chroma_reference(mi_row, mi_col, bsize,
-                           xd->plane[plane].subsampling_x,
-                           xd->plane[plane].subsampling_y))
-    return;
-
-  if (enable_optimize_b) {
-    const struct macroblockd_plane *const pd = &xd->plane[plane];
-    av1_get_entropy_contexts(bsize, pd, ta, tl);
-  }
-  av1_foreach_transformed_block_in_plane(
-      xd, bsize, plane, encode_block_intra_and_set_context, &arg);
-}
diff --git a/third_party/aom/av1/encoder/encodemb.h b/third_party/aom/av1/encoder/encodemb.h
deleted file mode 100644
index 39080de59..000000000
--- a/third_party/aom/av1/encoder/encodemb.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_ENCODEMB_H_
-#define AOM_AV1_ENCODER_ENCODEMB_H_
-
-#include "config/aom_config.h"
-
-#include "av1/common/onyxc_int.h"
-#include "av1/common/txb_common.h"
-#include "av1/encoder/block.h"
-#include "av1/encoder/tokenize.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct optimize_ctx {
-  ENTROPY_CONTEXT ta[MAX_MB_PLANE][MAX_MIB_SIZE];
-  ENTROPY_CONTEXT tl[MAX_MB_PLANE][MAX_MIB_SIZE];
-};
-
-struct encode_b_args {
-  const struct AV1_COMP *cpi;
-  MACROBLOCK *x;
-  struct optimize_ctx *ctx;
-  int8_t *skip;
-  ENTROPY_CONTEXT *ta;
-  ENTROPY_CONTEXT *tl;
-  int8_t enable_optimize_b;
-};
-
-typedef enum AV1_XFORM_QUANT {
-  AV1_XFORM_QUANT_FP = 0,
-  AV1_XFORM_QUANT_B = 1,
-  AV1_XFORM_QUANT_DC = 2,
-  AV1_XFORM_QUANT_SKIP_QUANT,
-  AV1_XFORM_QUANT_TYPES,
-} AV1_XFORM_QUANT;
-
-void av1_encode_sb(const struct AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
-                   int mi_row, int mi_col, RUN_TYPE dry_run);
-
-void av1_foreach_transformed_block_in_plane(
-    const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
-    foreach_transformed_block_visitor visit, void *arg);
-
-void av1_foreach_transformed_block(const MACROBLOCKD *const xd,
-                                   BLOCK_SIZE bsize, int mi_row, int mi_col,
-                                   foreach_transformed_block_visitor visit,
-                                   void *arg, const int num_planes);
-
-void av1_encode_sby_pass1(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize);
-
-void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
-                     int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
-                     TX_SIZE tx_size, TX_TYPE tx_type,
-                     AV1_XFORM_QUANT xform_quant_idx);
-
-int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane,
-                   int block, TX_SIZE tx_size, TX_TYPE tx_type,
-                   const TXB_CTX *const txb_ctx, int fast_mode, int *rate_cost);
-
-void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize,
-                      int blk_col, int blk_row, TX_SIZE tx_size);
-
-void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
-
-static INLINE void av1_set_txb_context(MACROBLOCK *x, int plane, int block,
-                                       TX_SIZE tx_size, ENTROPY_CONTEXT *a,
-                                       ENTROPY_CONTEXT *l) {
-  const uint8_t ctx = x->plane[plane].txb_entropy_ctx[block];
-  memset(a, ctx, tx_size_wide_unit[tx_size] * sizeof(*a));
-  memset(l, ctx, tx_size_high_unit[tx_size] * sizeof(*l));
-}
-
-void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
-                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg);
-
-void av1_encode_intra_block_plane(const struct AV1_COMP *cpi, MACROBLOCK *x,
-                                  BLOCK_SIZE bsize, int plane,
-                                  int enable_optimize_b, int mi_row,
-                                  int mi_col);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_ENCODEMB_H_
diff --git a/third_party/aom/av1/encoder/encodemv.c b/third_party/aom/av1/encoder/encodemv.c
deleted file mode 100644
index 42eb5abf6..000000000
--- a/third_party/aom/av1/encoder/encodemv.c
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-
-#include "av1/common/common.h"
-#include "av1/common/entropymode.h"
-
-#include "av1/encoder/cost.h"
-#include "av1/encoder/encodemv.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/bitops.h"
-
-static INLINE int mv_class_base(MV_CLASS_TYPE c) {
-  return c ? CLASS0_SIZE << (c + 2) : 0;
-}
-
-// If n != 0, returns the floor of log base 2 of n. If n == 0, returns 0.
-static INLINE uint8_t log_in_base_2(unsigned int n) {
-  // get_msb() is only valid when n != 0.
-  return n == 0 ? 0 : get_msb(n);
-}
-
-static INLINE MV_CLASS_TYPE get_mv_class(int z, int *offset) {
-  const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096)
-                              ? MV_CLASS_10
-                              : (MV_CLASS_TYPE)log_in_base_2(z >> 3);
-  if (offset) *offset = z - mv_class_base(c);
-  return c;
-}
-
-static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp,
-                                MvSubpelPrecision precision) {
-  assert(comp != 0);
-  int offset;
-  const int sign = comp < 0;
-  const int mag = sign ? -comp : comp;
-  const int mv_class = get_mv_class(mag - 1, &offset);
-  const int d = offset >> 3;         // int mv data
-  const int fr = (offset >> 1) & 3;  // fractional mv data
-  const int hp = offset & 1;         // high precision mv data
-
-  // Sign
-  aom_write_symbol(w, sign, mvcomp->sign_cdf, 2);
-
-  // Class
-  aom_write_symbol(w, mv_class, mvcomp->classes_cdf, MV_CLASSES);
-
-  // Integer bits
-  if (mv_class == MV_CLASS_0) {
-    aom_write_symbol(w, d, mvcomp->class0_cdf, CLASS0_SIZE);
-  } else {
-    int i;
-    const int n = mv_class + CLASS0_BITS - 1;  // number of bits
-    for (i = 0; i < n; ++i)
-      aom_write_symbol(w, (d >> i) & 1, mvcomp->bits_cdf[i], 2);
-  }
-  // Fractional bits
-  if (precision > MV_SUBPEL_NONE) {
-    aom_write_symbol(
-        w, fr,
-        mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf,
-        MV_FP_SIZE);
-  }
-
-  // High precision bit
-  if (precision > MV_SUBPEL_LOW_PRECISION)
-    aom_write_symbol(
-        w, hp, mv_class == MV_CLASS_0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf,
-        2);
-}
-
-static void build_nmv_component_cost_table(int *mvcost,
-                                           const nmv_component *const mvcomp,
-                                           MvSubpelPrecision precision) {
-  int i, v;
-  int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE];
-  int bits_cost[MV_OFFSET_BITS][2];
-  int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE], fp_cost[MV_FP_SIZE];
-  int class0_hp_cost[2], hp_cost[2];
-
-  av1_cost_tokens_from_cdf(sign_cost, mvcomp->sign_cdf, NULL);
-  av1_cost_tokens_from_cdf(class_cost, mvcomp->classes_cdf, NULL);
-  av1_cost_tokens_from_cdf(class0_cost, mvcomp->class0_cdf, NULL);
-  for (i = 0; i < MV_OFFSET_BITS; ++i) {
-    av1_cost_tokens_from_cdf(bits_cost[i], mvcomp->bits_cdf[i], NULL);
-  }
-
-  for (i = 0; i < CLASS0_SIZE; ++i)
-    av1_cost_tokens_from_cdf(class0_fp_cost[i], mvcomp->class0_fp_cdf[i], NULL);
-  av1_cost_tokens_from_cdf(fp_cost, mvcomp->fp_cdf, NULL);
-
-  if (precision > MV_SUBPEL_LOW_PRECISION) {
-    av1_cost_tokens_from_cdf(class0_hp_cost, mvcomp->class0_hp_cdf, NULL);
-    av1_cost_tokens_from_cdf(hp_cost, mvcomp->hp_cdf, NULL);
-  }
-  mvcost[0] = 0;
-  for (v = 1; v <= MV_MAX; ++v) {
-    int z, c, o, d, e, f, cost = 0;
-    z = v - 1;
-    c = get_mv_class(z, &o);
-    cost += class_cost[c];
-    d = (o >> 3);     /* int mv data */
-    f = (o >> 1) & 3; /* fractional pel mv data */
-    e = (o & 1);      /* high precision mv data */
-    if (c == MV_CLASS_0) {
-      cost += class0_cost[d];
-    } else {
-      const int b = c + CLASS0_BITS - 1; /* number of bits */
-      for (i = 0; i < b; ++i) cost += bits_cost[i][((d >> i) & 1)];
-    }
-    if (precision > MV_SUBPEL_NONE) {
-      if (c == MV_CLASS_0) {
-        cost += class0_fp_cost[d][f];
-      } else {
-        cost += fp_cost[f];
-      }
-      if (precision > MV_SUBPEL_LOW_PRECISION) {
-        if (c == MV_CLASS_0) {
-          cost += class0_hp_cost[e];
-        } else {
-          cost += hp_cost[e];
-        }
-      }
-    }
-    mvcost[v] = cost + sign_cost[0];
-    mvcost[-v] = cost + sign_cost[1];
-  }
-}
-
-void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, const MV *mv, const MV *ref,
-                   nmv_context *mvctx, int usehp) {
-  const MV diff = { mv->row - ref->row, mv->col - ref->col };
-  const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);
-  if (cpi->common.cur_frame_force_integer_mv) {
-    usehp = MV_SUBPEL_NONE;
-  }
-  aom_write_symbol(w, j, mvctx->joints_cdf, MV_JOINTS);
-  if (mv_joint_vertical(j))
-    encode_mv_component(w, diff.row, &mvctx->comps[0], usehp);
-
-  if (mv_joint_horizontal(j))
-    encode_mv_component(w, diff.col, &mvctx->comps[1], usehp);
-
-  // If auto_mv_step_size is enabled then keep track of the largest
-  // motion vector component used.
-  if (cpi->sf.mv.auto_mv_step_size) {
-    unsigned int maxv = AOMMAX(abs(mv->row), abs(mv->col)) >> 3;
-    cpi->max_mv_magnitude = AOMMAX(maxv, cpi->max_mv_magnitude);
-  }
-}
-
-void av1_encode_dv(aom_writer *w, const MV *mv, const MV *ref,
-                   nmv_context *mvctx) {
-  // DV and ref DV should not have sub-pel.
-  assert((mv->col & 7) == 0);
-  assert((mv->row & 7) == 0);
-  assert((ref->col & 7) == 0);
-  assert((ref->row & 7) == 0);
-  const MV diff = { mv->row - ref->row, mv->col - ref->col };
-  const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);
-
-  aom_write_symbol(w, j, mvctx->joints_cdf, MV_JOINTS);
-  if (mv_joint_vertical(j))
-    encode_mv_component(w, diff.row, &mvctx->comps[0], MV_SUBPEL_NONE);
-
-  if (mv_joint_horizontal(j))
-    encode_mv_component(w, diff.col, &mvctx->comps[1], MV_SUBPEL_NONE);
-}
-
-void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
-                              const nmv_context *ctx,
-                              MvSubpelPrecision precision) {
-  av1_cost_tokens_from_cdf(mvjoint, ctx->joints_cdf, NULL);
-  build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], precision);
-  build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], precision);
-}
-
-int_mv av1_get_ref_mv_from_stack(int ref_idx,
-                                 const MV_REFERENCE_FRAME *ref_frame,
-                                 int ref_mv_idx,
-                                 const MB_MODE_INFO_EXT *mbmi_ext) {
-  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
-  const CANDIDATE_MV *curr_ref_mv_stack =
-      mbmi_ext->ref_mv_stack[ref_frame_type];
-  int_mv ref_mv;
-  ref_mv.as_int = INVALID_MV;
-
-  if (ref_frame[1] > INTRA_FRAME) {
-    if (ref_idx == 0) {
-      ref_mv = curr_ref_mv_stack[ref_mv_idx].this_mv;
-    } else {
-      assert(ref_idx == 1);
-      ref_mv = curr_ref_mv_stack[ref_mv_idx].comp_mv;
-    }
-  } else {
-    assert(ref_idx == 0);
-    if (ref_mv_idx < mbmi_ext->ref_mv_count[ref_frame_type]) {
-      ref_mv = curr_ref_mv_stack[ref_mv_idx].this_mv;
-    } else {
-      ref_mv = mbmi_ext->global_mvs[ref_frame_type];
-    }
-  }
-  return ref_mv;
-}
-
-int_mv av1_get_ref_mv(const MACROBLOCK *x, int ref_idx) {
-  const MACROBLOCKD *xd = &x->e_mbd;
-  const MB_MODE_INFO *mbmi = xd->mi[0];
-  int ref_mv_idx = mbmi->ref_mv_idx;
-  if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV) {
-    assert(has_second_ref(mbmi));
-    ref_mv_idx += 1;
-  }
-  return av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, ref_mv_idx,
-                                   x->mbmi_ext);
-}
-
-void av1_find_best_ref_mvs_from_stack(int allow_hp,
-                                      const MB_MODE_INFO_EXT *mbmi_ext,
-                                      MV_REFERENCE_FRAME ref_frame,
-                                      int_mv *nearest_mv, int_mv *near_mv,
-                                      int is_integer) {
-  const int ref_idx = 0;
-  MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
-  *nearest_mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 0, mbmi_ext);
-  lower_mv_precision(&nearest_mv->as_mv, allow_hp, is_integer);
-  *near_mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 1, mbmi_ext);
-  lower_mv_precision(&near_mv->as_mv, allow_hp, is_integer);
-}
diff --git a/third_party/aom/av1/encoder/encodemv.h b/third_party/aom/av1/encoder/encodemv.h
deleted file mode 100644
index 37ff547c8..000000000
--- a/third_party/aom/av1/encoder/encodemv.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_ENCODEMV_H_
-#define AOM_AV1_ENCODER_ENCODEMV_H_
-
-#include "av1/encoder/encoder.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, const MV *mv, const MV *ref,
-                   nmv_context *mvctx, int usehp);
-
-void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
-                              const nmv_context *mvctx,
-                              MvSubpelPrecision precision);
-
-void av1_update_mv_count(ThreadData *td);
-
-void av1_encode_dv(aom_writer *w, const MV *mv, const MV *ref,
-                   nmv_context *mvctx);
-int_mv av1_get_ref_mv(const MACROBLOCK *x, int ref_idx);
-int_mv av1_get_ref_mv_from_stack(int ref_idx,
-                                 const MV_REFERENCE_FRAME *ref_frame,
-                                 int ref_mv_idx,
-                                 const MB_MODE_INFO_EXT *mbmi_ext);
-void av1_find_best_ref_mvs_from_stack(int allow_hp,
-                                      const MB_MODE_INFO_EXT *mbmi_ext,
-                                      MV_REFERENCE_FRAME ref_frame,
-                                      int_mv *nearest_mv, int_mv *near_mv,
-                                      int is_integer);
-
-static INLINE MV_JOINT_TYPE av1_get_mv_joint(const MV *mv) {
-  if (mv->row == 0) {
-    return mv->col == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ;
-  } else {
-    return mv->col == 0 ? MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ;
-  }
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_ENCODEMV_H_
diff --git a/third_party/aom/av1/encoder/encoder.c b/third_party/aom/av1/encoder/encoder.c
deleted file mode 100644
index a2da2df89..000000000
--- a/third_party/aom/av1/encoder/encoder.c
+++ /dev/null
@@ -1,6437 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-#include <math.h>
-#include <stdio.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/aom_scale_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#if CONFIG_DENOISE
-#include "aom_dsp/grain_table.h"
-#include "aom_dsp/noise_util.h"
-#include "aom_dsp/noise_model.h"
-#endif
-#include "aom_dsp/psnr.h"
-#if CONFIG_INTERNAL_STATS
-#include "aom_dsp/ssim.h"
-#endif
-#include "aom_ports/aom_timer.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/system_state.h"
-#include "aom_scale/aom_scale.h"
-#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
-#include "aom_util/debug_util.h"
-#endif  // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
-
-#include "av1/common/alloccommon.h"
-#include "av1/common/cdef.h"
-#include "av1/common/filter.h"
-#include "av1/common/idct.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/resize.h"
-#include "av1/common/tile_common.h"
-
-#include "av1/encoder/aq_complexity.h"
-#include "av1/encoder/aq_cyclicrefresh.h"
-#include "av1/encoder/aq_variance.h"
-#include "av1/encoder/bitstream.h"
-#include "av1/encoder/context_tree.h"
-#include "av1/encoder/encodeframe.h"
-#include "av1/encoder/encodemv.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/encodetxb.h"
-#include "av1/encoder/ethread.h"
-#include "av1/encoder/firstpass.h"
-#include "av1/encoder/grain_test_vectors.h"
-#include "av1/encoder/hash_motion.h"
-#include "av1/encoder/mbgraph.h"
-#include "av1/encoder/picklpf.h"
-#include "av1/encoder/pickrst.h"
-#include "av1/encoder/random.h"
-#include "av1/encoder/ratectrl.h"
-#include "av1/encoder/rd.h"
-#include "av1/encoder/segmentation.h"
-#include "av1/encoder/speed_features.h"
-#include "av1/encoder/temporal_filter.h"
-
-#define DEFAULT_EXPLICIT_ORDER_HINT_BITS 7
-
-// av1 uses 10,000,000 ticks/second as time stamp
-#define TICKS_PER_SEC 10000000LL
-
-#if CONFIG_ENTROPY_STATS
-FRAME_COUNTS aggregate_fc;
-#endif  // CONFIG_ENTROPY_STATS
-
-#define AM_SEGMENT_ID_INACTIVE 7
-#define AM_SEGMENT_ID_ACTIVE 0
-
-// Whether to use high precision mv for altref computation.
-#define ALTREF_HIGH_PRECISION_MV 1
-
-// Q threshold for high precision mv. Choose a very high value for now so that
-// HIGH_PRECISION is always chosen.
-#define HIGH_PRECISION_MV_QTHRESH 200
-
-// #define OUTPUT_YUV_REC
-#ifdef OUTPUT_YUV_SKINMAP
-FILE *yuv_skinmap_file = NULL;
-#endif
-#ifdef OUTPUT_YUV_REC
-FILE *yuv_rec_file;
-#define FILE_NAME_LEN 100
-#endif
-
-static INLINE void Scale2Ratio(AOM_SCALING mode, int *hr, int *hs) {
-  switch (mode) {
-    case NORMAL:
-      *hr = 1;
-      *hs = 1;
-      break;
-    case FOURFIVE:
-      *hr = 4;
-      *hs = 5;
-      break;
-    case THREEFIVE:
-      *hr = 3;
-      *hs = 5;
-      break;
-    case ONETWO:
-      *hr = 1;
-      *hs = 2;
-      break;
-    default:
-      *hr = 1;
-      *hs = 1;
-      assert(0);
-      break;
-  }
-}
-
-// Mark all inactive blocks as active. Other segmentation features may be set
-// so memset cannot be used, instead only inactive blocks should be reset.
-static void suppress_active_map(AV1_COMP *cpi) {
-  unsigned char *const seg_map = cpi->segmentation_map;
-  int i;
-  if (cpi->active_map.enabled || cpi->active_map.update)
-    for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i)
-      if (seg_map[i] == AM_SEGMENT_ID_INACTIVE)
-        seg_map[i] = AM_SEGMENT_ID_ACTIVE;
-}
-
-static void apply_active_map(AV1_COMP *cpi) {
-  struct segmentation *const seg = &cpi->common.seg;
-  unsigned char *const seg_map = cpi->segmentation_map;
-  const unsigned char *const active_map = cpi->active_map.map;
-  int i;
-
-  assert(AM_SEGMENT_ID_ACTIVE == CR_SEGMENT_ID_BASE);
-
-  if (frame_is_intra_only(&cpi->common)) {
-    cpi->active_map.enabled = 0;
-    cpi->active_map.update = 1;
-  }
-
-  if (cpi->active_map.update) {
-    if (cpi->active_map.enabled) {
-      for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i)
-        if (seg_map[i] == AM_SEGMENT_ID_ACTIVE) seg_map[i] = active_map[i];
-      av1_enable_segmentation(seg);
-      av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
-      av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_H);
-      av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_V);
-      av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_U);
-      av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_V);
-
-      av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_H,
-                      -MAX_LOOP_FILTER);
-      av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_V,
-                      -MAX_LOOP_FILTER);
-      av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_U,
-                      -MAX_LOOP_FILTER);
-      av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_V,
-                      -MAX_LOOP_FILTER);
-    } else {
-      av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
-      av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_H);
-      av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_V);
-      av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_U);
-      av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_V);
-      if (seg->enabled) {
-        seg->update_data = 1;
-        seg->update_map = 1;
-      }
-    }
-    cpi->active_map.update = 0;
-  }
-}
-
-int av1_set_active_map(AV1_COMP *cpi, unsigned char *new_map_16x16, int rows,
-                       int cols) {
-  if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
-    unsigned char *const active_map_8x8 = cpi->active_map.map;
-    const int mi_rows = cpi->common.mi_rows;
-    const int mi_cols = cpi->common.mi_cols;
-    const int row_scale = mi_size_high[BLOCK_16X16] == 2 ? 1 : 2;
-    const int col_scale = mi_size_wide[BLOCK_16X16] == 2 ? 1 : 2;
-    cpi->active_map.update = 1;
-    if (new_map_16x16) {
-      int r, c;
-      for (r = 0; r < mi_rows; ++r) {
-        for (c = 0; c < mi_cols; ++c) {
-          active_map_8x8[r * mi_cols + c] =
-              new_map_16x16[(r >> row_scale) * cols + (c >> col_scale)]
-                  ? AM_SEGMENT_ID_ACTIVE
-                  : AM_SEGMENT_ID_INACTIVE;
-        }
-      }
-      cpi->active_map.enabled = 1;
-    } else {
-      cpi->active_map.enabled = 0;
-    }
-    return 0;
-  } else {
-    return -1;
-  }
-}
-
-int av1_get_active_map(AV1_COMP *cpi, unsigned char *new_map_16x16, int rows,
-                       int cols) {
-  if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols &&
-      new_map_16x16) {
-    unsigned char *const seg_map_8x8 = cpi->segmentation_map;
-    const int mi_rows = cpi->common.mi_rows;
-    const int mi_cols = cpi->common.mi_cols;
-    const int row_scale = mi_size_high[BLOCK_16X16] == 2 ? 1 : 2;
-    const int col_scale = mi_size_wide[BLOCK_16X16] == 2 ? 1 : 2;
-
-    memset(new_map_16x16, !cpi->active_map.enabled, rows * cols);
-    if (cpi->active_map.enabled) {
-      int r, c;
-      for (r = 0; r < mi_rows; ++r) {
-        for (c = 0; c < mi_cols; ++c) {
-          // Cyclic refresh segments are considered active despite not having
-          // AM_SEGMENT_ID_ACTIVE
-          new_map_16x16[(r >> row_scale) * cols + (c >> col_scale)] |=
-              seg_map_8x8[r * mi_cols + c] != AM_SEGMENT_ID_INACTIVE;
-        }
-      }
-    }
-    return 0;
-  } else {
-    return -1;
-  }
-}
-
-static void set_high_precision_mv(AV1_COMP *cpi, int allow_high_precision_mv,
-                                  int cur_frame_force_integer_mv) {
-  MACROBLOCK *const mb = &cpi->td.mb;
-  cpi->common.allow_high_precision_mv =
-      allow_high_precision_mv && cur_frame_force_integer_mv == 0;
-  const int copy_hp =
-      cpi->common.allow_high_precision_mv && cur_frame_force_integer_mv == 0;
-  int *(*src)[2] = copy_hp ? &mb->nmvcost_hp : &mb->nmvcost;
-  mb->mv_cost_stack = *src;
-}
-
-static BLOCK_SIZE select_sb_size(const AV1_COMP *const cpi) {
-  const AV1_COMMON *const cm = &cpi->common;
-
-  if (cpi->oxcf.superblock_size == AOM_SUPERBLOCK_SIZE_64X64)
-    return BLOCK_64X64;
-#if CONFIG_FILEOPTIONS
-  if (cm->options && cm->options->ext_partition)
-#endif
-    if (cpi->oxcf.superblock_size == AOM_SUPERBLOCK_SIZE_128X128)
-      return BLOCK_128X128;
-
-  assert(cpi->oxcf.superblock_size == AOM_SUPERBLOCK_SIZE_DYNAMIC);
-
-// TODO(any): Possibly could improve this with a heuristic.
-#if CONFIG_FILEOPTIONS
-  if (cm->options && !cm->options->ext_partition) return BLOCK_64X64;
-#endif
-
-  // When superres / resize is on, 'cm->width / height' can change between
-  // calls, so we don't apply this heuristic there. Also, this heuristic gives
-  // compression gain for speed >= 2 only.
-  if (cpi->oxcf.superres_mode == SUPERRES_NONE &&
-      cpi->oxcf.resize_mode == RESIZE_NONE && cpi->oxcf.speed >= 2) {
-    return (cm->width >= 480 && cm->height >= 360) ? BLOCK_128X128
-                                                   : BLOCK_64X64;
-  }
-
-  return BLOCK_128X128;
-}
-
-static void setup_frame(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  // Set up entropy context depending on frame type. The decoder mandates
-  // the use of the default context, index 0, for keyframes and inter
-  // frames where the error_resilient_mode or intra_only flag is set. For
-  // other inter-frames the encoder currently uses only two contexts;
-  // context 1 for ALTREF frames and context 0 for the others.
-
-  cm->primary_ref_frame = PRIMARY_REF_NONE;
-  if (frame_is_intra_only(cm) || cm->error_resilient_mode ||
-      cm->force_primary_ref_none) {
-    av1_setup_past_independence(cm);
-    for (int i = 0; i < REF_FRAMES; i++) {
-      cm->fb_of_context_type[i] = -1;
-    }
-    cm->fb_of_context_type[REGULAR_FRAME] =
-        cm->show_frame ? get_ref_frame_map_idx(cpi, GOLDEN_FRAME)
-                       : get_ref_frame_map_idx(cpi, ALTREF_FRAME);
-    cm->frame_context_idx = REGULAR_FRAME;
-  } else {
-    const GF_GROUP *gf_group = &cpi->twopass.gf_group;
-    if (gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE)
-      cm->frame_context_idx = EXT_ARF_FRAME;
-    else if (cpi->refresh_alt_ref_frame)
-      cm->frame_context_idx = ARF_FRAME;
-    else if (cpi->rc.is_src_frame_alt_ref)
-      cm->frame_context_idx = OVERLAY_FRAME;
-    else if (cpi->refresh_golden_frame)
-      cm->frame_context_idx = GLD_FRAME;
-    else if (cpi->refresh_bwd_ref_frame)
-      cm->frame_context_idx = BRF_FRAME;
-    else
-      cm->frame_context_idx = REGULAR_FRAME;
-    int wanted_fb = cm->fb_of_context_type[cm->frame_context_idx];
-    for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
-      int fb = get_ref_frame_map_idx(cpi, ref_frame);
-      if (fb == wanted_fb) {
-        cm->primary_ref_frame = ref_frame - LAST_FRAME;
-      }
-    }
-  }
-
-  if (cm->frame_type == KEY_FRAME && cm->show_frame) {
-    cpi->refresh_golden_frame = 1;
-    cpi->refresh_alt_ref_frame = 1;
-    av1_zero(cpi->interp_filter_selected);
-    set_sb_size(&cm->seq_params, select_sb_size(cpi));
-    set_use_reference_buffer(cm, 0);
-  } else if (frame_is_sframe(cm)) {
-    cpi->refresh_golden_frame = 1;
-    cpi->refresh_alt_ref_frame = 1;
-    av1_zero(cpi->interp_filter_selected);
-    set_sb_size(&cm->seq_params, select_sb_size(cpi));
-  } else {
-    if (cm->primary_ref_frame == PRIMARY_REF_NONE ||
-        cm->frame_refs[cm->primary_ref_frame].idx < 0) {
-      av1_setup_past_independence(cm);
-      cm->seg.update_map = 1;
-      cm->seg.update_data = 1;
-    } else {
-      *cm->fc = cm->frame_contexts[cm->frame_refs[cm->primary_ref_frame].idx];
-    }
-    av1_zero(cpi->interp_filter_selected[0]);
-  }
-
-  cm->prev_frame = get_prev_frame(cm);
-  cpi->vaq_refresh = 0;
-}
-
-static void enc_setup_mi(AV1_COMMON *cm) {
-  int i;
-  int mi_rows_sb_aligned = calc_mi_size(cm->mi_rows);
-  cm->mi = cm->mip;
-  memset(cm->mip, 0, cm->mi_stride * mi_rows_sb_aligned * sizeof(*cm->mip));
-  cm->prev_mi = cm->prev_mip;
-  // Clear top border row
-  memset(cm->prev_mip, 0, sizeof(*cm->prev_mip) * cm->mi_stride);
-  // Clear left border column
-  for (i = 0; i < mi_rows_sb_aligned; ++i)
-    memset(&cm->prev_mip[i * cm->mi_stride], 0, sizeof(*cm->prev_mip));
-  cm->mi_grid_visible = cm->mi_grid_base;
-  cm->prev_mi_grid_visible = cm->prev_mi_grid_base;
-
-  memset(cm->mi_grid_base, 0,
-         cm->mi_stride * mi_rows_sb_aligned * sizeof(*cm->mi_grid_base));
-}
-
-static int enc_alloc_mi(AV1_COMMON *cm, int mi_size) {
-  cm->mip = aom_calloc(mi_size, sizeof(*cm->mip));
-  if (!cm->mip) return 1;
-  cm->prev_mip = aom_calloc(mi_size, sizeof(*cm->prev_mip));
-  if (!cm->prev_mip) return 1;
-  cm->mi_alloc_size = mi_size;
-
-  cm->mi_grid_base =
-      (MB_MODE_INFO **)aom_calloc(mi_size, sizeof(MB_MODE_INFO *));
-  if (!cm->mi_grid_base) return 1;
-  cm->prev_mi_grid_base =
-      (MB_MODE_INFO **)aom_calloc(mi_size, sizeof(MB_MODE_INFO *));
-  if (!cm->prev_mi_grid_base) return 1;
-
-  return 0;
-}
-
-static void enc_free_mi(AV1_COMMON *cm) {
-  aom_free(cm->mip);
-  cm->mip = NULL;
-  aom_free(cm->prev_mip);
-  cm->prev_mip = NULL;
-  aom_free(cm->mi_grid_base);
-  cm->mi_grid_base = NULL;
-  aom_free(cm->prev_mi_grid_base);
-  cm->prev_mi_grid_base = NULL;
-  cm->mi_alloc_size = 0;
-}
-
-static void swap_mi_and_prev_mi(AV1_COMMON *cm) {
-  // Current mip will be the prev_mip for the next frame.
-  MB_MODE_INFO **temp_base = cm->prev_mi_grid_base;
-  MB_MODE_INFO *temp = cm->prev_mip;
-  cm->prev_mip = cm->mip;
-  cm->mip = temp;
-
-  // Update the upper left visible macroblock ptrs.
-  cm->mi = cm->mip;
-  cm->prev_mi = cm->prev_mip;
-
-  cm->prev_mi_grid_base = cm->mi_grid_base;
-  cm->mi_grid_base = temp_base;
-  cm->mi_grid_visible = cm->mi_grid_base;
-  cm->prev_mi_grid_visible = cm->prev_mi_grid_base;
-}
-
-void av1_initialize_enc(void) {
-  av1_rtcd();
-  aom_dsp_rtcd();
-  aom_scale_rtcd();
-  av1_init_intra_predictors();
-  av1_init_me_luts();
-  av1_rc_init_minq_luts();
-  av1_init_wedge_masks();
-}
-
-static void dealloc_context_buffers_ext(AV1_COMP *cpi) {
-  if (cpi->mbmi_ext_base) {
-    aom_free(cpi->mbmi_ext_base);
-    cpi->mbmi_ext_base = NULL;
-  }
-}
-
-static void alloc_context_buffers_ext(AV1_COMP *cpi) {
-  AV1_COMMON *cm = &cpi->common;
-  int mi_size = cm->mi_cols * cm->mi_rows;
-
-  dealloc_context_buffers_ext(cpi);
-  CHECK_MEM_ERROR(cm, cpi->mbmi_ext_base,
-                  aom_calloc(mi_size, sizeof(*cpi->mbmi_ext_base)));
-}
-
-static void update_film_grain_parameters(struct AV1_COMP *cpi,
-                                         const AV1EncoderConfig *oxcf) {
-  AV1_COMMON *const cm = &cpi->common;
-  cpi->oxcf = *oxcf;
-
-  if (cpi->film_grain_table) {
-    aom_film_grain_table_free(cpi->film_grain_table);
-    aom_free(cpi->film_grain_table);
-    cpi->film_grain_table = NULL;
-  }
-
-  if (oxcf->film_grain_test_vector) {
-    cm->seq_params.film_grain_params_present = 1;
-    if (cm->frame_type == KEY_FRAME) {
-      memcpy(&cm->film_grain_params,
-             film_grain_test_vectors + oxcf->film_grain_test_vector - 1,
-             sizeof(cm->film_grain_params));
-
-      cm->film_grain_params.bit_depth = cm->seq_params.bit_depth;
-      if (cm->seq_params.color_range == AOM_CR_FULL_RANGE) {
-        cm->film_grain_params.clip_to_restricted_range = 0;
-      }
-    }
-  } else if (oxcf->film_grain_table_filename) {
-    cpi->film_grain_table = aom_malloc(sizeof(*cpi->film_grain_table));
-    memset(cpi->film_grain_table, 0, sizeof(aom_film_grain_table_t));
-
-    aom_film_grain_table_read(cpi->film_grain_table,
-                              oxcf->film_grain_table_filename, &cm->error);
-  } else {
-    cm->seq_params.film_grain_params_present = 0;
-    memset(&cm->film_grain_params, 0, sizeof(cm->film_grain_params));
-  }
-}
-
-static void dealloc_compressor_data(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-
-  dealloc_context_buffers_ext(cpi);
-
-  aom_free(cpi->tile_data);
-  cpi->tile_data = NULL;
-
-  // Delete sementation map
-  aom_free(cpi->segmentation_map);
-  cpi->segmentation_map = NULL;
-
-  av1_cyclic_refresh_free(cpi->cyclic_refresh);
-  cpi->cyclic_refresh = NULL;
-
-  aom_free(cpi->active_map.map);
-  cpi->active_map.map = NULL;
-
-  aom_free(cpi->td.mb.above_pred_buf);
-  cpi->td.mb.above_pred_buf = NULL;
-
-  aom_free(cpi->td.mb.left_pred_buf);
-  cpi->td.mb.left_pred_buf = NULL;
-
-  aom_free(cpi->td.mb.wsrc_buf);
-  cpi->td.mb.wsrc_buf = NULL;
-
-  for (int i = 0; i < 2; i++)
-    for (int j = 0; j < 2; j++) {
-      aom_free(cpi->td.mb.hash_value_buffer[i][j]);
-      cpi->td.mb.hash_value_buffer[i][j] = NULL;
-    }
-  aom_free(cpi->td.mb.mask_buf);
-  cpi->td.mb.mask_buf = NULL;
-
-  aom_free(cm->tpl_mvs);
-  cm->tpl_mvs = NULL;
-
-  av1_free_ref_frame_buffers(cm->buffer_pool);
-  av1_free_txb_buf(cpi);
-  av1_free_context_buffers(cm);
-
-  aom_free_frame_buffer(&cpi->last_frame_uf);
-  av1_free_restoration_buffers(cm);
-  aom_free_frame_buffer(&cpi->trial_frame_rst);
-  aom_free_frame_buffer(&cpi->scaled_source);
-  aom_free_frame_buffer(&cpi->scaled_last_source);
-  aom_free_frame_buffer(&cpi->alt_ref_buffer);
-  av1_lookahead_destroy(cpi->lookahead);
-
-  aom_free(cpi->tile_tok[0][0]);
-  cpi->tile_tok[0][0] = 0;
-
-  aom_free(cpi->tplist[0][0]);
-  cpi->tplist[0][0] = NULL;
-
-  av1_free_pc_tree(&cpi->td, num_planes);
-
-  aom_free(cpi->td.mb.palette_buffer);
-
-  aom_free(cpi->td.mb.tmp_conv_dst);
-  for (int j = 0; j < 2; ++j) {
-    aom_free(cpi->td.mb.tmp_obmc_bufs[j]);
-  }
-
-#if CONFIG_DENOISE
-  if (cpi->denoise_and_model) {
-    aom_denoise_and_model_free(cpi->denoise_and_model);
-    cpi->denoise_and_model = NULL;
-  }
-#endif
-  if (cpi->film_grain_table) {
-    aom_film_grain_table_free(cpi->film_grain_table);
-    cpi->film_grain_table = NULL;
-  }
-}
-
-static void save_coding_context(AV1_COMP *cpi) {
-  CODING_CONTEXT *const cc = &cpi->coding_context;
-  AV1_COMMON *cm = &cpi->common;
-
-  // Stores a snapshot of key state variables which can subsequently be
-  // restored with a call to av1_restore_coding_context. These functions are
-  // intended for use in a re-code loop in av1_compress_frame where the
-  // quantizer value is adjusted between loop iterations.
-  av1_copy(cc->nmv_vec_cost, cpi->td.mb.nmv_vec_cost);
-  av1_copy(cc->nmv_costs, cpi->nmv_costs);
-  av1_copy(cc->nmv_costs_hp, cpi->nmv_costs_hp);
-
-  cc->fc = *cm->fc;
-}
-
-static void restore_coding_context(AV1_COMP *cpi) {
-  CODING_CONTEXT *const cc = &cpi->coding_context;
-  AV1_COMMON *cm = &cpi->common;
-
-  // Restore key state variables to the snapshot state stored in the
-  // previous call to av1_save_coding_context.
-  av1_copy(cpi->td.mb.nmv_vec_cost, cc->nmv_vec_cost);
-  av1_copy(cpi->nmv_costs, cc->nmv_costs);
-  av1_copy(cpi->nmv_costs_hp, cc->nmv_costs_hp);
-
-  *cm->fc = cc->fc;
-}
-
-static void configure_static_seg_features(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const RATE_CONTROL *const rc = &cpi->rc;
-  struct segmentation *const seg = &cm->seg;
-
-  int high_q = (int)(rc->avg_q > 48.0);
-  int qi_delta;
-
-  // Disable and clear down for KF
-  if (cm->frame_type == KEY_FRAME) {
-    // Clear down the global segmentation map
-    memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
-    seg->update_map = 0;
-    seg->update_data = 0;
-    cpi->static_mb_pct = 0;
-
-    // Disable segmentation
-    av1_disable_segmentation(seg);
-
-    // Clear down the segment features.
-    av1_clearall_segfeatures(seg);
-  } else if (cpi->refresh_alt_ref_frame) {
-    // If this is an alt ref frame
-    // Clear down the global segmentation map
-    memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
-    seg->update_map = 0;
-    seg->update_data = 0;
-    cpi->static_mb_pct = 0;
-
-    // Disable segmentation and individual segment features by default
-    av1_disable_segmentation(seg);
-    av1_clearall_segfeatures(seg);
-
-    // Scan frames from current to arf frame.
-    // This function re-enables segmentation if appropriate.
-    av1_update_mbgraph_stats(cpi);
-
-    // If segmentation was enabled set those features needed for the
-    // arf itself.
-    if (seg->enabled) {
-      seg->update_map = 1;
-      seg->update_data = 1;
-
-      qi_delta = av1_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875,
-                                    cm->seq_params.bit_depth);
-      av1_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2);
-      av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_H, -2);
-      av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_V, -2);
-      av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_U, -2);
-      av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_V, -2);
-
-      av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_Y_H);
-      av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_Y_V);
-      av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_U);
-      av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_V);
-
-      av1_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
-    }
-  } else if (seg->enabled) {
-    // All other frames if segmentation has been enabled
-
-    // First normal frame in a valid gf or alt ref group
-    if (rc->frames_since_golden == 0) {
-      // Set up segment features for normal frames in an arf group
-      if (rc->source_alt_ref_active) {
-        seg->update_map = 0;
-        seg->update_data = 1;
-
-        qi_delta = av1_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125,
-                                      cm->seq_params.bit_depth);
-        av1_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2);
-        av1_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
-
-        av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_H, -2);
-        av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_V, -2);
-        av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_U, -2);
-        av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_V, -2);
-
-        av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_Y_H);
-        av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_Y_V);
-        av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_U);
-        av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_V);
-
-        // Segment coding disabled for compred testing
-        if (high_q || (cpi->static_mb_pct == 100)) {
-          av1_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
-          av1_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
-          av1_enable_segfeature(seg, 1, SEG_LVL_SKIP);
-        }
-      } else {
-        // Disable segmentation and clear down features if alt ref
-        // is not active for this group
-
-        av1_disable_segmentation(seg);
-
-        memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
-
-        seg->update_map = 0;
-        seg->update_data = 0;
-
-        av1_clearall_segfeatures(seg);
-      }
-    } else if (rc->is_src_frame_alt_ref) {
-      // Special case where we are coding over the top of a previous
-      // alt ref frame.
-      // Segment coding disabled for compred testing
-
-      // Enable ref frame features for segment 0 as well
-      av1_enable_segfeature(seg, 0, SEG_LVL_REF_FRAME);
-      av1_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
-
-      // All mbs should use ALTREF_FRAME
-      av1_clear_segdata(seg, 0, SEG_LVL_REF_FRAME);
-      av1_set_segdata(seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME);
-      av1_clear_segdata(seg, 1, SEG_LVL_REF_FRAME);
-      av1_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
-
-      // Skip all MBs if high Q (0,0 mv and skip coeffs)
-      if (high_q) {
-        av1_enable_segfeature(seg, 0, SEG_LVL_SKIP);
-        av1_enable_segfeature(seg, 1, SEG_LVL_SKIP);
-      }
-      // Enable data update
-      seg->update_data = 1;
-    } else {
-      // All other frames.
-
-      // No updates.. leave things as they are.
-      seg->update_map = 0;
-      seg->update_data = 0;
-    }
-  }
-}
-
-static void update_reference_segmentation_map(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  MB_MODE_INFO **mi_4x4_ptr = cm->mi_grid_visible;
-  uint8_t *cache_ptr = cm->current_frame_seg_map;
-  int row, col;
-
-  for (row = 0; row < cm->mi_rows; row++) {
-    MB_MODE_INFO **mi_4x4 = mi_4x4_ptr;
-    uint8_t *cache = cache_ptr;
-    for (col = 0; col < cm->mi_cols; col++, mi_4x4++, cache++)
-      cache[0] = mi_4x4[0]->segment_id;
-    mi_4x4_ptr += cm->mi_stride;
-    cache_ptr += cm->mi_cols;
-  }
-}
-
-static void alloc_raw_frame_buffers(AV1_COMP *cpi) {
-  AV1_COMMON *cm = &cpi->common;
-  const SequenceHeader *const seq_params = &cm->seq_params;
-  const AV1EncoderConfig *oxcf = &cpi->oxcf;
-
-  if (!cpi->lookahead)
-    cpi->lookahead =
-        av1_lookahead_init(oxcf->width, oxcf->height, seq_params->subsampling_x,
-                           seq_params->subsampling_y,
-                           seq_params->use_highbitdepth, oxcf->lag_in_frames);
-  if (!cpi->lookahead)
-    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                       "Failed to allocate lag buffers");
-
-  // TODO(agrange) Check if ARF is enabled and skip allocation if not.
-  if (aom_realloc_frame_buffer(
-          &cpi->alt_ref_buffer, oxcf->width, oxcf->height,
-          seq_params->subsampling_x, seq_params->subsampling_y,
-          seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS,
-          cm->byte_alignment, NULL, NULL, NULL))
-    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                       "Failed to allocate altref buffer");
-}
-
-static void alloc_util_frame_buffers(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const SequenceHeader *const seq_params = &cm->seq_params;
-  if (aom_realloc_frame_buffer(
-          &cpi->last_frame_uf, cm->width, cm->height, seq_params->subsampling_x,
-          seq_params->subsampling_y, seq_params->use_highbitdepth,
-          AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL))
-    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                       "Failed to allocate last frame buffer");
-
-  if (aom_realloc_frame_buffer(
-          &cpi->trial_frame_rst, cm->superres_upscaled_width,
-          cm->superres_upscaled_height, seq_params->subsampling_x,
-          seq_params->subsampling_y, seq_params->use_highbitdepth,
-          AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL))
-    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                       "Failed to allocate trial restored frame buffer");
-
-  if (aom_realloc_frame_buffer(
-          &cpi->scaled_source, cm->width, cm->height, seq_params->subsampling_x,
-          seq_params->subsampling_y, seq_params->use_highbitdepth,
-          AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL))
-    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                       "Failed to allocate scaled source buffer");
-
-  if (aom_realloc_frame_buffer(
-          &cpi->scaled_last_source, cm->width, cm->height,
-          seq_params->subsampling_x, seq_params->subsampling_y,
-          seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS,
-          cm->byte_alignment, NULL, NULL, NULL))
-    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                       "Failed to allocate scaled last source buffer");
-}
-
-static void alloc_compressor_data(AV1_COMP *cpi) {
-  AV1_COMMON *cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-
-  av1_alloc_context_buffers(cm, cm->width, cm->height);
-
-  int mi_rows_aligned_to_sb =
-      ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2);
-  int sb_rows = mi_rows_aligned_to_sb >> cm->seq_params.mib_size_log2;
-
-  av1_alloc_txb_buf(cpi);
-
-  alloc_context_buffers_ext(cpi);
-
-  aom_free(cpi->tile_tok[0][0]);
-
-  {
-    unsigned int tokens =
-        get_token_alloc(cm->mb_rows, cm->mb_cols, MAX_SB_SIZE_LOG2, num_planes);
-    CHECK_MEM_ERROR(cm, cpi->tile_tok[0][0],
-                    aom_calloc(tokens, sizeof(*cpi->tile_tok[0][0])));
-  }
-  aom_free(cpi->tplist[0][0]);
-
-  CHECK_MEM_ERROR(cm, cpi->tplist[0][0],
-                  aom_calloc(sb_rows * MAX_TILE_ROWS * MAX_TILE_COLS,
-                             sizeof(*cpi->tplist[0][0])));
-
-  av1_setup_pc_tree(&cpi->common, &cpi->td);
-}
-
-void av1_new_framerate(AV1_COMP *cpi, double framerate) {
-  cpi->framerate = framerate < 0.1 ? 30 : framerate;
-  av1_rc_update_framerate(cpi, cpi->common.width, cpi->common.height);
-}
-
-static void set_tile_info(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  int i, start_sb;
-
-  av1_get_tile_limits(cm);
-
-  // configure tile columns
-  if (cpi->oxcf.tile_width_count == 0 || cpi->oxcf.tile_height_count == 0) {
-    cm->uniform_tile_spacing_flag = 1;
-    cm->log2_tile_cols = AOMMAX(cpi->oxcf.tile_columns, cm->min_log2_tile_cols);
-    cm->log2_tile_cols = AOMMIN(cm->log2_tile_cols, cm->max_log2_tile_cols);
-  } else {
-    int mi_cols = ALIGN_POWER_OF_TWO(cm->mi_cols, cm->seq_params.mib_size_log2);
-    int sb_cols = mi_cols >> cm->seq_params.mib_size_log2;
-    int size_sb, j = 0;
-    cm->uniform_tile_spacing_flag = 0;
-    for (i = 0, start_sb = 0; start_sb < sb_cols && i < MAX_TILE_COLS; i++) {
-      cm->tile_col_start_sb[i] = start_sb;
-      size_sb = cpi->oxcf.tile_widths[j++];
-      if (j >= cpi->oxcf.tile_width_count) j = 0;
-      start_sb += AOMMIN(size_sb, cm->max_tile_width_sb);
-    }
-    cm->tile_cols = i;
-    cm->tile_col_start_sb[i] = sb_cols;
-  }
-  av1_calculate_tile_cols(cm);
-
-  // configure tile rows
-  if (cm->uniform_tile_spacing_flag) {
-    cm->log2_tile_rows = AOMMAX(cpi->oxcf.tile_rows, cm->min_log2_tile_rows);
-    cm->log2_tile_rows = AOMMIN(cm->log2_tile_rows, cm->max_log2_tile_rows);
-  } else {
-    int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2);
-    int sb_rows = mi_rows >> cm->seq_params.mib_size_log2;
-    int size_sb, j = 0;
-    for (i = 0, start_sb = 0; start_sb < sb_rows && i < MAX_TILE_ROWS; i++) {
-      cm->tile_row_start_sb[i] = start_sb;
-      size_sb = cpi->oxcf.tile_heights[j++];
-      if (j >= cpi->oxcf.tile_height_count) j = 0;
-      start_sb += AOMMIN(size_sb, cm->max_tile_height_sb);
-    }
-    cm->tile_rows = i;
-    cm->tile_row_start_sb[i] = sb_rows;
-  }
-  av1_calculate_tile_rows(cm);
-}
-
-static void update_frame_size(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-
-  av1_set_mb_mi(cm, cm->width, cm->height);
-  av1_init_context_buffers(cm);
-  av1_init_macroblockd(cm, xd, NULL);
-  memset(cpi->mbmi_ext_base, 0,
-         cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
-  set_tile_info(cpi);
-}
-
-static void init_buffer_indices(AV1_COMP *cpi) {
-  int fb_idx;
-  for (fb_idx = 0; fb_idx < REF_FRAMES; ++fb_idx)
-    cpi->ref_fb_idx[fb_idx] = fb_idx;
-  cpi->rate_index = 0;
-  cpi->rate_size = 0;
-  cpi->cur_poc = -1;
-}
-
-// Returns non-zero when a stream of the given dimensions and frame rate fits
-// within a level described by (lvl_width, lvl_height, lvl_fps).
-//
-// The stream fits when all of the following hold:
-//   * its luma picture size (width * height) does not exceed the level's,
-//   * its display sample rate (picture size * fps) does not exceed the
-//     level's,
-//   * each dimension is at most lvl_dim_mult times the corresponding level
-//     dimension.
-//
-// All products are formed in 64-bit arithmetic so that large dimensions
-// cannot overflow `int` before the comparison is made.
-static INLINE int does_level_match(int width, int height, double fps,
-                                   int lvl_width, int lvl_height,
-                                   double lvl_fps, int lvl_dim_mult) {
-  const int64_t lvl_luma_pels = (int64_t)lvl_width * lvl_height;
-  const double lvl_display_sample_rate = lvl_luma_pels * lvl_fps;
-  const int64_t luma_pels = (int64_t)width * height;
-  const double display_sample_rate = luma_pels * fps;
-  const int64_t max_width = (int64_t)lvl_width * lvl_dim_mult;
-  const int64_t max_height = (int64_t)lvl_height * lvl_dim_mult;
-  return luma_pels <= lvl_luma_pels &&
-         display_sample_rate <= lvl_display_sample_rate &&
-         width <= max_width && height <= max_height;
-}
-
-// Picks a bitstream level from the configured frame size and initial frame
-// rate, then stores that level (with tier 0) and the matching maximum
-// bitrate / buffer size for every operating point.
-//
-// TODO(any): This is a placeholder function that only addresses dimensions
-// and max display sample rates.
-// Need to add checks for max bit rate, max decoded luma sample rate, header
-// rate, etc. that are not covered by this function.
-static void set_bitstream_level_tier(SequenceHeader *seq, AV1_COMMON *cm,
-                                     const AV1EncoderConfig *oxcf) {
-  // Candidate levels in the order they are tried; the first row that the
-  // configuration fits into wins.
-  static const struct {
-    int width;
-    int height;
-    double fps;
-    int dim_mult;
-    int major;
-    int minor;
-  } level_limits[] = {
-    { 512, 288, 30.0, 4, 2, 0 },      { 704, 396, 30.0, 4, 2, 1 },
-    { 1088, 612, 30.0, 4, 3, 0 },     { 1376, 774, 30.0, 4, 3, 1 },
-    { 2048, 1152, 30.0, 3, 4, 0 },    { 2048, 1152, 60.0, 3, 4, 1 },
-    { 4096, 2176, 30.0, 2, 5, 0 },    { 4096, 2176, 60.0, 2, 5, 1 },
-    { 4096, 2176, 120.0, 2, 5, 2 },   { 8192, 4352, 30.0, 2, 6, 0 },
-    { 8192, 4352, 60.0, 2, 6, 1 },    { 8192, 4352, 120.0, 2, 6, 2 },
-    { 16384, 8704, 30.0, 2, 7, 0 },   { 16384, 8704, 60.0, 2, 7, 1 },
-    { 16384, 8704, 120.0, 2, 7, 2 },
-  };
-  const int num_levels =
-      (int)(sizeof(level_limits) / sizeof(level_limits[0]));
-
-  // Level used when no table row matches.
-  BitstreamLevel bl = { 9, 3 };
-  for (int k = 0; k < num_levels; ++k) {
-    if (does_level_match(oxcf->width, oxcf->height, oxcf->init_framerate,
-                         level_limits[k].width, level_limits[k].height,
-                         level_limits[k].fps, level_limits[k].dim_mult)) {
-      bl.major = level_limits[k].major;
-      bl.minor = level_limits[k].minor;
-      break;
-    }
-  }
-
-  for (int op = 0; op < MAX_NUM_OPERATING_POINTS; ++op) {
-    seq->level[op] = bl;
-    seq->tier[op] = 0;  // setting main tier by default
-
-    // Set the maximum parameters for bitrate and buffer size for this profile,
-    // level, and tier
-    cm->op_params[op].bitrate = max_level_bitrate(
-        cm->seq_params.profile, major_minor_to_seq_level_idx(seq->level[op]),
-        seq->tier[op]);
-
-    // Level with seq_level_idx = 31 returns a high "dummy" bitrate to pass the
-    // check
-    if (cm->op_params[op].bitrate == 0) {
-      aom_internal_error(
-          &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
-          "AV1 does not support this combination of profile, level, and tier.");
-    }
-
-    // Buffer size in bits/s is bitrate in bits/s * 1 s
-    cm->op_params[op].buffer_size = cm->op_params[op].bitrate;
-  }
-}
-
-// Fills in the sequence-header coding-tool switches from the encoder
-// configuration, selects the bitstream level/tier, and sets up
-// operating_point_idc[] for every operating point.
-static void init_seq_coding_tools(SequenceHeader *seq, AV1_COMMON *cm,
-                                  const AV1EncoderConfig *oxcf) {
-  // A one-frame limit is treated as a still picture; the reduced header is
-  // used for it unless the full still-picture header was requested.
-  seq->still_picture = (oxcf->limit == 1);
-  seq->reduced_still_picture_hdr = seq->still_picture;
-  seq->reduced_still_picture_hdr &= !oxcf->full_still_picture_hdr;
-
-  // Both fields get the value 2 regardless of the still-picture setting.
-  seq->force_screen_content_tools = 2;
-  seq->force_integer_mv = 2;
-
-  if (seq->still_picture && seq->reduced_still_picture_hdr) {
-    // Reduced still-picture header: no order hints and no frame ids.
-    seq->enable_order_hint = 0;
-    seq->frame_id_numbers_present_flag = 0;
-  } else {
-    seq->enable_order_hint = oxcf->enable_order_hint;
-    seq->frame_id_numbers_present_flag = oxcf->large_scale_tile;
-  }
-
-  if (seq->enable_order_hint) {
-    seq->order_hint_bits_minus_1 = DEFAULT_EXPLICIT_ORDER_HINT_BITS - 1;
-  } else {
-    seq->order_hint_bits_minus_1 = -1;
-  }
-
-  seq->enable_dual_filter = oxcf->enable_dual_filter;
-
-  // These two tools are additionally gated on order hints being enabled.
-  seq->enable_jnt_comp = oxcf->enable_jnt_comp;
-  seq->enable_jnt_comp &= seq->enable_order_hint;
-  seq->enable_ref_frame_mvs = oxcf->enable_ref_frame_mvs;
-  seq->enable_ref_frame_mvs &= seq->enable_order_hint;
-
-  seq->enable_superres = oxcf->enable_superres;
-  seq->enable_cdef = oxcf->enable_cdef;
-  seq->enable_restoration = oxcf->enable_restoration;
-  seq->enable_warped_motion = oxcf->enable_warped_motion;
-
-  // Always switched on; not configurable through oxcf here.
-  seq->enable_interintra_compound = 1;
-  seq->enable_masked_compound = 1;
-  seq->enable_intra_edge_filter = 1;
-  seq->enable_filter_intra = 1;
-
-  set_bitstream_level_tier(seq, cm, oxcf);
-
-  if (seq->operating_points_cnt_minus_1 == 0) {
-    seq->operating_point_idc[0] = 0;
-    return;
-  }
-
-  // Set operating_point_idc[] such that for the i-th operating point the
-  // first (operating_points_cnt - i) spatial layers and the first temporal
-  // layer are decoded. Note that the highest quality operating point should
-  // come first.
-  for (int op = 0; op < seq->operating_points_cnt_minus_1 + 1; op++) {
-    const unsigned int spatial_mask =
-        ~(~0u << (seq->operating_points_cnt_minus_1 + 1 - op));
-    seq->operating_point_idc[op] = (spatial_mask << 8) | 1;
-  }
-}
-
-// One-time encoder configuration: copies *oxcf into cpi, seeds the sequence
-// parameters, timing info and decoder-model settings in cpi->common, derives
-// chroma subsampling, sets the frame and superblock size, allocates the
-// compressor data and then hands over to av1_change_config() for the settings
-// that are shared with run-time reconfiguration.
-static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) {
-  AV1_COMMON *const cm = &cpi->common;
-
-  // Keep a private copy of the whole configuration.
-  cpi->oxcf = *oxcf;
-  cpi->framerate = oxcf->init_framerate;
-
-  // Colour / format description copied verbatim from the configuration.
-  cm->seq_params.profile = oxcf->profile;
-  cm->seq_params.bit_depth = oxcf->bit_depth;
-  cm->seq_params.use_highbitdepth = oxcf->use_highbitdepth;
-  cm->seq_params.color_primaries = oxcf->color_primaries;
-  cm->seq_params.transfer_characteristics = oxcf->transfer_characteristics;
-  cm->seq_params.matrix_coefficients = oxcf->matrix_coefficients;
-  cm->seq_params.monochrome = oxcf->monochrome;
-  cm->seq_params.chroma_sample_position = oxcf->chroma_sample_position;
-  cm->seq_params.color_range = oxcf->color_range;
-  // Timing information copied verbatim from the configuration.
-  cm->timing_info_present = oxcf->timing_info_present;
-  cm->timing_info.num_units_in_display_tick =
-      oxcf->timing_info.num_units_in_display_tick;
-  cm->timing_info.time_scale = oxcf->timing_info.time_scale;
-  cm->timing_info.equal_picture_interval =
-      oxcf->timing_info.equal_picture_interval;
-  cm->timing_info.num_ticks_per_picture =
-      oxcf->timing_info.num_ticks_per_picture;
-
-  cm->seq_params.display_model_info_present_flag =
-      oxcf->display_model_info_present_flag;
-  cm->seq_params.decoder_model_info_present_flag =
-      oxcf->decoder_model_info_present_flag;
-  // Only operating point 0 is configured in the three branches below.
-  if (oxcf->decoder_model_info_present_flag) {
-    // set the decoder model parameters in schedule mode
-    cm->buffer_model.num_units_in_decoding_tick =
-        oxcf->buffer_model.num_units_in_decoding_tick;
-    cm->buffer_removal_time_present = 1;
-    set_aom_dec_model_info(&cm->buffer_model);
-    set_dec_model_op_parameters(&cm->op_params[0]);
-  } else if (cm->timing_info_present &&
-             cm->timing_info.equal_picture_interval &&
-             !cm->seq_params.decoder_model_info_present_flag) {
-    // NOTE(review): the last term tests the flag that was copied from
-    // oxcf->decoder_model_info_present_flag just above, which is already
-    // known to be zero on this path -- it looks redundant; confirm.
-    // set the decoder model parameters in resource availability mode
-    set_resource_availability_parameters(&cm->op_params[0]);
-  } else {
-    cm->op_params[0].initial_display_delay =
-        10;  // Default value (not signaled)
-  }
-
-  // Derive chroma subsampling. Precedence: monochrome, then the
-  // BT.709 / sRGB / identity-matrix combination, then the profile.
-  if (cm->seq_params.monochrome) {
-    cm->seq_params.subsampling_x = 1;
-    cm->seq_params.subsampling_y = 1;
-  } else if (cm->seq_params.color_primaries == AOM_CICP_CP_BT_709 &&
-             cm->seq_params.transfer_characteristics == AOM_CICP_TC_SRGB &&
-             cm->seq_params.matrix_coefficients == AOM_CICP_MC_IDENTITY) {
-    cm->seq_params.subsampling_x = 0;
-    cm->seq_params.subsampling_y = 0;
-  } else {
-    if (cm->seq_params.profile == 0) {
-      cm->seq_params.subsampling_x = 1;
-      cm->seq_params.subsampling_y = 1;
-    } else if (cm->seq_params.profile == 1) {
-      cm->seq_params.subsampling_x = 0;
-      cm->seq_params.subsampling_y = 0;
-    } else {
-      // Remaining profiles: only 12-bit content takes the subsampling from
-      // the configuration; other bit depths are fixed to x = 1, y = 0.
-      if (cm->seq_params.bit_depth == AOM_BITS_12) {
-        cm->seq_params.subsampling_x = oxcf->chroma_subsampling_x;
-        cm->seq_params.subsampling_y = oxcf->chroma_subsampling_y;
-      } else {
-        cm->seq_params.subsampling_x = 1;
-        cm->seq_params.subsampling_y = 0;
-      }
-    }
-  }
-
-  cm->width = oxcf->width;
-  cm->height = oxcf->height;
-  set_sb_size(&cm->seq_params,
-              select_sb_size(cpi));  // set sb size before allocations
-  alloc_compressor_data(cpi);
-
-  update_film_grain_parameters(cpi, oxcf);
-
-  // Single thread case: use counts in common.
-  cpi->td.counts = &cpi->counts;
-
-  // change includes all joint functionality
-  av1_change_config(cpi, oxcf);
-
-  cpi->static_mb_pct = 0;
-  cpi->ref_frame_flags = 0;
-
-  // Reset resize pending flags
-  cpi->resize_pending_width = 0;
-  cpi->resize_pending_height = 0;
-
-  init_buffer_indices(cpi);
-}
-
-// Converts the buffer levels given in milliseconds in the encoder
-// configuration into rate-control buffer sizes by scaling them with the
-// target bandwidth (level_ms * bandwidth / 1000). An optimal or maximum
-// level of 0 selects the default of bandwidth / 8 instead.
-static void set_rc_buffer_sizes(RATE_CONTROL *rc,
-                                const AV1EncoderConfig *oxcf) {
-  // Widen everything to 64 bits before multiplying.
-  const int64_t rate = oxcf->target_bandwidth;
-  const int64_t start_ms = oxcf->starting_buffer_level_ms;
-  const int64_t optimal_ms = oxcf->optimal_buffer_level_ms;
-  const int64_t max_ms = oxcf->maximum_buffer_size_ms;
-
-  rc->starting_buffer_level = start_ms * rate / 1000;
-
-  if (optimal_ms == 0) {
-    rc->optimal_buffer_level = rate / 8;
-  } else {
-    rc->optimal_buffer_level = optimal_ms * rate / 1000;
-  }
-
-  if (max_ms == 0) {
-    rc->maximum_buffer_size = rate / 8;
-  } else {
-    rc->maximum_buffer_size = max_ms * rate / 1000;
-  }
-}
-
-// Installs the high-bitdepth block function pointers for block size BT into
-// cpi->fn_ptr[BT]: SAD (sdf), SAD-with-average (sdaf), variance (vf),
-// sub-pixel variance (svf), sub-pixel average variance (svaf), 4-way SAD
-// (sdx4df) and the two jnt_* variants (jsdaf, jsvaf).
-// Requires a variable named `cpi` in scope. The expansion is a plain run of
-// statements that already ends in ';' (it is not wrapped in do/while), so it
-// must not be used as the body of an unbraced if/else.
-#define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, JSDAF, JSVAF) \
-  cpi->fn_ptr[BT].sdf = SDF;                                           \
-  cpi->fn_ptr[BT].sdaf = SDAF;                                         \
-  cpi->fn_ptr[BT].vf = VF;                                             \
-  cpi->fn_ptr[BT].svf = SVF;                                           \
-  cpi->fn_ptr[BT].svaf = SVAF;                                         \
-  cpi->fn_ptr[BT].sdx4df = SDX4DF;                                     \
-  cpi->fn_ptr[BT].jsdaf = JSDAF;                                       \
-  cpi->fn_ptr[BT].jsvaf = JSVAF;
-
-// Defines three static wrappers around the SAD function `fnname`:
-//   fnname_bits8  - returns the result of fnname unchanged,
-//   fnname_bits10 - returns the result shifted right by 2,
-//   fnname_bits12 - returns the result shifted right by 4.
-// NOTE(review): the shifts presumably rescale 10/12-bit SADs to the 8-bit
-// range -- confirm against the callers.
-#define MAKE_BFP_SAD_WRAPPER(fnname)                                           \
-  static unsigned int fnname##_bits8(const uint8_t *src_ptr,                   \
-                                     int source_stride,                        \
-                                     const uint8_t *ref_ptr, int ref_stride) { \
-    return fnname(src_ptr, source_stride, ref_ptr, ref_stride);                \
-  }                                                                            \
-  static unsigned int fnname##_bits10(                                         \
-      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
-      int ref_stride) {                                                        \
-    return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 2;           \
-  }                                                                            \
-  static unsigned int fnname##_bits12(                                         \
-      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
-      int ref_stride) {                                                        \
-    return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4;           \
-  }
-
-// Same scheme as MAKE_BFP_SAD_WRAPPER, for SAD functions that take an extra
-// `second_pred` argument: defines fnname_bits8 (result unchanged),
-// fnname_bits10 (result >> 2) and fnname_bits12 (result >> 4).
-#define MAKE_BFP_SADAVG_WRAPPER(fnname)                                        \
-  static unsigned int fnname##_bits8(                                          \
-      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
-      int ref_stride, const uint8_t *second_pred) {                            \
-    return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred);   \
-  }                                                                            \
-  static unsigned int fnname##_bits10(                                         \
-      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
-      int ref_stride, const uint8_t *second_pred) {                            \
-    return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \
-           2;                                                                  \
-  }                                                                            \
-  static unsigned int fnname##_bits12(                                         \
-      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
-      int ref_stride, const uint8_t *second_pred) {                            \
-    return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \
-           4;                                                                  \
-  }
-
-// Defines three static wrappers around the 4-reference SAD function
-// `fnname`, which writes its results into sad_array:
-//   fnname_bits8  - forwards the call unchanged,
-//   fnname_bits10 - additionally shifts sad_array[0..3] right by 2,
-//   fnname_bits12 - additionally shifts sad_array[0..3] right by 4.
-#define MAKE_BFP_SAD4D_WRAPPER(fnname)                                        \
-  static void fnname##_bits8(const uint8_t *src_ptr, int source_stride,       \
-                             const uint8_t *const ref_ptr[], int ref_stride,  \
-                             unsigned int *sad_array) {                       \
-    fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array);           \
-  }                                                                           \
-  static void fnname##_bits10(const uint8_t *src_ptr, int source_stride,      \
-                              const uint8_t *const ref_ptr[], int ref_stride, \
-                              unsigned int *sad_array) {                      \
-    int i;                                                                    \
-    fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array);           \
-    for (i = 0; i < 4; i++) sad_array[i] >>= 2;                               \
-  }                                                                           \
-  static void fnname##_bits12(const uint8_t *src_ptr, int source_stride,      \
-                              const uint8_t *const ref_ptr[], int ref_stride, \
-                              unsigned int *sad_array) {                      \
-    int i;                                                                    \
-    fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array);           \
-    for (i = 0; i < 4; i++) sad_array[i] >>= 4;                               \
-  }
-
-// Same scheme as MAKE_BFP_SADAVG_WRAPPER, for SAD functions that also take a
-// `const JNT_COMP_PARAMS *jcp_param` argument: defines fnname_bits8 (result
-// unchanged), fnname_bits10 (result >> 2) and fnname_bits12 (result >> 4).
-#define MAKE_BFP_JSADAVG_WRAPPER(fnname)                                    \
-  static unsigned int fnname##_bits8(                                       \
-      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,    \
-      int ref_stride, const uint8_t *second_pred,                           \
-      const JNT_COMP_PARAMS *jcp_param) {                                   \
-    return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred, \
-                  jcp_param);                                               \
-  }                                                                         \
-  static unsigned int fnname##_bits10(                                      \
-      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,    \
-      int ref_stride, const uint8_t *second_pred,                           \
-      const JNT_COMP_PARAMS *jcp_param) {                                   \
-    return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred, \
-                  jcp_param) >>                                             \
-           2;                                                               \
-  }                                                                         \
-  static unsigned int fnname##_bits12(                                      \
-      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,    \
-      int ref_stride, const uint8_t *second_pred,                           \
-      const JNT_COMP_PARAMS *jcp_param) {                                   \
-    return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred, \
-                  jcp_param) >>                                             \
-           4;                                                               \
-  }
-
-// Instantiate the _bits8 / _bits10 / _bits12 wrappers for the plain SAD,
-// the averaging SAD (_avg) and the 4-reference SAD (x4d) of each block size.
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad128x128)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad128x128_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad128x128x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad128x64)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad128x64_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad128x64x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad64x128)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad64x128_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad64x128x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x16)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x16_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x16x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x32)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x32_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x32x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad64x32)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad64x32_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad64x32x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x64)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x64_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x64x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x32)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x32_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x32x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad64x64)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad64x64_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad64x64x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x16)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x16_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x16x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x8)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x8_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x8x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x16)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x16_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x16x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x8)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x8_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x8x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x4)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x4_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x4x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad4x8)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad4x8_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x8x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad4x4)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad4x4_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x4x4d)
-
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad4x16)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad4x16_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x16x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x4)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x4_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x4x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x32)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x32_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x32x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x8)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x8_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x8x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x64)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x64_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x64x4d)
-MAKE_BFP_SAD_WRAPPER(aom_highbd_sad64x16)
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad64x16_avg)
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad64x16x4d)
-
-// Instantiate the _bits8 / _bits10 / _bits12 wrappers for the jnt averaging
-// SAD of each block size.
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad128x128_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad128x64_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad64x128_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad32x16_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad16x32_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad64x32_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad32x64_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad32x32_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad64x64_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad16x16_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad16x8_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad8x16_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad8x8_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad8x4_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad4x8_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad4x4_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad4x16_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad16x4_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad8x32_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad32x8_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad16x64_avg)
-MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad64x16_avg)
-
-// Installs the masked SAD (msdf) and masked sub-pixel variance (msvf)
-// function pointers for block size BT into cpi->fn_ptr[BT]. Requires `cpi`
-// in scope; expands to bare statements ending in ';' (no do/while wrapper).
-#define HIGHBD_MBFP(BT, MCSDF, MCSVF) \
-  cpi->fn_ptr[BT].msdf = MCSDF;       \
-  cpi->fn_ptr[BT].msvf = MCSVF;
-
-// Defines three static wrappers around the masked compound SAD function
-// `fnname` (which takes a second predictor, a mask `m` with its stride and
-// an invert_mask flag): fnname_bits8 returns the result unchanged,
-// fnname_bits10 returns it shifted right by 2 and fnname_bits12 by 4.
-#define MAKE_MBFP_COMPOUND_SAD_WRAPPER(fnname)                           \
-  static unsigned int fnname##_bits8(                                    \
-      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
-      int ref_stride, const uint8_t *second_pred_ptr, const uint8_t *m,  \
-      int m_stride, int invert_mask) {                                   \
-    return fnname(src_ptr, source_stride, ref_ptr, ref_stride,           \
-                  second_pred_ptr, m, m_stride, invert_mask);            \
-  }                                                                      \
-  static unsigned int fnname##_bits10(                                   \
-      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
-      int ref_stride, const uint8_t *second_pred_ptr, const uint8_t *m,  \
-      int m_stride, int invert_mask) {                                   \
-    return fnname(src_ptr, source_stride, ref_ptr, ref_stride,           \
-                  second_pred_ptr, m, m_stride, invert_mask) >>          \
-           2;                                                            \
-  }                                                                      \
-  static unsigned int fnname##_bits12(                                   \
-      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
-      int ref_stride, const uint8_t *second_pred_ptr, const uint8_t *m,  \
-      int m_stride, int invert_mask) {                                   \
-    return fnname(src_ptr, source_stride, ref_ptr, ref_stride,           \
-                  second_pred_ptr, m, m_stride, invert_mask) >>          \
-           4;                                                            \
-  }
-
-// Instantiate the _bits8 / _bits10 / _bits12 wrappers for the masked SAD of
-// each block size.
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad128x128)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad128x64)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x128)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x64)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x32)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x64)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x32)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x16)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x32)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x16)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x8)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x16)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x8)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x4)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x8)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x4)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x16)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x4)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x32)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x8)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x64)
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x16)
-
-// Installs the OBMC SAD (osdf), OBMC variance (ovf) and OBMC sub-pixel
-// variance (osvf) function pointers for block size BT into cpi->fn_ptr[BT].
-// Requires `cpi` in scope; expands to bare statements ending in ';' (no
-// do/while wrapper).
-#define HIGHBD_OBFP(BT, OSDF, OVF, OSVF) \
-  cpi->fn_ptr[BT].osdf = OSDF;           \
-  cpi->fn_ptr[BT].ovf = OVF;             \
-  cpi->fn_ptr[BT].osvf = OSVF;
-
-// Defines three static wrappers around the OBMC SAD function `fnname`
-// (arguments: reference pointer and stride, weighted source `wsrc`, mask
-// `msk`): fnname_bits8 returns the result unchanged, fnname_bits10 returns
-// it shifted right by 2 and fnname_bits12 by 4.
-#define MAKE_OBFP_SAD_WRAPPER(fnname)                                     \
-  static unsigned int fnname##_bits8(const uint8_t *ref, int ref_stride,  \
-                                     const int32_t *wsrc,                 \
-                                     const int32_t *msk) {                \
-    return fnname(ref, ref_stride, wsrc, msk);                            \
-  }                                                                       \
-  static unsigned int fnname##_bits10(const uint8_t *ref, int ref_stride, \
-                                      const int32_t *wsrc,                \
-                                      const int32_t *msk) {               \
-    return fnname(ref, ref_stride, wsrc, msk) >> 2;                       \
-  }                                                                       \
-  static unsigned int fnname##_bits12(const uint8_t *ref, int ref_stride, \
-                                      const int32_t *wsrc,                \
-                                      const int32_t *msk) {               \
-    return fnname(ref, ref_stride, wsrc, msk) >> 4;                       \
-  }
-
-// Instantiate the _bits8 / _bits10 / _bits12 wrappers for the OBMC SAD of
-// each block size.
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad128x128)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad128x64)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad64x128)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad64x64)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad64x32)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x64)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x32)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x16)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x32)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x16)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x8)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x16)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x8)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x4)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x8)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x4)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x16)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x4)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x32)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x8)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x64)
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad64x16)
-
-static void highbd_set_var_fns(AV1_COMP *const cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  if (cm->seq_params.use_highbitdepth) {
-    switch (cm->seq_params.bit_depth) {
-      case AOM_BITS_8:
-        HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits8,
-                   aom_highbd_sad64x16_avg_bits8, aom_highbd_8_variance64x16,
-                   aom_highbd_8_sub_pixel_variance64x16,
-                   aom_highbd_8_sub_pixel_avg_variance64x16,
-                   aom_highbd_sad64x16x4d_bits8,
-                   aom_highbd_jnt_sad64x16_avg_bits8,
-                   aom_highbd_8_jnt_sub_pixel_avg_variance64x16)
-
-        HIGHBD_BFP(BLOCK_16X64, aom_highbd_sad16x64_bits8,
-                   aom_highbd_sad16x64_avg_bits8, aom_highbd_8_variance16x64,
-                   aom_highbd_8_sub_pixel_variance16x64,
-                   aom_highbd_8_sub_pixel_avg_variance16x64,
-                   aom_highbd_sad16x64x4d_bits8,
-                   aom_highbd_jnt_sad16x64_avg_bits8,
-                   aom_highbd_8_jnt_sub_pixel_avg_variance16x64)
-
-        HIGHBD_BFP(
-            BLOCK_32X8, aom_highbd_sad32x8_bits8, aom_highbd_sad32x8_avg_bits8,
-            aom_highbd_8_variance32x8, aom_highbd_8_sub_pixel_variance32x8,
-            aom_highbd_8_sub_pixel_avg_variance32x8,
-            aom_highbd_sad32x8x4d_bits8, aom_highbd_jnt_sad32x8_avg_bits8,
-            aom_highbd_8_jnt_sub_pixel_avg_variance32x8)
-
-        HIGHBD_BFP(
-            BLOCK_8X32, aom_highbd_sad8x32_bits8, aom_highbd_sad8x32_avg_bits8,
-            aom_highbd_8_variance8x32, aom_highbd_8_sub_pixel_variance8x32,
-            aom_highbd_8_sub_pixel_avg_variance8x32,
-            aom_highbd_sad8x32x4d_bits8, aom_highbd_jnt_sad8x32_avg_bits8,
-            aom_highbd_8_jnt_sub_pixel_avg_variance8x32)
-
-        HIGHBD_BFP(
-            BLOCK_16X4, aom_highbd_sad16x4_bits8, aom_highbd_sad16x4_avg_bits8,
-            aom_highbd_8_variance16x4, aom_highbd_8_sub_pixel_variance16x4,
-            aom_highbd_8_sub_pixel_avg_variance16x4,
-            aom_highbd_sad16x4x4d_bits8, aom_highbd_jnt_sad16x4_avg_bits8,
-            aom_highbd_8_jnt_sub_pixel_avg_variance16x4)
-
-        HIGHBD_BFP(
-            BLOCK_4X16, aom_highbd_sad4x16_bits8, aom_highbd_sad4x16_avg_bits8,
-            aom_highbd_8_variance4x16, aom_highbd_8_sub_pixel_variance4x16,
-            aom_highbd_8_sub_pixel_avg_variance4x16,
-            aom_highbd_sad4x16x4d_bits8, aom_highbd_jnt_sad4x16_avg_bits8,
-            aom_highbd_8_jnt_sub_pixel_avg_variance4x16)
-
-        HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits8,
-                   aom_highbd_sad32x16_avg_bits8, aom_highbd_8_variance32x16,
-                   aom_highbd_8_sub_pixel_variance32x16,
-                   aom_highbd_8_sub_pixel_avg_variance32x16,
-                   aom_highbd_sad32x16x4d_bits8,
-                   aom_highbd_jnt_sad32x16_avg_bits8,
-                   aom_highbd_8_jnt_sub_pixel_avg_variance32x16)
-
-        HIGHBD_BFP(BLOCK_16X32, aom_highbd_sad16x32_bits8,
-                   aom_highbd_sad16x32_avg_bits8, aom_highbd_8_variance16x32,
-                   aom_highbd_8_sub_pixel_variance16x32,
-                   aom_highbd_8_sub_pixel_avg_variance16x32,
-                   aom_highbd_sad16x32x4d_bits8,
-                   aom_highbd_jnt_sad16x32_avg_bits8,
-                   aom_highbd_8_jnt_sub_pixel_avg_variance16x32)
-
-        HIGHBD_BFP(BLOCK_64X32, aom_highbd_sad64x32_bits8,
-                   aom_highbd_sad64x32_avg_bits8, aom_highbd_8_variance64x32,
-                   aom_highbd_8_sub_pixel_variance64x32,
-                   aom_highbd_8_sub_pixel_avg_variance64x32,
-                   aom_highbd_sad64x32x4d_bits8,
-                   aom_highbd_jnt_sad64x32_avg_bits8,
-                   aom_highbd_8_jnt_sub_pixel_avg_variance64x32)
-
-        HIGHBD_BFP(BLOCK_32X64, aom_highbd_sad32x64_bits8,
-                   aom_highbd_sad32x64_avg_bits8, aom_highbd_8_variance32x64,
-                   aom_highbd_8_sub_pixel_variance32x64,
-                   aom_highbd_8_sub_pixel_avg_variance32x64,
-                   aom_highbd_sad32x64x4d_bits8,
-                   aom_highbd_jnt_sad32x64_avg_bits8,
-                   aom_highbd_8_jnt_sub_pixel_avg_variance32x64)
-
-        HIGHBD_BFP(BLOCK_32X32, aom_highbd_sad32x32_bits8,
-                   aom_highbd_sad32x32_avg_bits8, aom_highbd_8_variance32x32,
-                   aom_highbd_8_sub_pixel_variance32x32,
-                   aom_highbd_8_sub_pixel_avg_variance32x32,
-                   aom_highbd_sad32x32x4d_bits8,
-                   aom_highbd_jnt_sad32x32_avg_bits8,
-                   aom_highbd_8_jnt_sub_pixel_avg_variance32x32)
-
-        HIGHBD_BFP(BLOCK_64X64, aom_highbd_sad64x64_bits8,
-                   aom_highbd_sad64x64_avg_bits8, aom_highbd_8_variance64x64,
-                   aom_highbd_8_sub_pixel_variance64x64,
-                   aom_highbd_8_sub_pixel_avg_variance64x64,
-                   aom_highbd_sad64x64x4d_bits8,
-                   aom_highbd_jnt_sad64x64_avg_bits8,
-                   aom_highbd_8_jnt_sub_pixel_avg_variance64x64)
-
-        HIGHBD_BFP(BLOCK_16X16, aom_highbd_sad16x16_bits8,
-                   aom_highbd_sad16x16_avg_bits8, aom_highbd_8_variance16x16,
-                   aom_highbd_8_sub_pixel_variance16x16,
-                   aom_highbd_8_sub_pixel_avg_variance16x16,
-                   aom_highbd_sad16x16x4d_bits8,
-                   aom_highbd_jnt_sad16x16_avg_bits8,
-                   aom_highbd_8_jnt_sub_pixel_avg_variance16x16)
-
-        HIGHBD_BFP(
-            BLOCK_16X8, aom_highbd_sad16x8_bits8, aom_highbd_sad16x8_avg_bits8,
-            aom_highbd_8_variance16x8, aom_highbd_8_sub_pixel_variance16x8,
-            aom_highbd_8_sub_pixel_avg_variance16x8,
-            aom_highbd_sad16x8x4d_bits8, aom_highbd_jnt_sad16x8_avg_bits8,
-            aom_highbd_8_jnt_sub_pixel_avg_variance16x8)
-
-        HIGHBD_BFP(
-            BLOCK_8X16, aom_highbd_sad8x16_bits8, aom_highbd_sad8x16_avg_bits8,
-            aom_highbd_8_variance8x16, aom_highbd_8_sub_pixel_variance8x16,
-            aom_highbd_8_sub_pixel_avg_variance8x16,
-            aom_highbd_sad8x16x4d_bits8, aom_highbd_jnt_sad8x16_avg_bits8,
-            aom_highbd_8_jnt_sub_pixel_avg_variance8x16)
-
-        HIGHBD_BFP(BLOCK_8X8, aom_highbd_sad8x8_bits8,
-                   aom_highbd_sad8x8_avg_bits8, aom_highbd_8_variance8x8,
-                   aom_highbd_8_sub_pixel_variance8x8,
-                   aom_highbd_8_sub_pixel_avg_variance8x8,
-                   aom_highbd_sad8x8x4d_bits8, aom_highbd_jnt_sad8x8_avg_bits8,
-                   aom_highbd_8_jnt_sub_pixel_avg_variance8x8)
-
-        HIGHBD_BFP(BLOCK_8X4, aom_highbd_sad8x4_bits8,
-                   aom_highbd_sad8x4_avg_bits8, aom_highbd_8_variance8x4,
-                   aom_highbd_8_sub_pixel_variance8x4,
-                   aom_highbd_8_sub_pixel_avg_variance8x4,
-                   aom_highbd_sad8x4x4d_bits8, aom_highbd_jnt_sad8x4_avg_bits8,
-                   aom_highbd_8_jnt_sub_pixel_avg_variance8x4)
-
-        HIGHBD_BFP(BLOCK_4X8, aom_highbd_sad4x8_bits8,
-                   aom_highbd_sad4x8_avg_bits8, aom_highbd_8_variance4x8,
-                   aom_highbd_8_sub_pixel_variance4x8,
-                   aom_highbd_8_sub_pixel_avg_variance4x8,
-                   aom_highbd_sad4x8x4d_bits8, aom_highbd_jnt_sad4x8_avg_bits8,
-                   aom_highbd_8_jnt_sub_pixel_avg_variance4x8)
-
-        HIGHBD_BFP(BLOCK_4X4, aom_highbd_sad4x4_bits8,
-                   aom_highbd_sad4x4_avg_bits8, aom_highbd_8_variance4x4,
-                   aom_highbd_8_sub_pixel_variance4x4,
-                   aom_highbd_8_sub_pixel_avg_variance4x4,
-                   aom_highbd_sad4x4x4d_bits8, aom_highbd_jnt_sad4x4_avg_bits8,
-                   aom_highbd_8_jnt_sub_pixel_avg_variance4x4)
-
-        HIGHBD_BFP(
-            BLOCK_128X128, aom_highbd_sad128x128_bits8,
-            aom_highbd_sad128x128_avg_bits8, aom_highbd_8_variance128x128,
-            aom_highbd_8_sub_pixel_variance128x128,
-            aom_highbd_8_sub_pixel_avg_variance128x128,
-            aom_highbd_sad128x128x4d_bits8, aom_highbd_jnt_sad128x128_avg_bits8,
-            aom_highbd_8_jnt_sub_pixel_avg_variance128x128)
-
-        HIGHBD_BFP(BLOCK_128X64, aom_highbd_sad128x64_bits8,
-                   aom_highbd_sad128x64_avg_bits8, aom_highbd_8_variance128x64,
-                   aom_highbd_8_sub_pixel_variance128x64,
-                   aom_highbd_8_sub_pixel_avg_variance128x64,
-                   aom_highbd_sad128x64x4d_bits8,
-                   aom_highbd_jnt_sad128x64_avg_bits8,
-                   aom_highbd_8_jnt_sub_pixel_avg_variance128x64)
-
-        HIGHBD_BFP(BLOCK_64X128, aom_highbd_sad64x128_bits8,
-                   aom_highbd_sad64x128_avg_bits8, aom_highbd_8_variance64x128,
-                   aom_highbd_8_sub_pixel_variance64x128,
-                   aom_highbd_8_sub_pixel_avg_variance64x128,
-                   aom_highbd_sad64x128x4d_bits8,
-                   aom_highbd_jnt_sad64x128_avg_bits8,
-                   aom_highbd_8_jnt_sub_pixel_avg_variance64x128)
-
-        HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance128x128)
-        HIGHBD_MBFP(BLOCK_128X64, aom_highbd_masked_sad128x64_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance128x64)
-        HIGHBD_MBFP(BLOCK_64X128, aom_highbd_masked_sad64x128_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance64x128)
-        HIGHBD_MBFP(BLOCK_64X64, aom_highbd_masked_sad64x64_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance64x64)
-        HIGHBD_MBFP(BLOCK_64X32, aom_highbd_masked_sad64x32_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance64x32)
-        HIGHBD_MBFP(BLOCK_32X64, aom_highbd_masked_sad32x64_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance32x64)
-        HIGHBD_MBFP(BLOCK_32X32, aom_highbd_masked_sad32x32_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance32x32)
-        HIGHBD_MBFP(BLOCK_32X16, aom_highbd_masked_sad32x16_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance32x16)
-        HIGHBD_MBFP(BLOCK_16X32, aom_highbd_masked_sad16x32_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance16x32)
-        HIGHBD_MBFP(BLOCK_16X16, aom_highbd_masked_sad16x16_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance16x16)
-        HIGHBD_MBFP(BLOCK_8X16, aom_highbd_masked_sad8x16_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance8x16)
-        HIGHBD_MBFP(BLOCK_16X8, aom_highbd_masked_sad16x8_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance16x8)
-        HIGHBD_MBFP(BLOCK_8X8, aom_highbd_masked_sad8x8_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance8x8)
-        HIGHBD_MBFP(BLOCK_4X8, aom_highbd_masked_sad4x8_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance4x8)
-        HIGHBD_MBFP(BLOCK_8X4, aom_highbd_masked_sad8x4_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance8x4)
-        HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance4x4)
-        HIGHBD_MBFP(BLOCK_64X16, aom_highbd_masked_sad64x16_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance64x16)
-        HIGHBD_MBFP(BLOCK_16X64, aom_highbd_masked_sad16x64_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance16x64)
-        HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance32x8)
-        HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance8x32)
-        HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance16x4)
-        HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits8,
-                    aom_highbd_8_masked_sub_pixel_variance4x16)
-        HIGHBD_OBFP(BLOCK_128X128, aom_highbd_obmc_sad128x128_bits8,
-                    aom_highbd_obmc_variance128x128,
-                    aom_highbd_obmc_sub_pixel_variance128x128)
-        HIGHBD_OBFP(BLOCK_128X64, aom_highbd_obmc_sad128x64_bits8,
-                    aom_highbd_obmc_variance128x64,
-                    aom_highbd_obmc_sub_pixel_variance128x64)
-        HIGHBD_OBFP(BLOCK_64X128, aom_highbd_obmc_sad64x128_bits8,
-                    aom_highbd_obmc_variance64x128,
-                    aom_highbd_obmc_sub_pixel_variance64x128)
-        HIGHBD_OBFP(BLOCK_64X64, aom_highbd_obmc_sad64x64_bits8,
-                    aom_highbd_obmc_variance64x64,
-                    aom_highbd_obmc_sub_pixel_variance64x64)
-        HIGHBD_OBFP(BLOCK_64X32, aom_highbd_obmc_sad64x32_bits8,
-                    aom_highbd_obmc_variance64x32,
-                    aom_highbd_obmc_sub_pixel_variance64x32)
-        HIGHBD_OBFP(BLOCK_32X64, aom_highbd_obmc_sad32x64_bits8,
-                    aom_highbd_obmc_variance32x64,
-                    aom_highbd_obmc_sub_pixel_variance32x64)
-        HIGHBD_OBFP(BLOCK_32X32, aom_highbd_obmc_sad32x32_bits8,
-                    aom_highbd_obmc_variance32x32,
-                    aom_highbd_obmc_sub_pixel_variance32x32)
-        HIGHBD_OBFP(BLOCK_32X16, aom_highbd_obmc_sad32x16_bits8,
-                    aom_highbd_obmc_variance32x16,
-                    aom_highbd_obmc_sub_pixel_variance32x16)
-        HIGHBD_OBFP(BLOCK_16X32, aom_highbd_obmc_sad16x32_bits8,
-                    aom_highbd_obmc_variance16x32,
-                    aom_highbd_obmc_sub_pixel_variance16x32)
-        HIGHBD_OBFP(BLOCK_16X16, aom_highbd_obmc_sad16x16_bits8,
-                    aom_highbd_obmc_variance16x16,
-                    aom_highbd_obmc_sub_pixel_variance16x16)
-        HIGHBD_OBFP(BLOCK_8X16, aom_highbd_obmc_sad8x16_bits8,
-                    aom_highbd_obmc_variance8x16,
-                    aom_highbd_obmc_sub_pixel_variance8x16)
-        HIGHBD_OBFP(BLOCK_16X8, aom_highbd_obmc_sad16x8_bits8,
-                    aom_highbd_obmc_variance16x8,
-                    aom_highbd_obmc_sub_pixel_variance16x8)
-        HIGHBD_OBFP(BLOCK_8X8, aom_highbd_obmc_sad8x8_bits8,
-                    aom_highbd_obmc_variance8x8,
-                    aom_highbd_obmc_sub_pixel_variance8x8)
-        HIGHBD_OBFP(BLOCK_4X8, aom_highbd_obmc_sad4x8_bits8,
-                    aom_highbd_obmc_variance4x8,
-                    aom_highbd_obmc_sub_pixel_variance4x8)
-        HIGHBD_OBFP(BLOCK_8X4, aom_highbd_obmc_sad8x4_bits8,
-                    aom_highbd_obmc_variance8x4,
-                    aom_highbd_obmc_sub_pixel_variance8x4)
-        HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits8,
-                    aom_highbd_obmc_variance4x4,
-                    aom_highbd_obmc_sub_pixel_variance4x4)
-        HIGHBD_OBFP(BLOCK_64X16, aom_highbd_obmc_sad64x16_bits8,
-                    aom_highbd_obmc_variance64x16,
-                    aom_highbd_obmc_sub_pixel_variance64x16)
-        HIGHBD_OBFP(BLOCK_16X64, aom_highbd_obmc_sad16x64_bits8,
-                    aom_highbd_obmc_variance16x64,
-                    aom_highbd_obmc_sub_pixel_variance16x64)
-        HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits8,
-                    aom_highbd_obmc_variance32x8,
-                    aom_highbd_obmc_sub_pixel_variance32x8)
-        HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits8,
-                    aom_highbd_obmc_variance8x32,
-                    aom_highbd_obmc_sub_pixel_variance8x32)
-        HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits8,
-                    aom_highbd_obmc_variance16x4,
-                    aom_highbd_obmc_sub_pixel_variance16x4)
-        HIGHBD_OBFP(BLOCK_4X16, aom_highbd_obmc_sad4x16_bits8,
-                    aom_highbd_obmc_variance4x16,
-                    aom_highbd_obmc_sub_pixel_variance4x16)
-        break;
-
-      case AOM_BITS_10:
-        HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits10,
-                   aom_highbd_sad64x16_avg_bits10, aom_highbd_10_variance64x16,
-                   aom_highbd_10_sub_pixel_variance64x16,
-                   aom_highbd_10_sub_pixel_avg_variance64x16,
-                   aom_highbd_sad64x16x4d_bits10,
-                   aom_highbd_jnt_sad64x16_avg_bits10,
-                   aom_highbd_10_jnt_sub_pixel_avg_variance64x16);
-
-        HIGHBD_BFP(BLOCK_16X64, aom_highbd_sad16x64_bits10,
-                   aom_highbd_sad16x64_avg_bits10, aom_highbd_10_variance16x64,
-                   aom_highbd_10_sub_pixel_variance16x64,
-                   aom_highbd_10_sub_pixel_avg_variance16x64,
-                   aom_highbd_sad16x64x4d_bits10,
-                   aom_highbd_jnt_sad16x64_avg_bits10,
-                   aom_highbd_10_jnt_sub_pixel_avg_variance16x64);
-
-        HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits10,
-                   aom_highbd_sad32x8_avg_bits10, aom_highbd_10_variance32x8,
-                   aom_highbd_10_sub_pixel_variance32x8,
-                   aom_highbd_10_sub_pixel_avg_variance32x8,
-                   aom_highbd_sad32x8x4d_bits10,
-                   aom_highbd_jnt_sad32x8_avg_bits10,
-                   aom_highbd_10_jnt_sub_pixel_avg_variance32x8);
-
-        HIGHBD_BFP(BLOCK_8X32, aom_highbd_sad8x32_bits10,
-                   aom_highbd_sad8x32_avg_bits10, aom_highbd_10_variance8x32,
-                   aom_highbd_10_sub_pixel_variance8x32,
-                   aom_highbd_10_sub_pixel_avg_variance8x32,
-                   aom_highbd_sad8x32x4d_bits10,
-                   aom_highbd_jnt_sad8x32_avg_bits10,
-                   aom_highbd_10_jnt_sub_pixel_avg_variance8x32);
-
-        HIGHBD_BFP(BLOCK_16X4, aom_highbd_sad16x4_bits10,
-                   aom_highbd_sad16x4_avg_bits10, aom_highbd_10_variance16x4,
-                   aom_highbd_10_sub_pixel_variance16x4,
-                   aom_highbd_10_sub_pixel_avg_variance16x4,
-                   aom_highbd_sad16x4x4d_bits10,
-                   aom_highbd_jnt_sad16x4_avg_bits10,
-                   aom_highbd_10_jnt_sub_pixel_avg_variance16x4);
-
-        HIGHBD_BFP(BLOCK_4X16, aom_highbd_sad4x16_bits10,
-                   aom_highbd_sad4x16_avg_bits10, aom_highbd_10_variance4x16,
-                   aom_highbd_10_sub_pixel_variance4x16,
-                   aom_highbd_10_sub_pixel_avg_variance4x16,
-                   aom_highbd_sad4x16x4d_bits10,
-                   aom_highbd_jnt_sad4x16_avg_bits10,
-                   aom_highbd_10_jnt_sub_pixel_avg_variance4x16);
-
-        HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits10,
-                   aom_highbd_sad32x16_avg_bits10, aom_highbd_10_variance32x16,
-                   aom_highbd_10_sub_pixel_variance32x16,
-                   aom_highbd_10_sub_pixel_avg_variance32x16,
-                   aom_highbd_sad32x16x4d_bits10,
-                   aom_highbd_jnt_sad32x16_avg_bits10,
-                   aom_highbd_10_jnt_sub_pixel_avg_variance32x16);
-
-        HIGHBD_BFP(BLOCK_16X32, aom_highbd_sad16x32_bits10,
-                   aom_highbd_sad16x32_avg_bits10, aom_highbd_10_variance16x32,
-                   aom_highbd_10_sub_pixel_variance16x32,
-                   aom_highbd_10_sub_pixel_avg_variance16x32,
-                   aom_highbd_sad16x32x4d_bits10,
-                   aom_highbd_jnt_sad16x32_avg_bits10,
-                   aom_highbd_10_jnt_sub_pixel_avg_variance16x32);
-
-        HIGHBD_BFP(BLOCK_64X32, aom_highbd_sad64x32_bits10,
-                   aom_highbd_sad64x32_avg_bits10, aom_highbd_10_variance64x32,
-                   aom_highbd_10_sub_pixel_variance64x32,
-                   aom_highbd_10_sub_pixel_avg_variance64x32,
-                   aom_highbd_sad64x32x4d_bits10,
-                   aom_highbd_jnt_sad64x32_avg_bits10,
-                   aom_highbd_10_jnt_sub_pixel_avg_variance64x32);
-
-        HIGHBD_BFP(BLOCK_32X64, aom_highbd_sad32x64_bits10,
-                   aom_highbd_sad32x64_avg_bits10, aom_highbd_10_variance32x64,
-                   aom_highbd_10_sub_pixel_variance32x64,
-                   aom_highbd_10_sub_pixel_avg_variance32x64,
-                   aom_highbd_sad32x64x4d_bits10,
-                   aom_highbd_jnt_sad32x64_avg_bits10,
-                   aom_highbd_10_jnt_sub_pixel_avg_variance32x64);
-
-        HIGHBD_BFP(BLOCK_32X32, aom_highbd_sad32x32_bits10,
-                   aom_highbd_sad32x32_avg_bits10, aom_highbd_10_variance32x32,
-                   aom_highbd_10_sub_pixel_variance32x32,
-                   aom_highbd_10_sub_pixel_avg_variance32x32,
-                   aom_highbd_sad32x32x4d_bits10,
-                   aom_highbd_jnt_sad32x32_avg_bits10,
-                   aom_highbd_10_jnt_sub_pixel_avg_variance32x32);
-
-        HIGHBD_BFP(BLOCK_64X64, aom_highbd_sad64x64_bits10,
-                   aom_highbd_sad64x64_avg_bits10, aom_highbd_10_variance64x64,
-                   aom_highbd_10_sub_pixel_variance64x64,
-                   aom_highbd_10_sub_pixel_avg_variance64x64,
-                   aom_highbd_sad64x64x4d_bits10,
-                   aom_highbd_jnt_sad64x64_avg_bits10,
-                   aom_highbd_10_jnt_sub_pixel_avg_variance64x64);
-
-        HIGHBD_BFP(BLOCK_16X16, aom_highbd_sad16x16_bits10,
-                   aom_highbd_sad16x16_avg_bits10, aom_highbd_10_variance16x16,
-                   aom_highbd_10_sub_pixel_variance16x16,
-                   aom_highbd_10_sub_pixel_avg_variance16x16,
-                   aom_highbd_sad16x16x4d_bits10,
-                   aom_highbd_jnt_sad16x16_avg_bits10,
-                   aom_highbd_10_jnt_sub_pixel_avg_variance16x16);
-
-        HIGHBD_BFP(BLOCK_16X8, aom_highbd_sad16x8_bits10,
-                   aom_highbd_sad16x8_avg_bits10, aom_highbd_10_variance16x8,
-                   aom_highbd_10_sub_pixel_variance16x8,
-                   aom_highbd_10_sub_pixel_avg_variance16x8,
-                   aom_highbd_sad16x8x4d_bits10,
-                   aom_highbd_jnt_sad16x8_avg_bits10,
-                   aom_highbd_10_jnt_sub_pixel_avg_variance16x8);
-
-        HIGHBD_BFP(BLOCK_8X16, aom_highbd_sad8x16_bits10,
-                   aom_highbd_sad8x16_avg_bits10, aom_highbd_10_variance8x16,
-                   aom_highbd_10_sub_pixel_variance8x16,
-                   aom_highbd_10_sub_pixel_avg_variance8x16,
-                   aom_highbd_sad8x16x4d_bits10,
-                   aom_highbd_jnt_sad8x16_avg_bits10,
-                   aom_highbd_10_jnt_sub_pixel_avg_variance8x16);
-
-        HIGHBD_BFP(
-            BLOCK_8X8, aom_highbd_sad8x8_bits10, aom_highbd_sad8x8_avg_bits10,
-            aom_highbd_10_variance8x8, aom_highbd_10_sub_pixel_variance8x8,
-            aom_highbd_10_sub_pixel_avg_variance8x8,
-            aom_highbd_sad8x8x4d_bits10, aom_highbd_jnt_sad8x8_avg_bits10,
-            aom_highbd_10_jnt_sub_pixel_avg_variance8x8);
-
-        HIGHBD_BFP(
-            BLOCK_8X4, aom_highbd_sad8x4_bits10, aom_highbd_sad8x4_avg_bits10,
-            aom_highbd_10_variance8x4, aom_highbd_10_sub_pixel_variance8x4,
-            aom_highbd_10_sub_pixel_avg_variance8x4,
-            aom_highbd_sad8x4x4d_bits10, aom_highbd_jnt_sad8x4_avg_bits10,
-            aom_highbd_10_jnt_sub_pixel_avg_variance8x4);
-
-        HIGHBD_BFP(
-            BLOCK_4X8, aom_highbd_sad4x8_bits10, aom_highbd_sad4x8_avg_bits10,
-            aom_highbd_10_variance4x8, aom_highbd_10_sub_pixel_variance4x8,
-            aom_highbd_10_sub_pixel_avg_variance4x8,
-            aom_highbd_sad4x8x4d_bits10, aom_highbd_jnt_sad4x8_avg_bits10,
-            aom_highbd_10_jnt_sub_pixel_avg_variance4x8);
-
-        HIGHBD_BFP(
-            BLOCK_4X4, aom_highbd_sad4x4_bits10, aom_highbd_sad4x4_avg_bits10,
-            aom_highbd_10_variance4x4, aom_highbd_10_sub_pixel_variance4x4,
-            aom_highbd_10_sub_pixel_avg_variance4x4,
-            aom_highbd_sad4x4x4d_bits10, aom_highbd_jnt_sad4x4_avg_bits10,
-            aom_highbd_10_jnt_sub_pixel_avg_variance4x4);
-
-        HIGHBD_BFP(BLOCK_128X128, aom_highbd_sad128x128_bits10,
-                   aom_highbd_sad128x128_avg_bits10,
-                   aom_highbd_10_variance128x128,
-                   aom_highbd_10_sub_pixel_variance128x128,
-                   aom_highbd_10_sub_pixel_avg_variance128x128,
-                   aom_highbd_sad128x128x4d_bits10,
-                   aom_highbd_jnt_sad128x128_avg_bits10,
-                   aom_highbd_10_jnt_sub_pixel_avg_variance128x128);
-
-        HIGHBD_BFP(
-            BLOCK_128X64, aom_highbd_sad128x64_bits10,
-            aom_highbd_sad128x64_avg_bits10, aom_highbd_10_variance128x64,
-            aom_highbd_10_sub_pixel_variance128x64,
-            aom_highbd_10_sub_pixel_avg_variance128x64,
-            aom_highbd_sad128x64x4d_bits10, aom_highbd_jnt_sad128x64_avg_bits10,
-            aom_highbd_10_jnt_sub_pixel_avg_variance128x64);
-
-        HIGHBD_BFP(
-            BLOCK_64X128, aom_highbd_sad64x128_bits10,
-            aom_highbd_sad64x128_avg_bits10, aom_highbd_10_variance64x128,
-            aom_highbd_10_sub_pixel_variance64x128,
-            aom_highbd_10_sub_pixel_avg_variance64x128,
-            aom_highbd_sad64x128x4d_bits10, aom_highbd_jnt_sad64x128_avg_bits10,
-            aom_highbd_10_jnt_sub_pixel_avg_variance64x128);
-
-        HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance128x128)
-        HIGHBD_MBFP(BLOCK_128X64, aom_highbd_masked_sad128x64_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance128x64)
-        HIGHBD_MBFP(BLOCK_64X128, aom_highbd_masked_sad64x128_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance64x128)
-        HIGHBD_MBFP(BLOCK_64X64, aom_highbd_masked_sad64x64_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance64x64)
-        HIGHBD_MBFP(BLOCK_64X32, aom_highbd_masked_sad64x32_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance64x32)
-        HIGHBD_MBFP(BLOCK_32X64, aom_highbd_masked_sad32x64_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance32x64)
-        HIGHBD_MBFP(BLOCK_32X32, aom_highbd_masked_sad32x32_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance32x32)
-        HIGHBD_MBFP(BLOCK_32X16, aom_highbd_masked_sad32x16_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance32x16)
-        HIGHBD_MBFP(BLOCK_16X32, aom_highbd_masked_sad16x32_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance16x32)
-        HIGHBD_MBFP(BLOCK_16X16, aom_highbd_masked_sad16x16_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance16x16)
-        HIGHBD_MBFP(BLOCK_8X16, aom_highbd_masked_sad8x16_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance8x16)
-        HIGHBD_MBFP(BLOCK_16X8, aom_highbd_masked_sad16x8_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance16x8)
-        HIGHBD_MBFP(BLOCK_8X8, aom_highbd_masked_sad8x8_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance8x8)
-        HIGHBD_MBFP(BLOCK_4X8, aom_highbd_masked_sad4x8_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance4x8)
-        HIGHBD_MBFP(BLOCK_8X4, aom_highbd_masked_sad8x4_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance8x4)
-        HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance4x4)
-        HIGHBD_MBFP(BLOCK_64X16, aom_highbd_masked_sad64x16_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance64x16)
-        HIGHBD_MBFP(BLOCK_16X64, aom_highbd_masked_sad16x64_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance16x64)
-        HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance32x8)
-        HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance8x32)
-        HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance16x4)
-        HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits10,
-                    aom_highbd_10_masked_sub_pixel_variance4x16)
-        HIGHBD_OBFP(BLOCK_128X128, aom_highbd_obmc_sad128x128_bits10,
-                    aom_highbd_10_obmc_variance128x128,
-                    aom_highbd_10_obmc_sub_pixel_variance128x128)
-        HIGHBD_OBFP(BLOCK_128X64, aom_highbd_obmc_sad128x64_bits10,
-                    aom_highbd_10_obmc_variance128x64,
-                    aom_highbd_10_obmc_sub_pixel_variance128x64)
-        HIGHBD_OBFP(BLOCK_64X128, aom_highbd_obmc_sad64x128_bits10,
-                    aom_highbd_10_obmc_variance64x128,
-                    aom_highbd_10_obmc_sub_pixel_variance64x128)
-        HIGHBD_OBFP(BLOCK_64X64, aom_highbd_obmc_sad64x64_bits10,
-                    aom_highbd_10_obmc_variance64x64,
-                    aom_highbd_10_obmc_sub_pixel_variance64x64)
-        HIGHBD_OBFP(BLOCK_64X32, aom_highbd_obmc_sad64x32_bits10,
-                    aom_highbd_10_obmc_variance64x32,
-                    aom_highbd_10_obmc_sub_pixel_variance64x32)
-        HIGHBD_OBFP(BLOCK_32X64, aom_highbd_obmc_sad32x64_bits10,
-                    aom_highbd_10_obmc_variance32x64,
-                    aom_highbd_10_obmc_sub_pixel_variance32x64)
-        HIGHBD_OBFP(BLOCK_32X32, aom_highbd_obmc_sad32x32_bits10,
-                    aom_highbd_10_obmc_variance32x32,
-                    aom_highbd_10_obmc_sub_pixel_variance32x32)
-        HIGHBD_OBFP(BLOCK_32X16, aom_highbd_obmc_sad32x16_bits10,
-                    aom_highbd_10_obmc_variance32x16,
-                    aom_highbd_10_obmc_sub_pixel_variance32x16)
-        HIGHBD_OBFP(BLOCK_16X32, aom_highbd_obmc_sad16x32_bits10,
-                    aom_highbd_10_obmc_variance16x32,
-                    aom_highbd_10_obmc_sub_pixel_variance16x32)
-        HIGHBD_OBFP(BLOCK_16X16, aom_highbd_obmc_sad16x16_bits10,
-                    aom_highbd_10_obmc_variance16x16,
-                    aom_highbd_10_obmc_sub_pixel_variance16x16)
-        HIGHBD_OBFP(BLOCK_8X16, aom_highbd_obmc_sad8x16_bits10,
-                    aom_highbd_10_obmc_variance8x16,
-                    aom_highbd_10_obmc_sub_pixel_variance8x16)
-        HIGHBD_OBFP(BLOCK_16X8, aom_highbd_obmc_sad16x8_bits10,
-                    aom_highbd_10_obmc_variance16x8,
-                    aom_highbd_10_obmc_sub_pixel_variance16x8)
-        HIGHBD_OBFP(BLOCK_8X8, aom_highbd_obmc_sad8x8_bits10,
-                    aom_highbd_10_obmc_variance8x8,
-                    aom_highbd_10_obmc_sub_pixel_variance8x8)
-        HIGHBD_OBFP(BLOCK_4X8, aom_highbd_obmc_sad4x8_bits10,
-                    aom_highbd_10_obmc_variance4x8,
-                    aom_highbd_10_obmc_sub_pixel_variance4x8)
-        HIGHBD_OBFP(BLOCK_8X4, aom_highbd_obmc_sad8x4_bits10,
-                    aom_highbd_10_obmc_variance8x4,
-                    aom_highbd_10_obmc_sub_pixel_variance8x4)
-        HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits10,
-                    aom_highbd_10_obmc_variance4x4,
-                    aom_highbd_10_obmc_sub_pixel_variance4x4)
-
-        HIGHBD_OBFP(BLOCK_64X16, aom_highbd_obmc_sad64x16_bits10,
-                    aom_highbd_10_obmc_variance64x16,
-                    aom_highbd_10_obmc_sub_pixel_variance64x16)
-
-        HIGHBD_OBFP(BLOCK_16X64, aom_highbd_obmc_sad16x64_bits10,
-                    aom_highbd_10_obmc_variance16x64,
-                    aom_highbd_10_obmc_sub_pixel_variance16x64)
-
-        HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits10,
-                    aom_highbd_10_obmc_variance32x8,
-                    aom_highbd_10_obmc_sub_pixel_variance32x8)
-
-        HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits10,
-                    aom_highbd_10_obmc_variance8x32,
-                    aom_highbd_10_obmc_sub_pixel_variance8x32)
-
-        HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits10,
-                    aom_highbd_10_obmc_variance16x4,
-                    aom_highbd_10_obmc_sub_pixel_variance16x4)
-
-        HIGHBD_OBFP(BLOCK_4X16, aom_highbd_obmc_sad4x16_bits10,
-                    aom_highbd_10_obmc_variance4x16,
-                    aom_highbd_10_obmc_sub_pixel_variance4x16)
-        break;
-
-      case AOM_BITS_12:
-        HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits12,
-                   aom_highbd_sad64x16_avg_bits12, aom_highbd_12_variance64x16,
-                   aom_highbd_12_sub_pixel_variance64x16,
-                   aom_highbd_12_sub_pixel_avg_variance64x16,
-                   aom_highbd_sad64x16x4d_bits12,
-                   aom_highbd_jnt_sad64x16_avg_bits12,
-                   aom_highbd_12_jnt_sub_pixel_avg_variance64x16);
-
-        HIGHBD_BFP(BLOCK_16X64, aom_highbd_sad16x64_bits12,
-                   aom_highbd_sad16x64_avg_bits12, aom_highbd_12_variance16x64,
-                   aom_highbd_12_sub_pixel_variance16x64,
-                   aom_highbd_12_sub_pixel_avg_variance16x64,
-                   aom_highbd_sad16x64x4d_bits12,
-                   aom_highbd_jnt_sad16x64_avg_bits12,
-                   aom_highbd_12_jnt_sub_pixel_avg_variance16x64);
-
-        HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits12,
-                   aom_highbd_sad32x8_avg_bits12, aom_highbd_12_variance32x8,
-                   aom_highbd_12_sub_pixel_variance32x8,
-                   aom_highbd_12_sub_pixel_avg_variance32x8,
-                   aom_highbd_sad32x8x4d_bits12,
-                   aom_highbd_jnt_sad32x8_avg_bits12,
-                   aom_highbd_12_jnt_sub_pixel_avg_variance32x8);
-
-        HIGHBD_BFP(BLOCK_8X32, aom_highbd_sad8x32_bits12,
-                   aom_highbd_sad8x32_avg_bits12, aom_highbd_12_variance8x32,
-                   aom_highbd_12_sub_pixel_variance8x32,
-                   aom_highbd_12_sub_pixel_avg_variance8x32,
-                   aom_highbd_sad8x32x4d_bits12,
-                   aom_highbd_jnt_sad8x32_avg_bits12,
-                   aom_highbd_12_jnt_sub_pixel_avg_variance8x32);
-
-        HIGHBD_BFP(BLOCK_16X4, aom_highbd_sad16x4_bits12,
-                   aom_highbd_sad16x4_avg_bits12, aom_highbd_12_variance16x4,
-                   aom_highbd_12_sub_pixel_variance16x4,
-                   aom_highbd_12_sub_pixel_avg_variance16x4,
-                   aom_highbd_sad16x4x4d_bits12,
-                   aom_highbd_jnt_sad16x4_avg_bits12,
-                   aom_highbd_12_jnt_sub_pixel_avg_variance16x4);
-
-        HIGHBD_BFP(BLOCK_4X16, aom_highbd_sad4x16_bits12,
-                   aom_highbd_sad4x16_avg_bits12, aom_highbd_12_variance4x16,
-                   aom_highbd_12_sub_pixel_variance4x16,
-                   aom_highbd_12_sub_pixel_avg_variance4x16,
-                   aom_highbd_sad4x16x4d_bits12,
-                   aom_highbd_jnt_sad4x16_avg_bits12,
-                   aom_highbd_12_jnt_sub_pixel_avg_variance4x16);
-
-        HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits12,
-                   aom_highbd_sad32x16_avg_bits12, aom_highbd_12_variance32x16,
-                   aom_highbd_12_sub_pixel_variance32x16,
-                   aom_highbd_12_sub_pixel_avg_variance32x16,
-                   aom_highbd_sad32x16x4d_bits12,
-                   aom_highbd_jnt_sad32x16_avg_bits12,
-                   aom_highbd_12_jnt_sub_pixel_avg_variance32x16);
-
-        HIGHBD_BFP(BLOCK_16X32, aom_highbd_sad16x32_bits12,
-                   aom_highbd_sad16x32_avg_bits12, aom_highbd_12_variance16x32,
-                   aom_highbd_12_sub_pixel_variance16x32,
-                   aom_highbd_12_sub_pixel_avg_variance16x32,
-                   aom_highbd_sad16x32x4d_bits12,
-                   aom_highbd_jnt_sad16x32_avg_bits12,
-                   aom_highbd_12_jnt_sub_pixel_avg_variance16x32);
-
-        HIGHBD_BFP(BLOCK_64X32, aom_highbd_sad64x32_bits12,
-                   aom_highbd_sad64x32_avg_bits12, aom_highbd_12_variance64x32,
-                   aom_highbd_12_sub_pixel_variance64x32,
-                   aom_highbd_12_sub_pixel_avg_variance64x32,
-                   aom_highbd_sad64x32x4d_bits12,
-                   aom_highbd_jnt_sad64x32_avg_bits12,
-                   aom_highbd_12_jnt_sub_pixel_avg_variance64x32);
-
-        HIGHBD_BFP(BLOCK_32X64, aom_highbd_sad32x64_bits12,
-                   aom_highbd_sad32x64_avg_bits12, aom_highbd_12_variance32x64,
-                   aom_highbd_12_sub_pixel_variance32x64,
-                   aom_highbd_12_sub_pixel_avg_variance32x64,
-                   aom_highbd_sad32x64x4d_bits12,
-                   aom_highbd_jnt_sad32x64_avg_bits12,
-                   aom_highbd_12_jnt_sub_pixel_avg_variance32x64);
-
-        HIGHBD_BFP(BLOCK_32X32, aom_highbd_sad32x32_bits12,
-                   aom_highbd_sad32x32_avg_bits12, aom_highbd_12_variance32x32,
-                   aom_highbd_12_sub_pixel_variance32x32,
-                   aom_highbd_12_sub_pixel_avg_variance32x32,
-                   aom_highbd_sad32x32x4d_bits12,
-                   aom_highbd_jnt_sad32x32_avg_bits12,
-                   aom_highbd_12_jnt_sub_pixel_avg_variance32x32);
-
-        HIGHBD_BFP(BLOCK_64X64, aom_highbd_sad64x64_bits12,
-                   aom_highbd_sad64x64_avg_bits12, aom_highbd_12_variance64x64,
-                   aom_highbd_12_sub_pixel_variance64x64,
-                   aom_highbd_12_sub_pixel_avg_variance64x64,
-                   aom_highbd_sad64x64x4d_bits12,
-                   aom_highbd_jnt_sad64x64_avg_bits12,
-                   aom_highbd_12_jnt_sub_pixel_avg_variance64x64);
-
-        HIGHBD_BFP(BLOCK_16X16, aom_highbd_sad16x16_bits12,
-                   aom_highbd_sad16x16_avg_bits12, aom_highbd_12_variance16x16,
-                   aom_highbd_12_sub_pixel_variance16x16,
-                   aom_highbd_12_sub_pixel_avg_variance16x16,
-                   aom_highbd_sad16x16x4d_bits12,
-                   aom_highbd_jnt_sad16x16_avg_bits12,
-                   aom_highbd_12_jnt_sub_pixel_avg_variance16x16);
-
-        HIGHBD_BFP(BLOCK_16X8, aom_highbd_sad16x8_bits12,
-                   aom_highbd_sad16x8_avg_bits12, aom_highbd_12_variance16x8,
-                   aom_highbd_12_sub_pixel_variance16x8,
-                   aom_highbd_12_sub_pixel_avg_variance16x8,
-                   aom_highbd_sad16x8x4d_bits12,
-                   aom_highbd_jnt_sad16x8_avg_bits12,
-                   aom_highbd_12_jnt_sub_pixel_avg_variance16x8);
-
-        HIGHBD_BFP(BLOCK_8X16, aom_highbd_sad8x16_bits12,
-                   aom_highbd_sad8x16_avg_bits12, aom_highbd_12_variance8x16,
-                   aom_highbd_12_sub_pixel_variance8x16,
-                   aom_highbd_12_sub_pixel_avg_variance8x16,
-                   aom_highbd_sad8x16x4d_bits12,
-                   aom_highbd_jnt_sad8x16_avg_bits12,
-                   aom_highbd_12_jnt_sub_pixel_avg_variance8x16);
-
-        HIGHBD_BFP(
-            BLOCK_8X8, aom_highbd_sad8x8_bits12, aom_highbd_sad8x8_avg_bits12,
-            aom_highbd_12_variance8x8, aom_highbd_12_sub_pixel_variance8x8,
-            aom_highbd_12_sub_pixel_avg_variance8x8,
-            aom_highbd_sad8x8x4d_bits12, aom_highbd_jnt_sad8x8_avg_bits12,
-            aom_highbd_12_jnt_sub_pixel_avg_variance8x8);
-
-        HIGHBD_BFP(
-            BLOCK_8X4, aom_highbd_sad8x4_bits12, aom_highbd_sad8x4_avg_bits12,
-            aom_highbd_12_variance8x4, aom_highbd_12_sub_pixel_variance8x4,
-            aom_highbd_12_sub_pixel_avg_variance8x4,
-            aom_highbd_sad8x4x4d_bits12, aom_highbd_jnt_sad8x4_avg_bits12,
-            aom_highbd_12_jnt_sub_pixel_avg_variance8x4);
-
-        HIGHBD_BFP(
-            BLOCK_4X8, aom_highbd_sad4x8_bits12, aom_highbd_sad4x8_avg_bits12,
-            aom_highbd_12_variance4x8, aom_highbd_12_sub_pixel_variance4x8,
-            aom_highbd_12_sub_pixel_avg_variance4x8,
-            aom_highbd_sad4x8x4d_bits12, aom_highbd_jnt_sad4x8_avg_bits12,
-            aom_highbd_12_jnt_sub_pixel_avg_variance4x8);
-
-        HIGHBD_BFP(
-            BLOCK_4X4, aom_highbd_sad4x4_bits12, aom_highbd_sad4x4_avg_bits12,
-            aom_highbd_12_variance4x4, aom_highbd_12_sub_pixel_variance4x4,
-            aom_highbd_12_sub_pixel_avg_variance4x4,
-            aom_highbd_sad4x4x4d_bits12, aom_highbd_jnt_sad4x4_avg_bits12,
-            aom_highbd_12_jnt_sub_pixel_avg_variance4x4);
-
-        HIGHBD_BFP(BLOCK_128X128, aom_highbd_sad128x128_bits12,
-                   aom_highbd_sad128x128_avg_bits12,
-                   aom_highbd_12_variance128x128,
-                   aom_highbd_12_sub_pixel_variance128x128,
-                   aom_highbd_12_sub_pixel_avg_variance128x128,
-                   aom_highbd_sad128x128x4d_bits12,
-                   aom_highbd_jnt_sad128x128_avg_bits12,
-                   aom_highbd_12_jnt_sub_pixel_avg_variance128x128);
-
-        HIGHBD_BFP(
-            BLOCK_128X64, aom_highbd_sad128x64_bits12,
-            aom_highbd_sad128x64_avg_bits12, aom_highbd_12_variance128x64,
-            aom_highbd_12_sub_pixel_variance128x64,
-            aom_highbd_12_sub_pixel_avg_variance128x64,
-            aom_highbd_sad128x64x4d_bits12, aom_highbd_jnt_sad128x64_avg_bits12,
-            aom_highbd_12_jnt_sub_pixel_avg_variance128x64);
-
-        HIGHBD_BFP(
-            BLOCK_64X128, aom_highbd_sad64x128_bits12,
-            aom_highbd_sad64x128_avg_bits12, aom_highbd_12_variance64x128,
-            aom_highbd_12_sub_pixel_variance64x128,
-            aom_highbd_12_sub_pixel_avg_variance64x128,
-            aom_highbd_sad64x128x4d_bits12, aom_highbd_jnt_sad64x128_avg_bits12,
-            aom_highbd_12_jnt_sub_pixel_avg_variance64x128);
-
-        HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance128x128)
-        HIGHBD_MBFP(BLOCK_128X64, aom_highbd_masked_sad128x64_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance128x64)
-        HIGHBD_MBFP(BLOCK_64X128, aom_highbd_masked_sad64x128_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance64x128)
-        HIGHBD_MBFP(BLOCK_64X64, aom_highbd_masked_sad64x64_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance64x64)
-        HIGHBD_MBFP(BLOCK_64X32, aom_highbd_masked_sad64x32_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance64x32)
-        HIGHBD_MBFP(BLOCK_32X64, aom_highbd_masked_sad32x64_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance32x64)
-        HIGHBD_MBFP(BLOCK_32X32, aom_highbd_masked_sad32x32_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance32x32)
-        HIGHBD_MBFP(BLOCK_32X16, aom_highbd_masked_sad32x16_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance32x16)
-        HIGHBD_MBFP(BLOCK_16X32, aom_highbd_masked_sad16x32_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance16x32)
-        HIGHBD_MBFP(BLOCK_16X16, aom_highbd_masked_sad16x16_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance16x16)
-        HIGHBD_MBFP(BLOCK_8X16, aom_highbd_masked_sad8x16_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance8x16)
-        HIGHBD_MBFP(BLOCK_16X8, aom_highbd_masked_sad16x8_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance16x8)
-        HIGHBD_MBFP(BLOCK_8X8, aom_highbd_masked_sad8x8_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance8x8)
-        HIGHBD_MBFP(BLOCK_4X8, aom_highbd_masked_sad4x8_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance4x8)
-        HIGHBD_MBFP(BLOCK_8X4, aom_highbd_masked_sad8x4_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance8x4)
-        HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance4x4)
-        HIGHBD_MBFP(BLOCK_64X16, aom_highbd_masked_sad64x16_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance64x16)
-        HIGHBD_MBFP(BLOCK_16X64, aom_highbd_masked_sad16x64_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance16x64)
-        HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance32x8)
-        HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance8x32)
-        HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance16x4)
-        HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits12,
-                    aom_highbd_12_masked_sub_pixel_variance4x16)
-        HIGHBD_OBFP(BLOCK_128X128, aom_highbd_obmc_sad128x128_bits12,
-                    aom_highbd_12_obmc_variance128x128,
-                    aom_highbd_12_obmc_sub_pixel_variance128x128)
-        HIGHBD_OBFP(BLOCK_128X64, aom_highbd_obmc_sad128x64_bits12,
-                    aom_highbd_12_obmc_variance128x64,
-                    aom_highbd_12_obmc_sub_pixel_variance128x64)
-        HIGHBD_OBFP(BLOCK_64X128, aom_highbd_obmc_sad64x128_bits12,
-                    aom_highbd_12_obmc_variance64x128,
-                    aom_highbd_12_obmc_sub_pixel_variance64x128)
-        HIGHBD_OBFP(BLOCK_64X64, aom_highbd_obmc_sad64x64_bits12,
-                    aom_highbd_12_obmc_variance64x64,
-                    aom_highbd_12_obmc_sub_pixel_variance64x64)
-        HIGHBD_OBFP(BLOCK_64X32, aom_highbd_obmc_sad64x32_bits12,
-                    aom_highbd_12_obmc_variance64x32,
-                    aom_highbd_12_obmc_sub_pixel_variance64x32)
-        HIGHBD_OBFP(BLOCK_32X64, aom_highbd_obmc_sad32x64_bits12,
-                    aom_highbd_12_obmc_variance32x64,
-                    aom_highbd_12_obmc_sub_pixel_variance32x64)
-        HIGHBD_OBFP(BLOCK_32X32, aom_highbd_obmc_sad32x32_bits12,
-                    aom_highbd_12_obmc_variance32x32,
-                    aom_highbd_12_obmc_sub_pixel_variance32x32)
-        HIGHBD_OBFP(BLOCK_32X16, aom_highbd_obmc_sad32x16_bits12,
-                    aom_highbd_12_obmc_variance32x16,
-                    aom_highbd_12_obmc_sub_pixel_variance32x16)
-        HIGHBD_OBFP(BLOCK_16X32, aom_highbd_obmc_sad16x32_bits12,
-                    aom_highbd_12_obmc_variance16x32,
-                    aom_highbd_12_obmc_sub_pixel_variance16x32)
-        HIGHBD_OBFP(BLOCK_16X16, aom_highbd_obmc_sad16x16_bits12,
-                    aom_highbd_12_obmc_variance16x16,
-                    aom_highbd_12_obmc_sub_pixel_variance16x16)
-        HIGHBD_OBFP(BLOCK_8X16, aom_highbd_obmc_sad8x16_bits12,
-                    aom_highbd_12_obmc_variance8x16,
-                    aom_highbd_12_obmc_sub_pixel_variance8x16)
-        HIGHBD_OBFP(BLOCK_16X8, aom_highbd_obmc_sad16x8_bits12,
-                    aom_highbd_12_obmc_variance16x8,
-                    aom_highbd_12_obmc_sub_pixel_variance16x8)
-        HIGHBD_OBFP(BLOCK_8X8, aom_highbd_obmc_sad8x8_bits12,
-                    aom_highbd_12_obmc_variance8x8,
-                    aom_highbd_12_obmc_sub_pixel_variance8x8)
-        HIGHBD_OBFP(BLOCK_4X8, aom_highbd_obmc_sad4x8_bits12,
-                    aom_highbd_12_obmc_variance4x8,
-                    aom_highbd_12_obmc_sub_pixel_variance4x8)
-        HIGHBD_OBFP(BLOCK_8X4, aom_highbd_obmc_sad8x4_bits12,
-                    aom_highbd_12_obmc_variance8x4,
-                    aom_highbd_12_obmc_sub_pixel_variance8x4)
-        HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits12,
-                    aom_highbd_12_obmc_variance4x4,
-                    aom_highbd_12_obmc_sub_pixel_variance4x4)
-        HIGHBD_OBFP(BLOCK_64X16, aom_highbd_obmc_sad64x16_bits12,
-                    aom_highbd_12_obmc_variance64x16,
-                    aom_highbd_12_obmc_sub_pixel_variance64x16)
-        HIGHBD_OBFP(BLOCK_16X64, aom_highbd_obmc_sad16x64_bits12,
-                    aom_highbd_12_obmc_variance16x64,
-                    aom_highbd_12_obmc_sub_pixel_variance16x64)
-        HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits12,
-                    aom_highbd_12_obmc_variance32x8,
-                    aom_highbd_12_obmc_sub_pixel_variance32x8)
-        HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits12,
-                    aom_highbd_12_obmc_variance8x32,
-                    aom_highbd_12_obmc_sub_pixel_variance8x32)
-        HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits12,
-                    aom_highbd_12_obmc_variance16x4,
-                    aom_highbd_12_obmc_sub_pixel_variance16x4)
-        HIGHBD_OBFP(BLOCK_4X16, aom_highbd_obmc_sad4x16_bits12,
-                    aom_highbd_12_obmc_variance4x16,
-                    aom_highbd_12_obmc_sub_pixel_variance4x16)
-        break;
-
-      default:
-        assert(0 &&
-               "cm->seq_params.bit_depth should be AOM_BITS_8, "
-               "AOM_BITS_10 or AOM_BITS_12");
-    }
-  }
-}
-
-static void realloc_segmentation_maps(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-
-  // Create the encoder segmentation map and set all entries to 0
-  aom_free(cpi->segmentation_map);
-  CHECK_MEM_ERROR(cm, cpi->segmentation_map,
-                  aom_calloc(cm->mi_rows * cm->mi_cols, 1));
-
-  // Create a map used for cyclic background refresh.
-  if (cpi->cyclic_refresh) av1_cyclic_refresh_free(cpi->cyclic_refresh);
-  CHECK_MEM_ERROR(cm, cpi->cyclic_refresh,
-                  av1_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols));
-
-  // Create a map used to mark inactive areas.
-  aom_free(cpi->active_map.map);
-  CHECK_MEM_ERROR(cm, cpi->active_map.map,
-                  aom_calloc(cm->mi_rows * cm->mi_cols, 1));
-}
-
-void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
-  AV1_COMMON *const cm = &cpi->common;
-  SequenceHeader *const seq_params = &cm->seq_params;
-  const int num_planes = av1_num_planes(cm);
-  RATE_CONTROL *const rc = &cpi->rc;
-  MACROBLOCK *const x = &cpi->td.mb;
-
-  if (seq_params->profile != oxcf->profile) seq_params->profile = oxcf->profile;
-  seq_params->bit_depth = oxcf->bit_depth;
-  seq_params->color_primaries = oxcf->color_primaries;
-  seq_params->transfer_characteristics = oxcf->transfer_characteristics;
-  seq_params->matrix_coefficients = oxcf->matrix_coefficients;
-  seq_params->monochrome = oxcf->monochrome;
-  seq_params->chroma_sample_position = oxcf->chroma_sample_position;
-  seq_params->color_range = oxcf->color_range;
-
-  assert(IMPLIES(seq_params->profile <= PROFILE_1,
-                 seq_params->bit_depth <= AOM_BITS_10));
-
-  cm->timing_info_present = oxcf->timing_info_present;
-  cm->timing_info.num_units_in_display_tick =
-      oxcf->timing_info.num_units_in_display_tick;
-  cm->timing_info.time_scale = oxcf->timing_info.time_scale;
-  cm->timing_info.equal_picture_interval =
-      oxcf->timing_info.equal_picture_interval;
-  cm->timing_info.num_ticks_per_picture =
-      oxcf->timing_info.num_ticks_per_picture;
-
-  seq_params->display_model_info_present_flag =
-      oxcf->display_model_info_present_flag;
-  seq_params->decoder_model_info_present_flag =
-      oxcf->decoder_model_info_present_flag;
-  if (oxcf->decoder_model_info_present_flag) {
-    // set the decoder model parameters in schedule mode
-    cm->buffer_model.num_units_in_decoding_tick =
-        oxcf->buffer_model.num_units_in_decoding_tick;
-    cm->buffer_removal_time_present = 1;
-    set_aom_dec_model_info(&cm->buffer_model);
-    set_dec_model_op_parameters(&cm->op_params[0]);
-  } else if (cm->timing_info_present &&
-             cm->timing_info.equal_picture_interval &&
-             !seq_params->decoder_model_info_present_flag) {
-    // set the decoder model parameters in resource availability mode
-    set_resource_availability_parameters(&cm->op_params[0]);
-  } else {
-    cm->op_params[0].initial_display_delay =
-        10;  // Default value (not signaled)
-  }
-
-  update_film_grain_parameters(cpi, oxcf);
-
-  cpi->oxcf = *oxcf;
-  cpi->common.options = oxcf->cfg;
-  cpi->row_mt = oxcf->row_mt;
-  x->e_mbd.bd = (int)seq_params->bit_depth;
-  x->e_mbd.global_motion = cm->global_motion;
-
-  if ((oxcf->pass == 0) && (oxcf->rc_mode == AOM_Q)) {
-    rc->baseline_gf_interval = FIXED_GF_INTERVAL;
-  } else {
-    rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2;
-  }
-
-  cpi->refresh_last_frame = 1;
-  cpi->refresh_golden_frame = 0;
-  cpi->refresh_bwd_ref_frame = 0;
-  cpi->refresh_alt2_ref_frame = 0;
-
-  cm->refresh_frame_context = (oxcf->frame_parallel_decoding_mode)
-                                  ? REFRESH_FRAME_CONTEXT_DISABLED
-                                  : REFRESH_FRAME_CONTEXT_BACKWARD;
-  if (oxcf->large_scale_tile)
-    cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_DISABLED;
-
-  if (x->palette_buffer == NULL) {
-    CHECK_MEM_ERROR(cm, x->palette_buffer,
-                    aom_memalign(16, sizeof(*x->palette_buffer)));
-  }
-
-  if (x->tmp_conv_dst == NULL) {
-    CHECK_MEM_ERROR(
-        cm, x->tmp_conv_dst,
-        aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE * sizeof(*x->tmp_conv_dst)));
-    x->e_mbd.tmp_conv_dst = x->tmp_conv_dst;
-  }
-  for (int i = 0; i < 2; ++i) {
-    if (x->tmp_obmc_bufs[i] == NULL) {
-      CHECK_MEM_ERROR(cm, x->tmp_obmc_bufs[i],
-                      aom_memalign(16, 2 * MAX_MB_PLANE * MAX_SB_SQUARE *
-                                           sizeof(*x->tmp_obmc_bufs[i])));
-      x->e_mbd.tmp_obmc_bufs[i] = x->tmp_obmc_bufs[i];
-    }
-  }
-
-  av1_reset_segment_features(cm);
-  set_high_precision_mv(cpi, 1, 0);
-
-  set_rc_buffer_sizes(rc, &cpi->oxcf);
-
-  // Under a configuration change, where maximum_buffer_size may change,
-  // keep buffer level clipped to the maximum allowed buffer size.
-  rc->bits_off_target = AOMMIN(rc->bits_off_target, rc->maximum_buffer_size);
-  rc->buffer_level = AOMMIN(rc->buffer_level, rc->maximum_buffer_size);
-
-  // Set up frame rate and related parameters rate control values.
-  av1_new_framerate(cpi, cpi->framerate);
-
-  // Set absolute upper and lower quality limits
-  rc->worst_quality = cpi->oxcf.worst_allowed_q;
-  rc->best_quality = cpi->oxcf.best_allowed_q;
-
-  cm->interp_filter = oxcf->large_scale_tile ? EIGHTTAP_REGULAR : SWITCHABLE;
-  cm->switchable_motion_mode = 1;
-
-  if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) {
-    cm->render_width = cpi->oxcf.render_width;
-    cm->render_height = cpi->oxcf.render_height;
-  } else {
-    cm->render_width = cpi->oxcf.width;
-    cm->render_height = cpi->oxcf.height;
-  }
-  cm->width = cpi->oxcf.width;
-  cm->height = cpi->oxcf.height;
-
-  int sb_size = seq_params->sb_size;
-  // Superblock size should not be updated after the first key frame.
-  if (!cpi->seq_params_locked) {
-    set_sb_size(&cm->seq_params, select_sb_size(cpi));
-  }
-
-  if (cpi->initial_width || sb_size != seq_params->sb_size) {
-    if (cm->width > cpi->initial_width || cm->height > cpi->initial_height ||
-        seq_params->sb_size != sb_size) {
-      av1_free_context_buffers(cm);
-      av1_free_pc_tree(&cpi->td, num_planes);
-      alloc_compressor_data(cpi);
-      realloc_segmentation_maps(cpi);
-      cpi->initial_width = cpi->initial_height = 0;
-    }
-  }
-  update_frame_size(cpi);
-
-  cpi->alt_ref_source = NULL;
-  rc->is_src_frame_alt_ref = 0;
-
-  rc->is_bwd_ref_frame = 0;
-  rc->is_last_bipred_frame = 0;
-  rc->is_bipred_frame = 0;
-
-  set_tile_info(cpi);
-
-  cpi->ext_refresh_frame_flags_pending = 0;
-  cpi->ext_refresh_frame_context_pending = 0;
-
-  highbd_set_var_fns(cpi);
-
-  // Init sequence level coding tools
-  // This should not be called after the first key frame.
-  if (!cpi->seq_params_locked) {
-    seq_params->operating_points_cnt_minus_1 =
-        cm->number_spatial_layers > 1 ? cm->number_spatial_layers - 1 : 0;
-    init_seq_coding_tools(&cm->seq_params, cm, oxcf);
-  }
-}
-
-AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
-                                BufferPool *const pool) {
-  unsigned int i;
-  AV1_COMP *volatile const cpi = aom_memalign(32, sizeof(AV1_COMP));
-  AV1_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL;
-
-  if (!cm) return NULL;
-
-  av1_zero(*cpi);
-
-  // The jmp_buf is valid only for the duration of the function that calls
-  // setjmp(). Therefore, this function must reset the 'setjmp' field to 0
-  // before it returns.
-  if (setjmp(cm->error.jmp)) {
-    cm->error.setjmp = 0;
-    av1_remove_compressor(cpi);
-    return 0;
-  }
-
-  cm->error.setjmp = 1;
-  cm->alloc_mi = enc_alloc_mi;
-  cm->free_mi = enc_free_mi;
-  cm->setup_mi = enc_setup_mi;
-
-  CHECK_MEM_ERROR(cm, cm->fc,
-                  (FRAME_CONTEXT *)aom_memalign(32, sizeof(*cm->fc)));
-  CHECK_MEM_ERROR(cm, cm->frame_contexts,
-                  (FRAME_CONTEXT *)aom_memalign(
-                      32, FRAME_CONTEXTS * sizeof(*cm->frame_contexts)));
-  memset(cm->fc, 0, sizeof(*cm->fc));
-  memset(cm->frame_contexts, 0, FRAME_CONTEXTS * sizeof(*cm->frame_contexts));
-
-  cpi->resize_state = 0;
-  cpi->resize_avg_qp = 0;
-  cpi->resize_buffer_underflow = 0;
-
-  cpi->common.buffer_pool = pool;
-
-  init_config(cpi, oxcf);
-  av1_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
-
-  cm->current_video_frame = 0;
-  cpi->seq_params_locked = 0;
-  cpi->partition_search_skippable_frame = 0;
-  cpi->tile_data = NULL;
-  cpi->last_show_frame_buf_idx = INVALID_IDX;
-
-  realloc_segmentation_maps(cpi);
-
-  memset(cpi->nmv_costs, 0, sizeof(cpi->nmv_costs));
-  memset(cpi->nmv_costs_hp, 0, sizeof(cpi->nmv_costs_hp));
-
-  for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]));
-       i++) {
-    CHECK_MEM_ERROR(
-        cm, cpi->mbgraph_stats[i].mb_stats,
-        aom_calloc(cm->MBs * sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
-  }
-
-#if CONFIG_FP_MB_STATS
-  cpi->use_fp_mb_stats = 0;
-  if (cpi->use_fp_mb_stats) {
-    // a place holder used to store the first pass mb stats in the first pass
-    CHECK_MEM_ERROR(cm, cpi->twopass.frame_mb_stats_buf,
-                    aom_calloc(cm->MBs * sizeof(uint8_t), 1));
-  } else {
-    cpi->twopass.frame_mb_stats_buf = NULL;
-  }
-#endif
-
-  cpi->refresh_alt_ref_frame = 0;
-
-  cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
-#if CONFIG_INTERNAL_STATS
-  cpi->b_calculate_blockiness = 1;
-  cpi->b_calculate_consistency = 1;
-  cpi->total_inconsistency = 0;
-  cpi->psnr.worst = 100.0;
-  cpi->worst_ssim = 100.0;
-
-  cpi->count = 0;
-  cpi->bytes = 0;
-
-  if (cpi->b_calculate_psnr) {
-    cpi->total_sq_error = 0;
-    cpi->total_samples = 0;
-    cpi->tot_recode_hits = 0;
-    cpi->summed_quality = 0;
-    cpi->summed_weights = 0;
-  }
-
-  cpi->fastssim.worst = 100.0;
-  cpi->psnrhvs.worst = 100.0;
-
-  if (cpi->b_calculate_blockiness) {
-    cpi->total_blockiness = 0;
-    cpi->worst_blockiness = 0.0;
-  }
-
-  if (cpi->b_calculate_consistency) {
-    CHECK_MEM_ERROR(cm, cpi->ssim_vars,
-                    aom_malloc(sizeof(*cpi->ssim_vars) * 4 *
-                               cpi->common.mi_rows * cpi->common.mi_cols));
-    cpi->worst_consistency = 100.0;
-  }
-#endif
-#if CONFIG_ENTROPY_STATS
-  av1_zero(aggregate_fc);
-#endif  // CONFIG_ENTROPY_STATS
-
-  cpi->first_time_stamp_ever = INT64_MAX;
-
-  cpi->td.mb.nmvcost[0] = &cpi->nmv_costs[0][MV_MAX];
-  cpi->td.mb.nmvcost[1] = &cpi->nmv_costs[1][MV_MAX];
-  cpi->td.mb.nmvcost_hp[0] = &cpi->nmv_costs_hp[0][MV_MAX];
-  cpi->td.mb.nmvcost_hp[1] = &cpi->nmv_costs_hp[1][MV_MAX];
-
-#ifdef OUTPUT_YUV_SKINMAP
-  yuv_skinmap_file = fopen("skinmap.yuv", "ab");
-#endif
-#ifdef OUTPUT_YUV_REC
-  yuv_rec_file = fopen("rec.yuv", "wb");
-#endif
-
-  if (oxcf->pass == 1) {
-    av1_init_first_pass(cpi);
-  } else if (oxcf->pass == 2) {
-    const size_t packet_sz = sizeof(FIRSTPASS_STATS);
-    const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
-
-#if CONFIG_FP_MB_STATS
-    if (cpi->use_fp_mb_stats) {
-      const size_t psz = cpi->common.MBs * sizeof(uint8_t);
-      const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz);
-
-      cpi->twopass.firstpass_mb_stats.mb_stats_start =
-          oxcf->firstpass_mb_stats_in.buf;
-      cpi->twopass.firstpass_mb_stats.mb_stats_end =
-          cpi->twopass.firstpass_mb_stats.mb_stats_start +
-          (ps - 1) * cpi->common.MBs * sizeof(uint8_t);
-    }
-#endif
-
-    cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
-    cpi->twopass.stats_in = cpi->twopass.stats_in_start;
-    cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
-
-    av1_init_second_pass(cpi);
-  }
-
-  CHECK_MEM_ERROR(
-      cm, cpi->td.mb.above_pred_buf,
-      (uint8_t *)aom_memalign(16, MAX_MB_PLANE * MAX_SB_SQUARE *
-                                      sizeof(*cpi->td.mb.above_pred_buf)));
-  CHECK_MEM_ERROR(
-      cm, cpi->td.mb.left_pred_buf,
-      (uint8_t *)aom_memalign(16, MAX_MB_PLANE * MAX_SB_SQUARE *
-                                      sizeof(*cpi->td.mb.left_pred_buf)));
-
-  CHECK_MEM_ERROR(cm, cpi->td.mb.wsrc_buf,
-                  (int32_t *)aom_memalign(
-                      16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.wsrc_buf)));
-
-  for (int x = 0; x < 2; x++)
-    for (int y = 0; y < 2; y++)
-      CHECK_MEM_ERROR(
-          cm, cpi->td.mb.hash_value_buffer[x][y],
-          (uint32_t *)aom_malloc(AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
-                                 sizeof(*cpi->td.mb.hash_value_buffer[0][0])));
-
-  cpi->td.mb.g_crc_initialized = 0;
-
-  CHECK_MEM_ERROR(cm, cpi->td.mb.mask_buf,
-                  (int32_t *)aom_memalign(
-                      16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.mask_buf)));
-
-  av1_set_speed_features_framesize_independent(cpi);
-  av1_set_speed_features_framesize_dependent(cpi);
-
-#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, JSDAF, JSVAF) \
-  cpi->fn_ptr[BT].sdf = SDF;                                    \
-  cpi->fn_ptr[BT].sdaf = SDAF;                                  \
-  cpi->fn_ptr[BT].vf = VF;                                      \
-  cpi->fn_ptr[BT].svf = SVF;                                    \
-  cpi->fn_ptr[BT].svaf = SVAF;                                  \
-  cpi->fn_ptr[BT].sdx4df = SDX4DF;                              \
-  cpi->fn_ptr[BT].jsdaf = JSDAF;                                \
-  cpi->fn_ptr[BT].jsvaf = JSVAF;
-
-  BFP(BLOCK_4X16, aom_sad4x16, aom_sad4x16_avg, aom_variance4x16,
-      aom_sub_pixel_variance4x16, aom_sub_pixel_avg_variance4x16,
-      aom_sad4x16x4d, aom_jnt_sad4x16_avg, aom_jnt_sub_pixel_avg_variance4x16)
-
-  BFP(BLOCK_16X4, aom_sad16x4, aom_sad16x4_avg, aom_variance16x4,
-      aom_sub_pixel_variance16x4, aom_sub_pixel_avg_variance16x4,
-      aom_sad16x4x4d, aom_jnt_sad16x4_avg, aom_jnt_sub_pixel_avg_variance16x4)
-
-  BFP(BLOCK_8X32, aom_sad8x32, aom_sad8x32_avg, aom_variance8x32,
-      aom_sub_pixel_variance8x32, aom_sub_pixel_avg_variance8x32,
-      aom_sad8x32x4d, aom_jnt_sad8x32_avg, aom_jnt_sub_pixel_avg_variance8x32)
-
-  BFP(BLOCK_32X8, aom_sad32x8, aom_sad32x8_avg, aom_variance32x8,
-      aom_sub_pixel_variance32x8, aom_sub_pixel_avg_variance32x8,
-      aom_sad32x8x4d, aom_jnt_sad32x8_avg, aom_jnt_sub_pixel_avg_variance32x8)
-
-  BFP(BLOCK_16X64, aom_sad16x64, aom_sad16x64_avg, aom_variance16x64,
-      aom_sub_pixel_variance16x64, aom_sub_pixel_avg_variance16x64,
-      aom_sad16x64x4d, aom_jnt_sad16x64_avg,
-      aom_jnt_sub_pixel_avg_variance16x64)
-
-  BFP(BLOCK_64X16, aom_sad64x16, aom_sad64x16_avg, aom_variance64x16,
-      aom_sub_pixel_variance64x16, aom_sub_pixel_avg_variance64x16,
-      aom_sad64x16x4d, aom_jnt_sad64x16_avg,
-      aom_jnt_sub_pixel_avg_variance64x16)
-
-  BFP(BLOCK_128X128, aom_sad128x128, aom_sad128x128_avg, aom_variance128x128,
-      aom_sub_pixel_variance128x128, aom_sub_pixel_avg_variance128x128,
-      aom_sad128x128x4d, aom_jnt_sad128x128_avg,
-      aom_jnt_sub_pixel_avg_variance128x128)
-
-  BFP(BLOCK_128X64, aom_sad128x64, aom_sad128x64_avg, aom_variance128x64,
-      aom_sub_pixel_variance128x64, aom_sub_pixel_avg_variance128x64,
-      aom_sad128x64x4d, aom_jnt_sad128x64_avg,
-      aom_jnt_sub_pixel_avg_variance128x64)
-
-  BFP(BLOCK_64X128, aom_sad64x128, aom_sad64x128_avg, aom_variance64x128,
-      aom_sub_pixel_variance64x128, aom_sub_pixel_avg_variance64x128,
-      aom_sad64x128x4d, aom_jnt_sad64x128_avg,
-      aom_jnt_sub_pixel_avg_variance64x128)
-
-  BFP(BLOCK_32X16, aom_sad32x16, aom_sad32x16_avg, aom_variance32x16,
-      aom_sub_pixel_variance32x16, aom_sub_pixel_avg_variance32x16,
-      aom_sad32x16x4d, aom_jnt_sad32x16_avg,
-      aom_jnt_sub_pixel_avg_variance32x16)
-
-  BFP(BLOCK_16X32, aom_sad16x32, aom_sad16x32_avg, aom_variance16x32,
-      aom_sub_pixel_variance16x32, aom_sub_pixel_avg_variance16x32,
-      aom_sad16x32x4d, aom_jnt_sad16x32_avg,
-      aom_jnt_sub_pixel_avg_variance16x32)
-
-  BFP(BLOCK_64X32, aom_sad64x32, aom_sad64x32_avg, aom_variance64x32,
-      aom_sub_pixel_variance64x32, aom_sub_pixel_avg_variance64x32,
-      aom_sad64x32x4d, aom_jnt_sad64x32_avg,
-      aom_jnt_sub_pixel_avg_variance64x32)
-
-  BFP(BLOCK_32X64, aom_sad32x64, aom_sad32x64_avg, aom_variance32x64,
-      aom_sub_pixel_variance32x64, aom_sub_pixel_avg_variance32x64,
-      aom_sad32x64x4d, aom_jnt_sad32x64_avg,
-      aom_jnt_sub_pixel_avg_variance32x64)
-
-  BFP(BLOCK_32X32, aom_sad32x32, aom_sad32x32_avg, aom_variance32x32,
-      aom_sub_pixel_variance32x32, aom_sub_pixel_avg_variance32x32,
-      aom_sad32x32x4d, aom_jnt_sad32x32_avg,
-      aom_jnt_sub_pixel_avg_variance32x32)
-
-  BFP(BLOCK_64X64, aom_sad64x64, aom_sad64x64_avg, aom_variance64x64,
-      aom_sub_pixel_variance64x64, aom_sub_pixel_avg_variance64x64,
-      aom_sad64x64x4d, aom_jnt_sad64x64_avg,
-      aom_jnt_sub_pixel_avg_variance64x64)
-
-  BFP(BLOCK_16X16, aom_sad16x16, aom_sad16x16_avg, aom_variance16x16,
-      aom_sub_pixel_variance16x16, aom_sub_pixel_avg_variance16x16,
-      aom_sad16x16x4d, aom_jnt_sad16x16_avg,
-      aom_jnt_sub_pixel_avg_variance16x16)
-
-  BFP(BLOCK_16X8, aom_sad16x8, aom_sad16x8_avg, aom_variance16x8,
-      aom_sub_pixel_variance16x8, aom_sub_pixel_avg_variance16x8,
-      aom_sad16x8x4d, aom_jnt_sad16x8_avg, aom_jnt_sub_pixel_avg_variance16x8)
-
-  BFP(BLOCK_8X16, aom_sad8x16, aom_sad8x16_avg, aom_variance8x16,
-      aom_sub_pixel_variance8x16, aom_sub_pixel_avg_variance8x16,
-      aom_sad8x16x4d, aom_jnt_sad8x16_avg, aom_jnt_sub_pixel_avg_variance8x16)
-
-  BFP(BLOCK_8X8, aom_sad8x8, aom_sad8x8_avg, aom_variance8x8,
-      aom_sub_pixel_variance8x8, aom_sub_pixel_avg_variance8x8, aom_sad8x8x4d,
-      aom_jnt_sad8x8_avg, aom_jnt_sub_pixel_avg_variance8x8)
-
-  BFP(BLOCK_8X4, aom_sad8x4, aom_sad8x4_avg, aom_variance8x4,
-      aom_sub_pixel_variance8x4, aom_sub_pixel_avg_variance8x4, aom_sad8x4x4d,
-      aom_jnt_sad8x4_avg, aom_jnt_sub_pixel_avg_variance8x4)
-
-  BFP(BLOCK_4X8, aom_sad4x8, aom_sad4x8_avg, aom_variance4x8,
-      aom_sub_pixel_variance4x8, aom_sub_pixel_avg_variance4x8, aom_sad4x8x4d,
-      aom_jnt_sad4x8_avg, aom_jnt_sub_pixel_avg_variance4x8)
-
-  BFP(BLOCK_4X4, aom_sad4x4, aom_sad4x4_avg, aom_variance4x4,
-      aom_sub_pixel_variance4x4, aom_sub_pixel_avg_variance4x4, aom_sad4x4x4d,
-      aom_jnt_sad4x4_avg, aom_jnt_sub_pixel_avg_variance4x4)
-
-#define OBFP(BT, OSDF, OVF, OSVF) \
-  cpi->fn_ptr[BT].osdf = OSDF;    \
-  cpi->fn_ptr[BT].ovf = OVF;      \
-  cpi->fn_ptr[BT].osvf = OSVF;
-
-  OBFP(BLOCK_128X128, aom_obmc_sad128x128, aom_obmc_variance128x128,
-       aom_obmc_sub_pixel_variance128x128)
-  OBFP(BLOCK_128X64, aom_obmc_sad128x64, aom_obmc_variance128x64,
-       aom_obmc_sub_pixel_variance128x64)
-  OBFP(BLOCK_64X128, aom_obmc_sad64x128, aom_obmc_variance64x128,
-       aom_obmc_sub_pixel_variance64x128)
-  OBFP(BLOCK_64X64, aom_obmc_sad64x64, aom_obmc_variance64x64,
-       aom_obmc_sub_pixel_variance64x64)
-  OBFP(BLOCK_64X32, aom_obmc_sad64x32, aom_obmc_variance64x32,
-       aom_obmc_sub_pixel_variance64x32)
-  OBFP(BLOCK_32X64, aom_obmc_sad32x64, aom_obmc_variance32x64,
-       aom_obmc_sub_pixel_variance32x64)
-  OBFP(BLOCK_32X32, aom_obmc_sad32x32, aom_obmc_variance32x32,
-       aom_obmc_sub_pixel_variance32x32)
-  OBFP(BLOCK_32X16, aom_obmc_sad32x16, aom_obmc_variance32x16,
-       aom_obmc_sub_pixel_variance32x16)
-  OBFP(BLOCK_16X32, aom_obmc_sad16x32, aom_obmc_variance16x32,
-       aom_obmc_sub_pixel_variance16x32)
-  OBFP(BLOCK_16X16, aom_obmc_sad16x16, aom_obmc_variance16x16,
-       aom_obmc_sub_pixel_variance16x16)
-  OBFP(BLOCK_16X8, aom_obmc_sad16x8, aom_obmc_variance16x8,
-       aom_obmc_sub_pixel_variance16x8)
-  OBFP(BLOCK_8X16, aom_obmc_sad8x16, aom_obmc_variance8x16,
-       aom_obmc_sub_pixel_variance8x16)
-  OBFP(BLOCK_8X8, aom_obmc_sad8x8, aom_obmc_variance8x8,
-       aom_obmc_sub_pixel_variance8x8)
-  OBFP(BLOCK_4X8, aom_obmc_sad4x8, aom_obmc_variance4x8,
-       aom_obmc_sub_pixel_variance4x8)
-  OBFP(BLOCK_8X4, aom_obmc_sad8x4, aom_obmc_variance8x4,
-       aom_obmc_sub_pixel_variance8x4)
-  OBFP(BLOCK_4X4, aom_obmc_sad4x4, aom_obmc_variance4x4,
-       aom_obmc_sub_pixel_variance4x4)
-  OBFP(BLOCK_4X16, aom_obmc_sad4x16, aom_obmc_variance4x16,
-       aom_obmc_sub_pixel_variance4x16)
-  OBFP(BLOCK_16X4, aom_obmc_sad16x4, aom_obmc_variance16x4,
-       aom_obmc_sub_pixel_variance16x4)
-  OBFP(BLOCK_8X32, aom_obmc_sad8x32, aom_obmc_variance8x32,
-       aom_obmc_sub_pixel_variance8x32)
-  OBFP(BLOCK_32X8, aom_obmc_sad32x8, aom_obmc_variance32x8,
-       aom_obmc_sub_pixel_variance32x8)
-  OBFP(BLOCK_16X64, aom_obmc_sad16x64, aom_obmc_variance16x64,
-       aom_obmc_sub_pixel_variance16x64)
-  OBFP(BLOCK_64X16, aom_obmc_sad64x16, aom_obmc_variance64x16,
-       aom_obmc_sub_pixel_variance64x16)
-
-#define MBFP(BT, MCSDF, MCSVF)  \
-  cpi->fn_ptr[BT].msdf = MCSDF; \
-  cpi->fn_ptr[BT].msvf = MCSVF;
-
-  MBFP(BLOCK_128X128, aom_masked_sad128x128,
-       aom_masked_sub_pixel_variance128x128)
-  MBFP(BLOCK_128X64, aom_masked_sad128x64, aom_masked_sub_pixel_variance128x64)
-  MBFP(BLOCK_64X128, aom_masked_sad64x128, aom_masked_sub_pixel_variance64x128)
-  MBFP(BLOCK_64X64, aom_masked_sad64x64, aom_masked_sub_pixel_variance64x64)
-  MBFP(BLOCK_64X32, aom_masked_sad64x32, aom_masked_sub_pixel_variance64x32)
-  MBFP(BLOCK_32X64, aom_masked_sad32x64, aom_masked_sub_pixel_variance32x64)
-  MBFP(BLOCK_32X32, aom_masked_sad32x32, aom_masked_sub_pixel_variance32x32)
-  MBFP(BLOCK_32X16, aom_masked_sad32x16, aom_masked_sub_pixel_variance32x16)
-  MBFP(BLOCK_16X32, aom_masked_sad16x32, aom_masked_sub_pixel_variance16x32)
-  MBFP(BLOCK_16X16, aom_masked_sad16x16, aom_masked_sub_pixel_variance16x16)
-  MBFP(BLOCK_16X8, aom_masked_sad16x8, aom_masked_sub_pixel_variance16x8)
-  MBFP(BLOCK_8X16, aom_masked_sad8x16, aom_masked_sub_pixel_variance8x16)
-  MBFP(BLOCK_8X8, aom_masked_sad8x8, aom_masked_sub_pixel_variance8x8)
-  MBFP(BLOCK_4X8, aom_masked_sad4x8, aom_masked_sub_pixel_variance4x8)
-  MBFP(BLOCK_8X4, aom_masked_sad8x4, aom_masked_sub_pixel_variance8x4)
-  MBFP(BLOCK_4X4, aom_masked_sad4x4, aom_masked_sub_pixel_variance4x4)
-
-  MBFP(BLOCK_4X16, aom_masked_sad4x16, aom_masked_sub_pixel_variance4x16)
-
-  MBFP(BLOCK_16X4, aom_masked_sad16x4, aom_masked_sub_pixel_variance16x4)
-
-  MBFP(BLOCK_8X32, aom_masked_sad8x32, aom_masked_sub_pixel_variance8x32)
-
-  MBFP(BLOCK_32X8, aom_masked_sad32x8, aom_masked_sub_pixel_variance32x8)
-
-  MBFP(BLOCK_16X64, aom_masked_sad16x64, aom_masked_sub_pixel_variance16x64)
-
-  MBFP(BLOCK_64X16, aom_masked_sad64x16, aom_masked_sub_pixel_variance64x16)
-
-  highbd_set_var_fns(cpi);
-
-  /* av1_init_quantizer() is first called here. Add check in
-   * av1_frame_init_quantizer() so that av1_init_quantizer is only
-   * called later when needed. This will avoid unnecessary calls of
-   * av1_init_quantizer() for every frame.
-   */
-  av1_init_quantizer(cpi);
-  av1_qm_init(cm);
-
-  av1_loop_filter_init(cm);
-  cm->superres_scale_denominator = SCALE_NUMERATOR;
-  cm->superres_upscaled_width = oxcf->width;
-  cm->superres_upscaled_height = oxcf->height;
-  av1_loop_restoration_precal();
-
-  cm->error.setjmp = 0;
-
-  return cpi;
-}
-
-#if CONFIG_INTERNAL_STATS
-#define SNPRINT(H, T) snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T))
-
-#define SNPRINT2(H, T, V) \
-  snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V))
-#endif  // CONFIG_INTERNAL_STATS
-
-void av1_remove_compressor(AV1_COMP *cpi) {
-  AV1_COMMON *cm;
-  unsigned int i;
-  int t;
-
-  if (!cpi) return;
-
-  cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-
-  if (cm->current_video_frame > 0) {
-#if CONFIG_ENTROPY_STATS
-    if (cpi->oxcf.pass != 1) {
-      fprintf(stderr, "Writing counts.stt\n");
-      FILE *f = fopen("counts.stt", "wb");
-      fwrite(&aggregate_fc, sizeof(aggregate_fc), 1, f);
-      fclose(f);
-    }
-#endif  // CONFIG_ENTROPY_STATS
-#if CONFIG_INTERNAL_STATS
-    aom_clear_system_state();
-
-    if (cpi->oxcf.pass != 1) {
-      char headings[512] = { 0 };
-      char results[512] = { 0 };
-      FILE *f = fopen("opsnr.stt", "a");
-      double time_encoded =
-          (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
-          10000000.000;
-      double total_encode_time =
-          (cpi->time_receive_data + cpi->time_compress_data) / 1000.000;
-      const double dr =
-          (double)cpi->bytes * (double)8 / (double)1000 / time_encoded;
-      const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
-      const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
-      const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
-
-      if (cpi->b_calculate_psnr) {
-        const double total_psnr = aom_sse_to_psnr(
-            (double)cpi->total_samples, peak, (double)cpi->total_sq_error);
-        const double total_ssim =
-            100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0);
-        snprintf(headings, sizeof(headings),
-                 "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t"
-                 "AOMSSIM\tVPSSIMP\tFASTSIM\tPSNRHVS\t"
-                 "WstPsnr\tWstSsim\tWstFast\tWstHVS\t"
-                 "AVPsrnY\tAPsnrCb\tAPsnrCr");
-        snprintf(results, sizeof(results),
-                 "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
-                 "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
-                 "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
-                 "%7.3f\t%7.3f\t%7.3f",
-                 dr, cpi->psnr.stat[STAT_ALL] / cpi->count, total_psnr,
-                 cpi->psnr.stat[STAT_ALL] / cpi->count, total_psnr, total_ssim,
-                 total_ssim, cpi->fastssim.stat[STAT_ALL] / cpi->count,
-                 cpi->psnrhvs.stat[STAT_ALL] / cpi->count, cpi->psnr.worst,
-                 cpi->worst_ssim, cpi->fastssim.worst, cpi->psnrhvs.worst,
-                 cpi->psnr.stat[STAT_Y] / cpi->count,
-                 cpi->psnr.stat[STAT_U] / cpi->count,
-                 cpi->psnr.stat[STAT_V] / cpi->count);
-
-        if (cpi->b_calculate_blockiness) {
-          SNPRINT(headings, "\t  Block\tWstBlck");
-          SNPRINT2(results, "\t%7.3f", cpi->total_blockiness / cpi->count);
-          SNPRINT2(results, "\t%7.3f", cpi->worst_blockiness);
-        }
-
-        if (cpi->b_calculate_consistency) {
-          double consistency =
-              aom_sse_to_psnr((double)cpi->total_samples, peak,
-                              (double)cpi->total_inconsistency);
-
-          SNPRINT(headings, "\tConsist\tWstCons");
-          SNPRINT2(results, "\t%7.3f", consistency);
-          SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
-        }
-        fprintf(f, "%s\t    Time\tRcErr\tAbsErr\n", headings);
-        fprintf(f, "%s\t%8.0f\t%7.2f\t%7.2f\n", results, total_encode_time,
-                rate_err, fabs(rate_err));
-      }
-
-      fclose(f);
-    }
-#endif  // CONFIG_INTERNAL_STATS
-  }
-
-  for (t = 0; t < cpi->num_workers; ++t) {
-    AVxWorker *const worker = &cpi->workers[t];
-    EncWorkerData *const thread_data = &cpi->tile_thr_data[t];
-
-    // Deallocate allocated threads.
-    aom_get_worker_interface()->end(worker);
-
-    // Deallocate allocated thread data.
-    if (t < cpi->num_workers - 1) {
-      aom_free(thread_data->td->palette_buffer);
-      aom_free(thread_data->td->tmp_conv_dst);
-      for (int j = 0; j < 2; ++j) {
-        aom_free(thread_data->td->tmp_obmc_bufs[j]);
-      }
-      aom_free(thread_data->td->above_pred_buf);
-      aom_free(thread_data->td->left_pred_buf);
-      aom_free(thread_data->td->wsrc_buf);
-      for (int x = 0; x < 2; x++) {
-        for (int y = 0; y < 2; y++) {
-          aom_free(thread_data->td->hash_value_buffer[x][y]);
-          thread_data->td->hash_value_buffer[x][y] = NULL;
-        }
-      }
-      aom_free(thread_data->td->mask_buf);
-      aom_free(thread_data->td->counts);
-      av1_free_pc_tree(thread_data->td, num_planes);
-      aom_free(thread_data->td);
-    }
-  }
-  aom_free(cpi->tile_thr_data);
-  aom_free(cpi->workers);
-
-  if (cpi->num_workers > 1) {
-    av1_loop_filter_dealloc(&cpi->lf_row_sync);
-    av1_loop_restoration_dealloc(&cpi->lr_row_sync, cpi->num_workers);
-  }
-
-  dealloc_compressor_data(cpi);
-
-  for (i = 0; i < sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]);
-       ++i) {
-    aom_free(cpi->mbgraph_stats[i].mb_stats);
-  }
-
-#if CONFIG_FP_MB_STATS
-  if (cpi->use_fp_mb_stats) {
-    aom_free(cpi->twopass.frame_mb_stats_buf);
-    cpi->twopass.frame_mb_stats_buf = NULL;
-  }
-#endif
-#if CONFIG_INTERNAL_STATS
-  aom_free(cpi->ssim_vars);
-  cpi->ssim_vars = NULL;
-#endif  // CONFIG_INTERNAL_STATS
-
-  av1_remove_common(cm);
-  for (i = 0; i < FRAME_BUFFERS; ++i) {
-    av1_hash_table_destroy(&cm->buffer_pool->frame_bufs[i].hash_table);
-  }
-  if (cpi->sf.use_hash_based_trellis) hbt_destroy();
-  av1_free_ref_frame_buffers(cm->buffer_pool);
-  aom_free(cpi);
-
-#ifdef OUTPUT_YUV_SKINMAP
-  fclose(yuv_skinmap_file);
-#endif
-#ifdef OUTPUT_YUV_REC
-  fclose(yuv_rec_file);
-#endif
-}
-
-static void generate_psnr_packet(AV1_COMP *cpi) {
-  struct aom_codec_cx_pkt pkt;
-  int i;
-  PSNR_STATS psnr;
-  aom_calc_highbd_psnr(cpi->source, cpi->common.frame_to_show, &psnr,
-                       cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
-
-  for (i = 0; i < 4; ++i) {
-    pkt.data.psnr.samples[i] = psnr.samples[i];
-    pkt.data.psnr.sse[i] = psnr.sse[i];
-    pkt.data.psnr.psnr[i] = psnr.psnr[i];
-  }
-  pkt.kind = AOM_CODEC_PSNR_PKT;
-  aom_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
-}
-
-int av1_use_as_reference(AV1_COMP *cpi, int ref_frame_flags) {
-  if (ref_frame_flags > ((1 << INTER_REFS_PER_FRAME) - 1)) return -1;
-
-  cpi->ext_ref_frame_flags = ref_frame_flags;
-  return 0;
-}
-
-void av1_update_reference(AV1_COMP *cpi, int ref_frame_upd_flags) {
-  cpi->ext_refresh_last_frame = (ref_frame_upd_flags & AOM_LAST_FLAG) != 0;
-  cpi->ext_refresh_golden_frame = (ref_frame_upd_flags & AOM_GOLD_FLAG) != 0;
-  cpi->ext_refresh_alt_ref_frame = (ref_frame_upd_flags & AOM_ALT_FLAG) != 0;
-  cpi->ext_refresh_bwd_ref_frame = (ref_frame_upd_flags & AOM_BWD_FLAG) != 0;
-  cpi->ext_refresh_alt2_ref_frame = (ref_frame_upd_flags & AOM_ALT2_FLAG) != 0;
-  cpi->ext_refresh_frame_flags_pending = 1;
-}
-
-int av1_copy_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  YV12_BUFFER_CONFIG *cfg = get_ref_frame(cm, idx);
-  if (cfg) {
-    aom_yv12_copy_frame(cfg, sd, num_planes);
-    return 0;
-  } else {
-    return -1;
-  }
-}
-
-int av1_set_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  YV12_BUFFER_CONFIG *cfg = get_ref_frame(cm, idx);
-  if (cfg) {
-    aom_yv12_copy_frame(sd, cfg, num_planes);
-    return 0;
-  } else {
-    return -1;
-  }
-}
-
-int av1_update_entropy(AV1_COMP *cpi, int update) {
-  cpi->ext_refresh_frame_context = update;
-  cpi->ext_refresh_frame_context_pending = 1;
-  return 0;
-}
-
-#if defined(OUTPUT_YUV_DENOISED) || defined(OUTPUT_YUV_SKINMAP)
-// The denoiser buffer is allocated as a YUV 440 buffer. This function writes it
-// as YUV 420. We simply use the top-left pixels of the UV buffers, since we do
-// not denoise the UV channels at this time. If ever we implement UV channel
-// denoising we will have to modify this.
-void aom_write_yuv_frame_420(YV12_BUFFER_CONFIG *s, FILE *f) {
-  uint8_t *src = s->y_buffer;
-  int h = s->y_height;
-
-  do {
-    fwrite(src, s->y_width, 1, f);
-    src += s->y_stride;
-  } while (--h);
-
-  src = s->u_buffer;
-  h = s->uv_height;
-
-  do {
-    fwrite(src, s->uv_width, 1, f);
-    src += s->uv_stride;
-  } while (--h);
-
-  src = s->v_buffer;
-  h = s->uv_height;
-
-  do {
-    fwrite(src, s->uv_width, 1, f);
-    src += s->uv_stride;
-  } while (--h);
-}
-#endif
-
-static void check_show_existing_frame(AV1_COMP *cpi) {
-  const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
-  AV1_COMMON *const cm = &cpi->common;
-  const FRAME_UPDATE_TYPE next_frame_update_type =
-      gf_group->update_type[gf_group->index];
-#if USE_SYMM_MULTI_LAYER
-  const int which_arf = (cpi->new_bwdref_update_rule == 1)
-                            ? gf_group->arf_update_idx[gf_group->index] > 0
-                            : gf_group->arf_update_idx[gf_group->index];
-#else
-  const int which_arf = gf_group->arf_update_idx[gf_group->index];
-#endif
-
-  if (cm->show_existing_frame == 1) {
-    cm->show_existing_frame = 0;
-  } else if (cpi->rc.is_last_bipred_frame) {
-#if USE_SYMM_MULTI_LAYER
-    // NOTE: When new structure is used, every bwdref will have one overlay
-    //       frame. Therefore, there is no need to find out which frame to
-    //       show in advance.
-    if (cpi->new_bwdref_update_rule == 0) {
-#endif
-      // NOTE: If the current frame is a last bi-predictive frame, it is
-      //       needed next to show the BWDREF_FRAME, which is pointed by
-      //       the last_fb_idxes[0] after reference frame buffer update
-      cpi->rc.is_last_bipred_frame = 0;
-      cm->show_existing_frame = 1;
-      cpi->existing_fb_idx_to_show = cpi->ref_fb_idx[0];
-#if USE_SYMM_MULTI_LAYER
-    }
-#endif
-  } else if (cpi->is_arf_filter_off[which_arf] &&
-             (next_frame_update_type == OVERLAY_UPDATE ||
-              next_frame_update_type == INTNL_OVERLAY_UPDATE)) {
-#if USE_SYMM_MULTI_LAYER
-    const int bwdref_to_show =
-        (cpi->new_bwdref_update_rule == 1) ? BWDREF_FRAME : ALTREF2_FRAME;
-#else
-    const int bwdref_to_show = ALTREF2_FRAME;
-#endif
-    // Other parameters related to OVERLAY_UPDATE will be taken care of
-    // in av1_rc_get_second_pass_params(cpi)
-    cm->show_existing_frame = 1;
-    cpi->rc.is_src_frame_alt_ref = 1;
-    cpi->existing_fb_idx_to_show = (next_frame_update_type == OVERLAY_UPDATE)
-                                       ? cpi->ref_fb_idx[ALTREF_FRAME - 1]
-                                       : cpi->ref_fb_idx[bwdref_to_show - 1];
-#if USE_SYMM_MULTI_LAYER
-    if (cpi->new_bwdref_update_rule == 0)
-#endif
-      cpi->is_arf_filter_off[which_arf] = 0;
-  }
-  cpi->rc.is_src_frame_ext_arf = 0;
-}
-
-#ifdef OUTPUT_YUV_REC
-void aom_write_one_yuv_frame(AV1_COMMON *cm, YV12_BUFFER_CONFIG *s) {
-  uint8_t *src = s->y_buffer;
-  int h = cm->height;
-  if (yuv_rec_file == NULL) return;
-  if (s->flags & YV12_FLAG_HIGHBITDEPTH) {
-    uint16_t *src16 = CONVERT_TO_SHORTPTR(s->y_buffer);
-
-    do {
-      fwrite(src16, s->y_width, 2, yuv_rec_file);
-      src16 += s->y_stride;
-    } while (--h);
-
-    src16 = CONVERT_TO_SHORTPTR(s->u_buffer);
-    h = s->uv_height;
-
-    do {
-      fwrite(src16, s->uv_width, 2, yuv_rec_file);
-      src16 += s->uv_stride;
-    } while (--h);
-
-    src16 = CONVERT_TO_SHORTPTR(s->v_buffer);
-    h = s->uv_height;
-
-    do {
-      fwrite(src16, s->uv_width, 2, yuv_rec_file);
-      src16 += s->uv_stride;
-    } while (--h);
-
-    fflush(yuv_rec_file);
-    return;
-  }
-
-  do {
-    fwrite(src, s->y_width, 1, yuv_rec_file);
-    src += s->y_stride;
-  } while (--h);
-
-  src = s->u_buffer;
-  h = s->uv_height;
-
-  do {
-    fwrite(src, s->uv_width, 1, yuv_rec_file);
-    src += s->uv_stride;
-  } while (--h);
-
-  src = s->v_buffer;
-  h = s->uv_height;
-
-  do {
-    fwrite(src, s->uv_width, 1, yuv_rec_file);
-    src += s->uv_stride;
-  } while (--h);
-
-  fflush(yuv_rec_file);
-}
-#endif  // OUTPUT_YUV_REC
-
-#define GM_RECODE_LOOP_NUM4X4_FACTOR 192
-static int recode_loop_test_global_motion(AV1_COMP *cpi) {
-  int i;
-  int recode = 0;
-  RD_COUNTS *const rdc = &cpi->td.rd_counts;
-  AV1_COMMON *const cm = &cpi->common;
-  for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
-    if (cm->global_motion[i].wmtype != IDENTITY &&
-        rdc->global_motion_used[i] * GM_RECODE_LOOP_NUM4X4_FACTOR <
-            cpi->gmparams_cost[i]) {
-      cm->global_motion[i] = default_warp_params;
-      assert(cm->global_motion[i].wmtype == IDENTITY);
-      cpi->gmparams_cost[i] = 0;
-      recode = 1;
-      // TODO(sarahparker): The earlier condition for recoding here was:
-      // "recode |= (rdc->global_motion_used[i] > 0);". Can we bring something
-      // similar to that back to speed up global motion?
-    }
-  }
-  return recode;
-}
-
-// Function to test for conditions that indicate we should loop
-// back and recode a frame.
-static int recode_loop_test(AV1_COMP *cpi, int high_limit, int low_limit, int q,
-                            int maxq, int minq) {
-  const RATE_CONTROL *const rc = &cpi->rc;
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-  const int frame_is_kfgfarf = frame_is_kf_gf_arf(cpi);
-  int force_recode = 0;
-
-  if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
-      (cpi->sf.recode_loop == ALLOW_RECODE) ||
-      (frame_is_kfgfarf && (cpi->sf.recode_loop == ALLOW_RECODE_KFARFGF))) {
-    // TODO(agrange) high_limit could be greater than the scale-down threshold.
-    if ((rc->projected_frame_size > high_limit && q < maxq) ||
-        (rc->projected_frame_size < low_limit && q > minq)) {
-      force_recode = 1;
-    } else if (cpi->oxcf.rc_mode == AOM_CQ) {
-      // Deal with frame undershoot and whether or not we are
-      // below the automatically set cq level.
-      if (q > oxcf->cq_level &&
-          rc->projected_frame_size < ((rc->this_frame_target * 7) >> 3)) {
-        force_recode = 1;
-      }
-    }
-  }
-  return force_recode;
-}
-
-#define DUMP_REF_FRAME_IMAGES 0
-
-#if DUMP_REF_FRAME_IMAGES == 1
-static int dump_one_image(AV1_COMMON *cm,
-                          const YV12_BUFFER_CONFIG *const ref_buf,
-                          char *file_name) {
-  int h;
-  FILE *f_ref = NULL;
-
-  if (ref_buf == NULL) {
-    printf("Frame data buffer is NULL.\n");
-    return AOM_CODEC_MEM_ERROR;
-  }
-
-  if ((f_ref = fopen(file_name, "wb")) == NULL) {
-    printf("Unable to open file %s to write.\n", file_name);
-    return AOM_CODEC_MEM_ERROR;
-  }
-
-  // --- Y ---
-  for (h = 0; h < cm->height; ++h) {
-    fwrite(&ref_buf->y_buffer[h * ref_buf->y_stride], 1, cm->width, f_ref);
-  }
-  // --- U ---
-  for (h = 0; h < (cm->height >> 1); ++h) {
-    fwrite(&ref_buf->u_buffer[h * ref_buf->uv_stride], 1, (cm->width >> 1),
-           f_ref);
-  }
-  // --- V ---
-  for (h = 0; h < (cm->height >> 1); ++h) {
-    fwrite(&ref_buf->v_buffer[h * ref_buf->uv_stride], 1, (cm->width >> 1),
-           f_ref);
-  }
-
-  fclose(f_ref);
-
-  return AOM_CODEC_OK;
-}
-
-static void dump_ref_frame_images(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  MV_REFERENCE_FRAME ref_frame;
-
-  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-    char file_name[256] = "";
-    snprintf(file_name, sizeof(file_name), "/tmp/enc_F%d_ref_%d.yuv",
-             cm->current_video_frame, ref_frame);
-    dump_one_image(cm, get_ref_frame_buffer(cpi, ref_frame), file_name);
-  }
-}
-#endif  // DUMP_REF_FRAME_IMAGES == 1
-
-// This function is used to shift the virtual indices of last reference frames
-// as follows:
-// LAST_FRAME -> LAST2_FRAME -> LAST3_FRAME
-// when the LAST_FRAME is updated.
-static INLINE void shift_last_ref_frames(AV1_COMP *cpi) {
-  // TODO(isbs): shift the scaled indices as well
-  int ref_frame;
-  for (ref_frame = LAST_REF_FRAMES - 1; ref_frame > 0; --ref_frame) {
-    cpi->ref_fb_idx[ref_frame] = cpi->ref_fb_idx[ref_frame - 1];
-
-    // [0] is allocated to the current coded frame. The statistics for the
-    // reference frames start at [LAST_FRAME], i.e. [1].
-    if (!cpi->rc.is_src_frame_alt_ref) {
-      memcpy(cpi->interp_filter_selected[ref_frame + LAST_FRAME],
-             cpi->interp_filter_selected[ref_frame - 1 + LAST_FRAME],
-             sizeof(cpi->interp_filter_selected[ref_frame - 1 + LAST_FRAME]));
-    }
-  }
-}
-
-#if USE_SYMM_MULTI_LAYER
-// This function is used to shift the virtual indices of bwd reference
-// frames as follows:
-// BWD_REF -> ALT2_REF -> EXT_REF
-// to clear a space to store the closest bwdref
-static INLINE void rshift_bwd_ref_frames(AV1_COMP *cpi) {
-  // TODO(isbs): shift the scaled indices as well
-  static const int ordered_bwd[3] = { BWDREF_FRAME - 1, ALTREF2_FRAME - 1,
-                                      EXTREF_FRAME - 1 };
-
-  for (int i = 2; i > 0; --i) {
-    // [0] is allocated to the current coded frame, i.e. bwdref
-    memcpy(
-        cpi->interp_filter_selected[ordered_bwd[i] + LAST_FRAME],
-        cpi->interp_filter_selected[ordered_bwd[i - 1] + LAST_FRAME],
-        sizeof(cpi->interp_filter_selected[ordered_bwd[i - 1] + LAST_FRAME]));
-
-    cpi->ref_fb_idx[ordered_bwd[i]] = cpi->ref_fb_idx[ordered_bwd[i - 1]];
-  }
-}
-
-// This function is used to shift the virtual indices of bwd reference
-// frames as follows:
-// BWD_REF <- ALT2_REF <- EXT_REF
-// to update the bwd reference frame for coding the next frame.
-static INLINE void lshift_bwd_ref_frames(AV1_COMP *cpi) {
-  // TODO(isbs): shift the scaled indices as well
-  static const int ordered_bwd[3] = { BWDREF_FRAME - 1, ALTREF2_FRAME - 1,
-                                      EXTREF_FRAME - 1 };
-
-  for (int i = 0; i < 2; ++i) {
-    // [0] is allocated to the current coded frame, i.e. bwdref
-    memcpy(
-        cpi->interp_filter_selected[ordered_bwd[i] + LAST_FRAME],
-        cpi->interp_filter_selected[ordered_bwd[i + 1] + LAST_FRAME],
-        sizeof(cpi->interp_filter_selected[ordered_bwd[i + 1] + LAST_FRAME]));
-
-    cpi->ref_fb_idx[ordered_bwd[i]] = cpi->ref_fb_idx[ordered_bwd[i + 1]];
-  }
-}
-#endif  // USE_SYMM_MULTI_LAYER
-
-static void update_reference_frames(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-
-  // NOTE: Save the new show frame buffer index for --test-code=warn, i.e.,
-  //       for the purpose to verify no mismatch between encoder and decoder.
-  if (cm->show_frame) cpi->last_show_frame_buf_idx = cm->new_fb_idx;
-
-  // In the case of show_existing frame, we will not send fresh flag
-  // to decoder. Any change in the reference frame buffer can be done by
-  // switching the virtual indices.
-  if (cm->show_existing_frame) {
-    cpi->refresh_last_frame = 0;
-    cpi->refresh_golden_frame = 0;
-    cpi->refresh_bwd_ref_frame = 0;
-    cpi->refresh_alt2_ref_frame = 0;
-    cpi->refresh_alt_ref_frame = 0;
-
-    cpi->rc.is_bwd_ref_frame = 0;
-    cpi->rc.is_last_bipred_frame = 0;
-    cpi->rc.is_bipred_frame = 0;
-  }
-
-  BufferPool *const pool = cm->buffer_pool;
-
-  // At this point the new frame has been encoded.
-  // If any buffer copy / swapping is signaled it should be done here.
-
-  // Only update all of the reference buffers if a KEY_FRAME is also a
-  // show_frame. This ensures a fwd keyframe does not update all of the buffers
-  if ((cm->frame_type == KEY_FRAME && cm->show_frame) || frame_is_sframe(cm)) {
-    for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
-      ref_cnt_fb(pool->frame_bufs,
-                 &cm->ref_frame_map[cpi->ref_fb_idx[ref_frame]],
-                 cm->new_fb_idx);
-    }
-    return;
-  }
-
-  if (av1_preserve_existing_gf(cpi)) {
-    // We have decided to preserve the previously existing golden frame as our
-    // new ARF frame. However, in the short term in function
-    // av1_bitstream.c::get_refresh_mask() we left it in the GF slot and, if
-    // we're updating the GF with the current decoded frame, we save it to the
-    // ARF slot instead.
-    // We now have to update the ARF with the current frame and swap gld_fb_idx
-    // and alt_fb_idx so that, overall, we've stored the old GF in the new ARF
-    // slot and, if we're updating the GF, the current frame becomes the new GF.
-    int tmp;
-
-    // ARF in general is a better reference than overlay. We shouldkeep ARF as
-    // reference instead of replacing it with overlay.
-
-    if (!cpi->preserve_arf_as_gld) {
-      ref_cnt_fb(pool->frame_bufs,
-                 &cm->ref_frame_map[cpi->ref_fb_idx[ALTREF_FRAME - 1]],
-                 cm->new_fb_idx);
-    }
-
-    tmp = cpi->ref_fb_idx[ALTREF_FRAME - 1];
-    cpi->ref_fb_idx[ALTREF_FRAME - 1] = cpi->ref_fb_idx[GOLDEN_FRAME - 1];
-    cpi->ref_fb_idx[GOLDEN_FRAME - 1] = tmp;
-
-    // TODO(zoeliu): Do we need to copy cpi->interp_filter_selected[0] over to
-    // cpi->interp_filter_selected[GOLDEN_FRAME]?
-  } else if (cpi->rc.is_src_frame_ext_arf && cm->show_existing_frame) {
-#if CONFIG_DEBUG
-    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
-    assert(gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE);
-#endif
-#if USE_SYMM_MULTI_LAYER
-    const int bwdref_to_show =
-        (cpi->new_bwdref_update_rule == 1) ? BWDREF_FRAME : ALTREF2_FRAME;
-#else
-    const int bwdref_to_show = ALTREF2_FRAME;
-#endif
-    // Deal with the special case for showing existing internal ALTREF_FRAME
-    // Refresh the LAST_FRAME with the ALTREF_FRAME and retire the LAST3_FRAME
-    // by updating the virtual indices.
-    const int tmp = cpi->ref_fb_idx[LAST_REF_FRAMES - 1];
-    shift_last_ref_frames(cpi);
-
-    cpi->ref_fb_idx[LAST_FRAME - 1] = cpi->ref_fb_idx[bwdref_to_show - 1];
-
-    memcpy(cpi->interp_filter_selected[LAST_FRAME],
-           cpi->interp_filter_selected[bwdref_to_show],
-           sizeof(cpi->interp_filter_selected[bwdref_to_show]));
-#if USE_SYMM_MULTI_LAYER
-    if (cpi->new_bwdref_update_rule == 1) {
-      lshift_bwd_ref_frames(cpi);
-      // pass outdated forward reference frame (previous LAST3) to the
-      // spared space
-      cpi->ref_fb_idx[EXTREF_FRAME - 1] = tmp;
-    } else {
-#endif
-      cpi->ref_fb_idx[bwdref_to_show - 1] = tmp;
-#if USE_SYMM_MULTI_LAYER
-    }
-#endif
-  } else { /* For non key/golden frames */
-    // === ALTREF_FRAME ===
-    if (cpi->refresh_alt_ref_frame) {
-      int arf_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1];
-      ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
-
-      memcpy(cpi->interp_filter_selected[ALTREF_FRAME],
-             cpi->interp_filter_selected[0],
-             sizeof(cpi->interp_filter_selected[0]));
-    }
-
-    // === GOLDEN_FRAME ===
-    if (cpi->refresh_golden_frame) {
-      ref_cnt_fb(pool->frame_bufs,
-                 &cm->ref_frame_map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]],
-                 cm->new_fb_idx);
-
-      memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
-             cpi->interp_filter_selected[0],
-             sizeof(cpi->interp_filter_selected[0]));
-    }
-
-    // === BWDREF_FRAME ===
-    if (cpi->refresh_bwd_ref_frame) {
-#if USE_SYMM_MULTI_LAYER
-      if (cpi->new_bwdref_update_rule) {
-        // We shift the backward reference frame as follows:
-        // BWDREF -> ALTREF2 -> EXTREF
-        // and assign the newly coded frame to BWDREF so that it always
-        // keeps the nearest future frame
-        int tmp = cpi->ref_fb_idx[EXTREF_FRAME - 1];
-        ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[tmp], cm->new_fb_idx);
-
-        rshift_bwd_ref_frames(cpi);
-        cpi->ref_fb_idx[BWDREF_FRAME - 1] = tmp;
-      } else {
-#endif  // USE_SYMM_MULTI_LAYER
-        ref_cnt_fb(pool->frame_bufs,
-                   &cm->ref_frame_map[cpi->ref_fb_idx[BWDREF_FRAME - 1]],
-                   cm->new_fb_idx);
-#if USE_SYMM_MULTI_LAYER
-      }
-#endif
-      memcpy(cpi->interp_filter_selected[BWDREF_FRAME],
-             cpi->interp_filter_selected[0],
-             sizeof(cpi->interp_filter_selected[0]));
-    }
-
-    // === ALTREF2_FRAME ===
-    if (cpi->refresh_alt2_ref_frame) {
-      ref_cnt_fb(pool->frame_bufs,
-                 &cm->ref_frame_map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]],
-                 cm->new_fb_idx);
-
-      memcpy(cpi->interp_filter_selected[ALTREF2_FRAME],
-             cpi->interp_filter_selected[0],
-             sizeof(cpi->interp_filter_selected[0]));
-    }
-  }
-
-  if (cpi->refresh_last_frame) {
-    // NOTE(zoeliu): We have two layers of mapping (1) from the per-frame
-    // reference to the reference frame buffer virtual index; and then (2) from
-    // the virtual index to the reference frame buffer physical index:
-    //
-    // LAST_FRAME,      ..., LAST3_FRAME,     ..., ALTREF_FRAME
-    //      |                     |                     |
-    //      v                     v                     v
-    // ref_fb_idx[0],   ..., ref_fb_idx[2],   ..., ref_fb_idx[ALTREF_FRAME-1]
-    //      |                     |                     |
-    //      v                     v                     v
-    // ref_frame_map[], ..., ref_frame_map[], ..., ref_frame_map[]
-    //
-    // When refresh_last_frame is set, it is intended to retire LAST3_FRAME,
-    // have the other 2 LAST reference frames shifted as follows:
-    // LAST_FRAME -> LAST2_FRAME -> LAST3_FRAME
-    // , and then have LAST_FRAME refreshed by the newly coded frame.
-    //
-    // To fulfill it, the decoder will be notified to execute following 2 steps:
-    //
-    // (a) To change ref_frame_map[] and have the virtual index of LAST3_FRAME
-    //     to point to the newly coded frame, i.e.
-    //     ref_frame_map[lst_fb_idexes[2]] => new_fb_idx;
-    //
-    // (b) To change the 1st layer mapping to have LAST_FRAME mapped to the
-    //     original virtual index of LAST3_FRAME and have the other mappings
-    //     shifted as follows:
-    // LAST_FRAME,      LAST2_FRAME,     LAST3_FRAME
-    //      |                |                |
-    //      v                v                v
-    // ref_fb_idx[2],   ref_fb_idx[0],   ref_fb_idx[1]
-    int tmp;
-
-    ref_cnt_fb(pool->frame_bufs,
-               &cm->ref_frame_map[cpi->ref_fb_idx[LAST_REF_FRAMES - 1]],
-               cm->new_fb_idx);
-
-    tmp = cpi->ref_fb_idx[LAST_REF_FRAMES - 1];
-
-    shift_last_ref_frames(cpi);
-    cpi->ref_fb_idx[0] = tmp;
-
-    assert(cm->show_existing_frame == 0);
-    memcpy(cpi->interp_filter_selected[LAST_FRAME],
-           cpi->interp_filter_selected[0],
-           sizeof(cpi->interp_filter_selected[0]));
-
-    // If the new structure is used, we will always have overlay frames coupled
-    // with bwdref frames. Therefore, we won't have to perform this update
-    // in advance (we do this update when the overlay frame shows up).
-#if USE_SYMM_MULTI_LAYER
-    if (cpi->new_bwdref_update_rule == 0 && cpi->rc.is_last_bipred_frame) {
-#else
-    if (cpi->rc.is_last_bipred_frame) {
-#endif
-      // Refresh the LAST_FRAME with the BWDREF_FRAME and retire the
-      // LAST3_FRAME by updating the virtual indices.
-      //
-      // NOTE: The source frame for BWDREF does not have a holding position as
-      //       the OVERLAY frame for ALTREF's. Hence, to resolve the reference
-      //       virtual index reshuffling for BWDREF, the encoder always
-      //       specifies a LAST_BIPRED right before BWDREF and completes the
-      //       reshuffling job accordingly.
-      tmp = cpi->ref_fb_idx[LAST_REF_FRAMES - 1];
-
-      shift_last_ref_frames(cpi);
-      cpi->ref_fb_idx[0] = cpi->ref_fb_idx[BWDREF_FRAME - 1];
-      cpi->ref_fb_idx[BWDREF_FRAME - 1] = tmp;
-
-      memcpy(cpi->interp_filter_selected[LAST_FRAME],
-             cpi->interp_filter_selected[BWDREF_FRAME],
-             sizeof(cpi->interp_filter_selected[BWDREF_FRAME]));
-    }
-  }
-
-#if DUMP_REF_FRAME_IMAGES == 1
-  // Dump out all reference frame images.
-  dump_ref_frame_images(cpi);
-#endif  // DUMP_REF_FRAME_IMAGES
-}
-
-static INLINE void alloc_frame_mvs(AV1_COMMON *const cm, int buffer_idx) {
-  assert(buffer_idx != INVALID_IDX);
-  RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
-  ensure_mv_buffer(new_fb_ptr, cm);
-  new_fb_ptr->width = cm->width;
-  new_fb_ptr->height = cm->height;
-}
-
-static void scale_references(AV1_COMP *cpi) {
-  AV1_COMMON *cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MV_REFERENCE_FRAME ref_frame;
-  const AOM_REFFRAME ref_mask[INTER_REFS_PER_FRAME] = {
-    AOM_LAST_FLAG, AOM_LAST2_FLAG, AOM_LAST3_FLAG, AOM_GOLD_FLAG,
-    AOM_BWD_FLAG,  AOM_ALT2_FLAG,  AOM_ALT_FLAG
-  };
-
-  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-    // Need to convert from AOM_REFFRAME to index into ref_mask (subtract 1).
-    if (cpi->ref_frame_flags & ref_mask[ref_frame - 1]) {
-      BufferPool *const pool = cm->buffer_pool;
-      const YV12_BUFFER_CONFIG *const ref =
-          get_ref_frame_buffer(cpi, ref_frame);
-
-      if (ref == NULL) {
-        cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
-        continue;
-      }
-
-      if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
-        RefCntBuffer *new_fb_ptr = NULL;
-        int force_scaling = 0;
-        int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
-        if (new_fb == INVALID_IDX) {
-          new_fb = get_free_fb(cm);
-          force_scaling = 1;
-        }
-        if (new_fb == INVALID_IDX) return;
-        new_fb_ptr = &pool->frame_bufs[new_fb];
-        if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
-            new_fb_ptr->buf.y_crop_height != cm->height) {
-          if (aom_realloc_frame_buffer(
-                  &new_fb_ptr->buf, cm->width, cm->height,
-                  cm->seq_params.subsampling_x, cm->seq_params.subsampling_y,
-                  cm->seq_params.use_highbitdepth, AOM_BORDER_IN_PIXELS,
-                  cm->byte_alignment, NULL, NULL, NULL))
-            aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                               "Failed to allocate frame buffer");
-          av1_resize_and_extend_frame(
-              ref, &new_fb_ptr->buf, (int)cm->seq_params.bit_depth, num_planes);
-          cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
-          alloc_frame_mvs(cm, new_fb);
-        }
-      } else {
-        const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
-        RefCntBuffer *const buf = &pool->frame_bufs[buf_idx];
-        buf->buf.y_crop_width = ref->y_crop_width;
-        buf->buf.y_crop_height = ref->y_crop_height;
-        cpi->scaled_ref_idx[ref_frame - 1] = buf_idx;
-        ++buf->ref_count;
-      }
-    } else {
-      if (cpi->oxcf.pass != 0) cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
-    }
-  }
-}
-
-static void release_scaled_references(AV1_COMP *cpi) {
-  AV1_COMMON *cm = &cpi->common;
-  int i;
-  // TODO(isbs): only refresh the necessary frames, rather than all of them
-  for (i = 0; i < REF_FRAMES; ++i) {
-    const int idx = cpi->scaled_ref_idx[i];
-    RefCntBuffer *const buf =
-        idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[idx] : NULL;
-    if (buf != NULL) {
-      --buf->ref_count;
-      cpi->scaled_ref_idx[i] = INVALID_IDX;
-    }
-  }
-}
-
-static void set_mv_search_params(AV1_COMP *cpi) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const unsigned int max_mv_def = AOMMIN(cm->width, cm->height);
-
-  // Default based on max resolution.
-  cpi->mv_step_param = av1_init_search_range(max_mv_def);
-
-  if (cpi->sf.mv.auto_mv_step_size) {
-    if (frame_is_intra_only(cm)) {
-      // Initialize max_mv_magnitude for use in the first INTER frame
-      // after a key/intra-only frame.
-      cpi->max_mv_magnitude = max_mv_def;
-    } else {
-      if (cm->show_frame) {
-        // Allow mv_steps to correspond to twice the max mv magnitude found
-        // in the previous frame, capped by the default max_mv_magnitude based
-        // on resolution.
-        cpi->mv_step_param = av1_init_search_range(
-            AOMMIN(max_mv_def, 2 * cpi->max_mv_magnitude));
-      }
-      cpi->max_mv_magnitude = 0;
-    }
-  }
-}
-
-static void set_size_independent_vars(AV1_COMP *cpi) {
-  int i;
-  for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
-    cpi->common.global_motion[i] = default_warp_params;
-  }
-  cpi->global_motion_search_done = 0;
-  av1_set_speed_features_framesize_independent(cpi);
-  av1_set_rd_speed_thresholds(cpi);
-  av1_set_rd_speed_thresholds_sub8x8(cpi);
-  cpi->common.interp_filter = SWITCHABLE;
-  cpi->common.switchable_motion_mode = 1;
-}
-
-static void set_size_dependent_vars(AV1_COMP *cpi, int *q, int *bottom_index,
-                                    int *top_index) {
-  AV1_COMMON *const cm = &cpi->common;
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-
-  // Setup variables that depend on the dimensions of the frame.
-  av1_set_speed_features_framesize_dependent(cpi);
-
-  // Decide q and q bounds.
-  *q = av1_rc_pick_q_and_bounds(cpi, cm->width, cm->height, bottom_index,
-                                top_index);
-
-  if (!frame_is_intra_only(cm)) {
-    set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH,
-                          cpi->common.cur_frame_force_integer_mv);
-  }
-
-  // Configure experimental use of segmentation for enhanced coding of
-  // static regions if indicated.
-  // Only allowed in the second pass of a two pass encode, as it requires
-  // lagged coding, and if the relevant speed feature flag is set.
-  if (oxcf->pass == 2 && cpi->sf.static_segmentation)
-    configure_static_seg_features(cpi);
-}
-
-static void init_motion_estimation(AV1_COMP *cpi) {
-  int y_stride = cpi->scaled_source.y_stride;
-
-  if (cpi->sf.mv.search_method == NSTEP) {
-    av1_init3smotion_compensation(&cpi->ss_cfg, y_stride);
-  } else if (cpi->sf.mv.search_method == DIAMOND) {
-    av1_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
-  }
-}
-
-#define COUPLED_CHROMA_FROM_LUMA_RESTORATION 0
-static void set_restoration_unit_size(int width, int height, int sx, int sy,
-                                      RestorationInfo *rst) {
-  (void)width;
-  (void)height;
-  (void)sx;
-  (void)sy;
-#if COUPLED_CHROMA_FROM_LUMA_RESTORATION
-  int s = AOMMIN(sx, sy);
-#else
-  int s = 0;
-#endif  // !COUPLED_CHROMA_FROM_LUMA_RESTORATION
-
-  if (width * height > 352 * 288)
-    rst[0].restoration_unit_size = RESTORATION_UNITSIZE_MAX;
-  else
-    rst[0].restoration_unit_size = (RESTORATION_UNITSIZE_MAX >> 1);
-  rst[1].restoration_unit_size = rst[0].restoration_unit_size >> s;
-  rst[2].restoration_unit_size = rst[1].restoration_unit_size;
-}
-
-static void init_ref_frame_bufs(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  int i;
-  BufferPool *const pool = cm->buffer_pool;
-  cm->new_fb_idx = INVALID_IDX;
-  for (i = 0; i < REF_FRAMES; ++i) {
-    cm->ref_frame_map[i] = INVALID_IDX;
-    pool->frame_bufs[i].ref_count = 0;
-  }
-  if (cm->seq_params.force_screen_content_tools) {
-    for (i = 0; i < FRAME_BUFFERS; ++i) {
-      av1_hash_table_init(&pool->frame_bufs[i].hash_table, &cpi->td.mb);
-    }
-  }
-}
-
-static void check_initial_width(AV1_COMP *cpi, int use_highbitdepth,
-                                int subsampling_x, int subsampling_y) {
-  AV1_COMMON *const cm = &cpi->common;
-  SequenceHeader *const seq_params = &cm->seq_params;
-
-  if (!cpi->initial_width || seq_params->use_highbitdepth != use_highbitdepth ||
-      seq_params->subsampling_x != subsampling_x ||
-      seq_params->subsampling_y != subsampling_y) {
-    seq_params->subsampling_x = subsampling_x;
-    seq_params->subsampling_y = subsampling_y;
-    seq_params->use_highbitdepth = use_highbitdepth;
-
-    alloc_raw_frame_buffers(cpi);
-    init_ref_frame_bufs(cpi);
-    alloc_util_frame_buffers(cpi);
-
-    init_motion_estimation(cpi);  // TODO(agrange) This can be removed.
-
-    cpi->initial_width = cm->width;
-    cpi->initial_height = cm->height;
-    cpi->initial_mbs = cm->MBs;
-  }
-}
-
-// Returns 1 if the assigned width or height was <= 0.
-static int set_size_literal(AV1_COMP *cpi, int width, int height) {
-  AV1_COMMON *cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  check_initial_width(cpi, cm->seq_params.use_highbitdepth,
-                      cm->seq_params.subsampling_x,
-                      cm->seq_params.subsampling_y);
-
-  if (width <= 0 || height <= 0) return 1;
-
-  cm->width = width;
-  cm->height = height;
-
-  if (cpi->initial_width && cpi->initial_height &&
-      (cm->width > cpi->initial_width || cm->height > cpi->initial_height)) {
-    av1_free_context_buffers(cm);
-    av1_free_pc_tree(&cpi->td, num_planes);
-    alloc_compressor_data(cpi);
-    realloc_segmentation_maps(cpi);
-    cpi->initial_width = cpi->initial_height = 0;
-  }
-  update_frame_size(cpi);
-
-  return 0;
-}
-
-static void set_frame_size(AV1_COMP *cpi, int width, int height) {
-  AV1_COMMON *const cm = &cpi->common;
-  const SequenceHeader *const seq_params = &cm->seq_params;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-  int ref_frame;
-
-  if (width != cm->width || height != cm->height) {
-    // There has been a change in the encoded frame size
-    set_size_literal(cpi, width, height);
-    set_mv_search_params(cpi);
-    // Recalculate 'all_lossless' in case super-resolution was (un)selected.
-    cm->all_lossless = cm->coded_lossless && !av1_superres_scaled(cm);
-  }
-
-  if (cpi->oxcf.pass == 2) {
-    av1_set_target_rate(cpi, cm->width, cm->height);
-  }
-
-  alloc_frame_mvs(cm, cm->new_fb_idx);
-
-  // Allocate above context buffers
-  if (cm->num_allocated_above_context_planes < av1_num_planes(cm) ||
-      cm->num_allocated_above_context_mi_col < cm->mi_cols ||
-      cm->num_allocated_above_contexts < cm->tile_rows) {
-    av1_free_above_context_buffers(cm, cm->num_allocated_above_contexts);
-    if (av1_alloc_above_context_buffers(cm, cm->tile_rows))
-      aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                         "Failed to allocate context buffers");
-  }
-
-  // Reset the frame pointers to the current frame size.
-  if (aom_realloc_frame_buffer(
-          get_frame_new_buffer(cm), cm->width, cm->height,
-          seq_params->subsampling_x, seq_params->subsampling_y,
-          seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS,
-          cm->byte_alignment, NULL, NULL, NULL))
-    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                       "Failed to allocate frame buffer");
-
-  const int frame_width = cm->superres_upscaled_width;
-  const int frame_height = cm->superres_upscaled_height;
-  set_restoration_unit_size(frame_width, frame_height,
-                            seq_params->subsampling_x,
-                            seq_params->subsampling_y, cm->rst_info);
-  for (int i = 0; i < num_planes; ++i)
-    cm->rst_info[i].frame_restoration_type = RESTORE_NONE;
-
-  av1_alloc_restoration_buffers(cm);
-  alloc_util_frame_buffers(cpi);  // TODO(afergs): Remove? Gets called anyways.
-  init_motion_estimation(cpi);
-
-  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-    RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - LAST_FRAME];
-    const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
-
-    ref_buf->idx = buf_idx;
-
-    if (buf_idx != INVALID_IDX) {
-      YV12_BUFFER_CONFIG *const buf = &cm->buffer_pool->frame_bufs[buf_idx].buf;
-      ref_buf->buf = buf;
-      av1_setup_scale_factors_for_frame(&ref_buf->sf, buf->y_crop_width,
-                                        buf->y_crop_height, cm->width,
-                                        cm->height);
-      if (av1_is_scaled(&ref_buf->sf))
-        aom_extend_frame_borders(buf, num_planes);
-    } else {
-      ref_buf->buf = NULL;
-    }
-  }
-
-  av1_setup_scale_factors_for_frame(&cm->sf_identity, cm->width, cm->height,
-                                    cm->width, cm->height);
-
-  set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
-}
-
-static uint8_t calculate_next_resize_scale(const AV1_COMP *cpi) {
-  // Choose an arbitrary random number
-  static unsigned int seed = 56789;
-  const AV1EncoderConfig *oxcf = &cpi->oxcf;
-  if (oxcf->pass == 1) return SCALE_NUMERATOR;
-  uint8_t new_denom = SCALE_NUMERATOR;
-
-  if (cpi->common.seq_params.reduced_still_picture_hdr) return SCALE_NUMERATOR;
-  switch (oxcf->resize_mode) {
-    case RESIZE_NONE: new_denom = SCALE_NUMERATOR; break;
-    case RESIZE_FIXED:
-      if (cpi->common.frame_type == KEY_FRAME)
-        new_denom = oxcf->resize_kf_scale_denominator;
-      else
-        new_denom = oxcf->resize_scale_denominator;
-      break;
-    case RESIZE_RANDOM: new_denom = lcg_rand16(&seed) % 9 + 8; break;
-    default: assert(0);
-  }
-  return new_denom;
-}
-
-static uint8_t calculate_next_superres_scale(AV1_COMP *cpi) {
-  // Choose an arbitrary random number
-  static unsigned int seed = 34567;
-  const AV1EncoderConfig *oxcf = &cpi->oxcf;
-  if (oxcf->pass == 1) return SCALE_NUMERATOR;
-  uint8_t new_denom = SCALE_NUMERATOR;
-
-  // Make sure that superres mode of the frame is consistent with the
-  // sequence-level flag.
-  assert(IMPLIES(oxcf->superres_mode != SUPERRES_NONE,
-                 cpi->common.seq_params.enable_superres));
-  assert(IMPLIES(!cpi->common.seq_params.enable_superres,
-                 oxcf->superres_mode == SUPERRES_NONE));
-
-  switch (oxcf->superres_mode) {
-    case SUPERRES_NONE: new_denom = SCALE_NUMERATOR; break;
-    case SUPERRES_FIXED:
-      if (cpi->common.frame_type == KEY_FRAME)
-        new_denom = oxcf->superres_kf_scale_denominator;
-      else
-        new_denom = oxcf->superres_scale_denominator;
-      break;
-    case SUPERRES_RANDOM: new_denom = lcg_rand16(&seed) % 9 + 8; break;
-    case SUPERRES_QTHRESH: {
-      const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
-      const RATE_FACTOR_LEVEL rf_level = gf_group->rf_level[gf_group->index];
-      const double rate_factor_delta = rate_factor_deltas[rf_level];
-      const int qthresh = (rate_factor_delta <= 1.0)
-                              ? oxcf->superres_qthresh
-                              : oxcf->superres_kf_qthresh;
-      av1_set_target_rate(cpi, cpi->oxcf.width, cpi->oxcf.height);
-      int bottom_index, top_index;
-      const int q = av1_rc_pick_q_and_bounds(
-          cpi, cpi->oxcf.width, cpi->oxcf.height, &bottom_index, &top_index);
-      if (q < qthresh) {
-        new_denom = SCALE_NUMERATOR;
-      } else {
-        const uint8_t min_denom = SCALE_NUMERATOR + 1;
-        const uint8_t denom_step = (MAXQ - qthresh + 1) >> 3;
-
-        if (q == qthresh) {
-          new_denom = min_denom;
-        } else if (denom_step == 0) {
-          new_denom = SCALE_NUMERATOR << 1;
-        } else {
-          const uint8_t additional_denom = (q - qthresh) / denom_step;
-          new_denom =
-              AOMMIN(min_denom + additional_denom, SCALE_NUMERATOR << 1);
-        }
-      }
-      break;
-    }
-    default: assert(0);
-  }
-  return new_denom;
-}
-
-static int dimension_is_ok(int orig_dim, int resized_dim, int denom) {
-  return (resized_dim * SCALE_NUMERATOR >= orig_dim * denom / 2);
-}
-
-static int dimensions_are_ok(int owidth, int oheight, size_params_type *rsz) {
-  // Only need to check the width, as scaling is horizontal only.
-  (void)oheight;
-  return dimension_is_ok(owidth, rsz->resize_width, rsz->superres_denom);
-}
-
-static int validate_size_scales(RESIZE_MODE resize_mode,
-                                SUPERRES_MODE superres_mode, int owidth,
-                                int oheight, size_params_type *rsz) {
-  if (dimensions_are_ok(owidth, oheight, rsz)) {  // Nothing to do.
-    return 1;
-  }
-
-  // Calculate current resize scale.
-  int resize_denom =
-      AOMMAX(DIVIDE_AND_ROUND(owidth * SCALE_NUMERATOR, rsz->resize_width),
-             DIVIDE_AND_ROUND(oheight * SCALE_NUMERATOR, rsz->resize_height));
-
-  if (resize_mode != RESIZE_RANDOM && superres_mode == SUPERRES_RANDOM) {
-    // Alter superres scale as needed to enforce conformity.
-    rsz->superres_denom =
-        (2 * SCALE_NUMERATOR * SCALE_NUMERATOR) / resize_denom;
-    if (!dimensions_are_ok(owidth, oheight, rsz)) {
-      if (rsz->superres_denom > SCALE_NUMERATOR) --rsz->superres_denom;
-    }
-  } else if (resize_mode == RESIZE_RANDOM && superres_mode != SUPERRES_RANDOM) {
-    // Alter resize scale as needed to enforce conformity.
-    resize_denom =
-        (2 * SCALE_NUMERATOR * SCALE_NUMERATOR) / rsz->superres_denom;
-    rsz->resize_width = owidth;
-    rsz->resize_height = oheight;
-    av1_calculate_scaled_size(&rsz->resize_width, &rsz->resize_height,
-                              resize_denom);
-    if (!dimensions_are_ok(owidth, oheight, rsz)) {
-      if (resize_denom > SCALE_NUMERATOR) {
-        --resize_denom;
-        rsz->resize_width = owidth;
-        rsz->resize_height = oheight;
-        av1_calculate_scaled_size(&rsz->resize_width, &rsz->resize_height,
-                                  resize_denom);
-      }
-    }
-  } else if (resize_mode == RESIZE_RANDOM && superres_mode == SUPERRES_RANDOM) {
-    // Alter both resize and superres scales as needed to enforce conformity.
-    do {
-      if (resize_denom > rsz->superres_denom)
-        --resize_denom;
-      else
-        --rsz->superres_denom;
-      rsz->resize_width = owidth;
-      rsz->resize_height = oheight;
-      av1_calculate_scaled_size(&rsz->resize_width, &rsz->resize_height,
-                                resize_denom);
-    } while (!dimensions_are_ok(owidth, oheight, rsz) &&
-             (resize_denom > SCALE_NUMERATOR ||
-              rsz->superres_denom > SCALE_NUMERATOR));
-  } else {  // We are allowed to alter neither resize scale nor superres
-            // scale.
-    return 0;
-  }
-  return dimensions_are_ok(owidth, oheight, rsz);
-}
-
-// Calculates resize and superres params for next frame
-size_params_type av1_calculate_next_size_params(AV1_COMP *cpi) {
-  const AV1EncoderConfig *oxcf = &cpi->oxcf;
-  size_params_type rsz = { oxcf->width, oxcf->height, SCALE_NUMERATOR };
-  int resize_denom;
-  if (oxcf->pass == 1) return rsz;
-  if (cpi->resize_pending_width && cpi->resize_pending_height) {
-    rsz.resize_width = cpi->resize_pending_width;
-    rsz.resize_height = cpi->resize_pending_height;
-    cpi->resize_pending_width = cpi->resize_pending_height = 0;
-  } else {
-    resize_denom = calculate_next_resize_scale(cpi);
-    rsz.resize_width = cpi->oxcf.width;
-    rsz.resize_height = cpi->oxcf.height;
-    av1_calculate_scaled_size(&rsz.resize_width, &rsz.resize_height,
-                              resize_denom);
-  }
-  rsz.superres_denom = calculate_next_superres_scale(cpi);
-  if (!validate_size_scales(oxcf->resize_mode, oxcf->superres_mode, oxcf->width,
-                            oxcf->height, &rsz))
-    assert(0 && "Invalid scale parameters");
-  return rsz;
-}
-
-static void setup_frame_size_from_params(AV1_COMP *cpi, size_params_type *rsz) {
-  int encode_width = rsz->resize_width;
-  int encode_height = rsz->resize_height;
-
-  AV1_COMMON *cm = &cpi->common;
-  cm->superres_upscaled_width = encode_width;
-  cm->superres_upscaled_height = encode_height;
-  cm->superres_scale_denominator = rsz->superres_denom;
-  av1_calculate_scaled_superres_size(&encode_width, &encode_height,
-                                     rsz->superres_denom);
-  set_frame_size(cpi, encode_width, encode_height);
-}
-
-static void setup_frame_size(AV1_COMP *cpi) {
-  size_params_type rsz = av1_calculate_next_size_params(cpi);
-  setup_frame_size_from_params(cpi, &rsz);
-}
-
-static void superres_post_encode(AV1_COMP *cpi) {
-  AV1_COMMON *cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-
-  if (!av1_superres_scaled(cm)) return;
-
-  assert(cpi->oxcf.enable_superres);
-  assert(!is_lossless_requested(&cpi->oxcf));
-  assert(!cm->all_lossless);
-
-  av1_superres_upscale(cm, NULL);
-
-  // If regular resizing is occurring the source will need to be downscaled to
-  // match the upscaled superres resolution. Otherwise the original source is
-  // used.
-  if (!av1_resize_scaled(cm)) {
-    cpi->source = cpi->unscaled_source;
-    if (cpi->last_source != NULL) cpi->last_source = cpi->unscaled_last_source;
-  } else {
-    assert(cpi->unscaled_source->y_crop_width != cm->superres_upscaled_width);
-    assert(cpi->unscaled_source->y_crop_height != cm->superres_upscaled_height);
-    // Do downscale. cm->(width|height) has been updated by
-    // av1_superres_upscale
-    if (aom_realloc_frame_buffer(
-            &cpi->scaled_source, cm->superres_upscaled_width,
-            cm->superres_upscaled_height, cm->seq_params.subsampling_x,
-            cm->seq_params.subsampling_y, cm->seq_params.use_highbitdepth,
-            AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL))
-      aom_internal_error(
-          &cm->error, AOM_CODEC_MEM_ERROR,
-          "Failed to reallocate scaled source buffer for superres");
-    assert(cpi->scaled_source.y_crop_width == cm->superres_upscaled_width);
-    assert(cpi->scaled_source.y_crop_height == cm->superres_upscaled_height);
-    av1_resize_and_extend_frame(cpi->unscaled_source, &cpi->scaled_source,
-                                (int)cm->seq_params.bit_depth, num_planes);
-    cpi->source = &cpi->scaled_source;
-  }
-}
-
-static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
-
-  assert(IMPLIES(is_lossless_requested(&cpi->oxcf),
-                 cm->coded_lossless && cm->all_lossless));
-
-  const int no_loopfilter = cm->coded_lossless || cm->large_scale_tile;
-  const int no_cdef =
-      !cm->seq_params.enable_cdef || cm->coded_lossless || cm->large_scale_tile;
-  const int no_restoration = !cm->seq_params.enable_restoration ||
-                             cm->all_lossless || cm->large_scale_tile;
-
-  struct loopfilter *lf = &cm->lf;
-
-  if (no_loopfilter) {
-    lf->filter_level[0] = 0;
-    lf->filter_level[1] = 0;
-  } else {
-    struct aom_usec_timer timer;
-
-    aom_clear_system_state();
-
-    aom_usec_timer_start(&timer);
-
-    av1_pick_filter_level(cpi->source, cpi, cpi->sf.lpf_pick);
-
-    aom_usec_timer_mark(&timer);
-    cpi->time_pick_lpf += aom_usec_timer_elapsed(&timer);
-  }
-
-  if (lf->filter_level[0] || lf->filter_level[1]) {
-#if LOOP_FILTER_BITMASK
-    av1_loop_filter_frame(cm->frame_to_show, cm, xd, 0, 0, num_planes, 0);
-#else
-    if (cpi->num_workers > 1)
-      av1_loop_filter_frame_mt(cm->frame_to_show, cm, xd, 0, num_planes, 0,
-                               cpi->workers, cpi->num_workers,
-                               &cpi->lf_row_sync);
-    else
-      av1_loop_filter_frame(cm->frame_to_show, cm, xd, 0, num_planes, 0);
-#endif
-  }
-
-  if (!no_restoration)
-    av1_loop_restoration_save_boundary_lines(cm->frame_to_show, cm, 0);
-
-  if (no_cdef) {
-    cm->cdef_bits = 0;
-    cm->cdef_strengths[0] = 0;
-    cm->nb_cdef_strengths = 1;
-    cm->cdef_uv_strengths[0] = 0;
-  } else {
-    // Find CDEF parameters
-    av1_cdef_search(cm->frame_to_show, cpi->source, cm, xd,
-                    cpi->sf.fast_cdef_search);
-
-    // Apply the filter
-    av1_cdef_frame(cm->frame_to_show, cm, xd);
-  }
-
-  superres_post_encode(cpi);
-
-  if (no_restoration) {
-    cm->rst_info[0].frame_restoration_type = RESTORE_NONE;
-    cm->rst_info[1].frame_restoration_type = RESTORE_NONE;
-    cm->rst_info[2].frame_restoration_type = RESTORE_NONE;
-  } else {
-    av1_loop_restoration_save_boundary_lines(cm->frame_to_show, cm, 1);
-    av1_pick_filter_restoration(cpi->source, cpi);
-    if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
-        cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
-        cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
-      if (cpi->num_workers > 1)
-        av1_loop_restoration_filter_frame_mt(cm->frame_to_show, cm, 0,
-                                             cpi->workers, cpi->num_workers,
-                                             &cpi->lr_row_sync, &cpi->lr_ctxt);
-      else
-        av1_loop_restoration_filter_frame(cm->frame_to_show, cm, 0,
-                                          &cpi->lr_ctxt);
-    }
-  }
-}
-
-static int encode_without_recode_loop(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  int q = 0, bottom_index = 0, top_index = 0;  // Dummy variables.
-
-  aom_clear_system_state();
-
-  set_size_independent_vars(cpi);
-
-  setup_frame_size(cpi);
-
-  assert(cm->width == cpi->scaled_source.y_crop_width);
-  assert(cm->height == cpi->scaled_source.y_crop_height);
-
-  set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
-
-  cpi->source =
-      av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source);
-  if (cpi->unscaled_last_source != NULL)
-    cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source,
-                                             &cpi->scaled_last_source);
-  cpi->source->buf_8bit_valid = 0;
-  if (frame_is_intra_only(cm) == 0) {
-    scale_references(cpi);
-  }
-
-  av1_set_quantizer(cm, q);
-  setup_frame(cpi);
-  suppress_active_map(cpi);
-
-  // Variance adaptive and in frame q adjustment experiments are mutually
-  // exclusive.
-  if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
-    av1_vaq_frame_setup(cpi);
-  } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
-    av1_setup_in_frame_q_adj(cpi);
-  } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
-    av1_cyclic_refresh_setup(cpi);
-  }
-  apply_active_map(cpi);
-  if (cm->seg.enabled) {
-    if (!cm->seg.update_data && cm->prev_frame) {
-      segfeatures_copy(&cm->seg, &cm->prev_frame->seg);
-    } else {
-      calculate_segdata(&cm->seg);
-    }
-  } else {
-    memset(&cm->seg, 0, sizeof(cm->seg));
-  }
-  segfeatures_copy(&cm->cur_frame->seg, &cm->seg);
-
-  // transform / motion compensation build reconstruction frame
-  av1_encode_frame(cpi);
-
-  // Update some stats from cyclic refresh, and check if we should not update
-  // golden reference, for 1 pass CBR.
-  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->frame_type != KEY_FRAME &&
-      (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == AOM_CBR))
-    av1_cyclic_refresh_check_golden_update(cpi);
-
-  // Update the skip mb flag probabilities based on the distribution
-  // seen in the last encoder iteration.
-  // update_base_skip_probs(cpi);
-  aom_clear_system_state();
-  return AOM_CODEC_OK;
-}
-
-static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
-  AV1_COMMON *const cm = &cpi->common;
-  RATE_CONTROL *const rc = &cpi->rc;
-  int bottom_index, top_index;
-  int loop_count = 0;
-  int loop_at_this_size = 0;
-  int loop = 0;
-  int overshoot_seen = 0;
-  int undershoot_seen = 0;
-  int frame_over_shoot_limit;
-  int frame_under_shoot_limit;
-  int q = 0, q_low = 0, q_high = 0;
-
-  set_size_independent_vars(cpi);
-
-  cpi->source->buf_8bit_valid = 0;
-
-  aom_clear_system_state();
-  setup_frame_size(cpi);
-  set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
-
-  do {
-    aom_clear_system_state();
-
-    if (loop_count == 0) {
-      // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
-      set_mv_search_params(cpi);
-
-      // Reset the loop state for new frame size.
-      overshoot_seen = 0;
-      undershoot_seen = 0;
-
-      q_low = bottom_index;
-      q_high = top_index;
-
-      loop_at_this_size = 0;
-
-      // Decide frame size bounds first time through.
-      av1_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
-                                       &frame_under_shoot_limit,
-                                       &frame_over_shoot_limit);
-    }
-
-    // if frame was scaled calculate global_motion_search again if already
-    // done
-    if (loop_count > 0 && cpi->source && cpi->global_motion_search_done)
-      if (cpi->source->y_crop_width != cm->width ||
-          cpi->source->y_crop_height != cm->height)
-        cpi->global_motion_search_done = 0;
-    cpi->source =
-        av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source);
-    if (cpi->unscaled_last_source != NULL)
-      cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source,
-                                               &cpi->scaled_last_source);
-
-    if (frame_is_intra_only(cm) == 0) {
-      if (loop_count > 0) {
-        release_scaled_references(cpi);
-      }
-      scale_references(cpi);
-    }
-    av1_set_quantizer(cm, q);
-    // printf("Frame %d/%d: q = %d, frame_type = %d\n", cm->current_video_frame,
-    //        cm->show_frame, q, cm->frame_type);
-
-    if (loop_count == 0) setup_frame(cpi);
-
-    // Base q-index may have changed, so we need to assign proper default coef
-    // probs before every iteration.
-    if (cm->primary_ref_frame == PRIMARY_REF_NONE ||
-        cm->frame_refs[cm->primary_ref_frame].idx < 0) {
-      av1_default_coef_probs(cm);
-      av1_setup_frame_contexts(cm);
-    }
-
-    // Variance adaptive and in frame q adjustment experiments are mutually
-    // exclusive.
-    if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
-      av1_vaq_frame_setup(cpi);
-    } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
-      av1_setup_in_frame_q_adj(cpi);
-    }
-    if (cm->seg.enabled) {
-      if (!cm->seg.update_data && cm->prev_frame) {
-        segfeatures_copy(&cm->seg, &cm->prev_frame->seg);
-      } else {
-        calculate_segdata(&cm->seg);
-      }
-    } else {
-      memset(&cm->seg, 0, sizeof(cm->seg));
-    }
-    segfeatures_copy(&cm->cur_frame->seg, &cm->seg);
-
-    // transform / motion compensation build reconstruction frame
-    save_coding_context(cpi);
-    av1_encode_frame(cpi);
-
-    // Update the skip mb flag probabilities based on the distribution
-    // seen in the last encoder iteration.
-    // update_base_skip_probs(cpi);
-
-    aom_clear_system_state();
-
-    // Dummy pack of the bitstream using up to date stats to get an
-    // accurate estimate of output frame size to determine if we need
-    // to recode.
-    if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
-      restore_coding_context(cpi);
-
-      if (av1_pack_bitstream(cpi, dest, size) != AOM_CODEC_OK)
-        return AOM_CODEC_ERROR;
-
-      rc->projected_frame_size = (int)(*size) << 3;
-      restore_coding_context(cpi);
-
-      if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
-    }
-
-    if (cpi->oxcf.rc_mode == AOM_Q) {
-      loop = 0;
-    } else {
-      if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced &&
-          (rc->projected_frame_size < rc->max_frame_bandwidth)) {
-        int last_q = q;
-        int64_t kf_err;
-
-        int64_t high_err_target = cpi->ambient_err;
-        int64_t low_err_target = cpi->ambient_err >> 1;
-
-        if (cm->seq_params.use_highbitdepth) {
-          kf_err = aom_highbd_get_y_sse(cpi->source, get_frame_new_buffer(cm));
-        } else {
-          kf_err = aom_get_y_sse(cpi->source, get_frame_new_buffer(cm));
-        }
-        // Prevent possible divide by zero error below for perfect KF
-        kf_err += !kf_err;
-
-        // The key frame is not good enough or we can afford
-        // to make it better without undue risk of popping.
-        if ((kf_err > high_err_target &&
-             rc->projected_frame_size <= frame_over_shoot_limit) ||
-            (kf_err > low_err_target &&
-             rc->projected_frame_size <= frame_under_shoot_limit)) {
-          // Lower q_high
-          q_high = q > q_low ? q - 1 : q_low;
-
-          // Adjust Q
-          q = (int)((q * high_err_target) / kf_err);
-          q = AOMMIN(q, (q_high + q_low) >> 1);
-        } else if (kf_err < low_err_target &&
-                   rc->projected_frame_size >= frame_under_shoot_limit) {
-          // The key frame is much better than the previous frame
-          // Raise q_low
-          q_low = q < q_high ? q + 1 : q_high;
-
-          // Adjust Q
-          q = (int)((q * low_err_target) / kf_err);
-          q = AOMMIN(q, (q_high + q_low + 1) >> 1);
-        }
-
-        // Clamp Q to upper and lower limits:
-        q = clamp(q, q_low, q_high);
-
-        loop = q != last_q;
-      } else if (recode_loop_test(cpi, frame_over_shoot_limit,
-                                  frame_under_shoot_limit, q,
-                                  AOMMAX(q_high, top_index), bottom_index)) {
-        // Is the projected frame size out of range and are we allowed
-        // to attempt to recode.
-        int last_q = q;
-        int retries = 0;
-
-        // Frame size out of permitted range:
-        // Update correction factor & compute new Q to try...
-        // Frame is too large
-        if (rc->projected_frame_size > rc->this_frame_target) {
-          // Special case if the projected size is > the max allowed.
-          if (rc->projected_frame_size >= rc->max_frame_bandwidth)
-            q_high = rc->worst_quality;
-
-          // Raise Qlow as to at least the current value
-          q_low = q < q_high ? q + 1 : q_high;
-
-          if (undershoot_seen || loop_at_this_size > 1) {
-            // Update rate_correction_factor unless
-            av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
-
-            q = (q_high + q_low + 1) / 2;
-          } else {
-            // Update rate_correction_factor unless
-            av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
-
-            q = av1_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
-                                  AOMMAX(q_high, top_index), cm->width,
-                                  cm->height);
-
-            while (q < q_low && retries < 10) {
-              av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
-              q = av1_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
-                                    AOMMAX(q_high, top_index), cm->width,
-                                    cm->height);
-              retries++;
-            }
-          }
-
-          overshoot_seen = 1;
-        } else {
-          // Frame is too small
-          q_high = q > q_low ? q - 1 : q_low;
-
-          if (overshoot_seen || loop_at_this_size > 1) {
-            av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
-            q = (q_high + q_low) / 2;
-          } else {
-            av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
-            q = av1_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
-                                  top_index, cm->width, cm->height);
-            // Special case reset for qlow for constrained quality.
-            // This should only trigger where there is very substantial
-            // undershoot on a frame and the auto cq level is above
-            // the user passsed in value.
-            if (cpi->oxcf.rc_mode == AOM_CQ && q < q_low) {
-              q_low = q;
-            }
-
-            while (q > q_high && retries < 10) {
-              av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
-              q = av1_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
-                                    top_index, cm->width, cm->height);
-              retries++;
-            }
-          }
-
-          undershoot_seen = 1;
-        }
-
-        // Clamp Q to upper and lower limits:
-        q = clamp(q, q_low, q_high);
-
-        loop = (q != last_q);
-      } else {
-        loop = 0;
-      }
-    }
-
-    // Special case for overlay frame.
-    if (rc->is_src_frame_alt_ref &&
-        rc->projected_frame_size < rc->max_frame_bandwidth)
-      loop = 0;
-
-    if (!cpi->sf.gm_disable_recode) {
-      if (recode_loop_test_global_motion(cpi)) loop = 1;
-    }
-
-    if (loop) {
-      ++loop_count;
-      ++loop_at_this_size;
-
-#if CONFIG_INTERNAL_STATS
-      ++cpi->tot_recode_hits;
-#endif
-    }
-  } while (loop);
-
-  return AOM_CODEC_OK;
-}
-
-static int get_ref_frame_flags(const AV1_COMP *cpi) {
-  const int *const map = cpi->common.ref_frame_map;
-
-  // No.1 Priority: LAST_FRAME
-  const int last2_is_last = map[cpi->ref_fb_idx[1]] == map[cpi->ref_fb_idx[0]];
-  const int last3_is_last = map[cpi->ref_fb_idx[2]] == map[cpi->ref_fb_idx[0]];
-  const int gld_is_last =
-      map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]] == map[cpi->ref_fb_idx[0]];
-  const int bwd_is_last =
-      map[cpi->ref_fb_idx[BWDREF_FRAME - 1]] == map[cpi->ref_fb_idx[0]];
-  const int alt2_is_last =
-      map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] == map[cpi->ref_fb_idx[0]];
-  const int alt_is_last =
-      map[cpi->ref_fb_idx[ALTREF_FRAME - 1]] == map[cpi->ref_fb_idx[0]];
-
-  // No.2 Priority: ALTREF_FRAME
-  const int last2_is_alt =
-      map[cpi->ref_fb_idx[1]] == map[cpi->ref_fb_idx[ALTREF_FRAME - 1]];
-  const int last3_is_alt =
-      map[cpi->ref_fb_idx[2]] == map[cpi->ref_fb_idx[ALTREF_FRAME - 1]];
-  const int gld_is_alt = map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]] ==
-                         map[cpi->ref_fb_idx[ALTREF_FRAME - 1]];
-  const int bwd_is_alt = map[cpi->ref_fb_idx[BWDREF_FRAME - 1]] ==
-                         map[cpi->ref_fb_idx[ALTREF_FRAME - 1]];
-  const int alt2_is_alt = map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] ==
-                          map[cpi->ref_fb_idx[ALTREF_FRAME - 1]];
-
-  // No.3 Priority: LAST2_FRAME
-  const int last3_is_last2 = map[cpi->ref_fb_idx[2]] == map[cpi->ref_fb_idx[1]];
-  const int gld_is_last2 =
-      map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]] == map[cpi->ref_fb_idx[1]];
-  const int bwd_is_last2 =
-      map[cpi->ref_fb_idx[BWDREF_FRAME - 1]] == map[cpi->ref_fb_idx[1]];
-  const int alt2_is_last2 =
-      map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] == map[cpi->ref_fb_idx[1]];
-
-  // No.4 Priority: LAST3_FRAME
-  const int gld_is_last3 =
-      map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]] == map[cpi->ref_fb_idx[2]];
-  const int bwd_is_last3 =
-      map[cpi->ref_fb_idx[BWDREF_FRAME - 1]] == map[cpi->ref_fb_idx[2]];
-  const int alt2_is_last3 =
-      map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] == map[cpi->ref_fb_idx[2]];
-
-  // No.5 Priority: GOLDEN_FRAME
-  const int bwd_is_gld = map[cpi->ref_fb_idx[BWDREF_FRAME - 1]] ==
-                         map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]];
-  const int alt2_is_gld = map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] ==
-                          map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]];
-
-  // No.6 Priority: BWDREF_FRAME
-  const int alt2_is_bwd = map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] ==
-                          map[cpi->ref_fb_idx[BWDREF_FRAME - 1]];
-
-  // No.7 Priority: ALTREF2_FRAME
-
-  // After av1_apply_encoding_flags() is called, cpi->ref_frame_flags might be
-  // adjusted according to external encoder flags.
-  int flags = cpi->ext_ref_frame_flags;
-
-  if (cpi->rc.frames_till_gf_update_due == INT_MAX) flags &= ~AOM_GOLD_FLAG;
-
-  if (alt_is_last) flags &= ~AOM_ALT_FLAG;
-
-  if (last2_is_last || last2_is_alt) flags &= ~AOM_LAST2_FLAG;
-
-  if (last3_is_last || last3_is_alt || last3_is_last2) flags &= ~AOM_LAST3_FLAG;
-
-  if (gld_is_last || gld_is_alt || gld_is_last2 || gld_is_last3)
-    flags &= ~AOM_GOLD_FLAG;
-
-  if ((bwd_is_last || bwd_is_alt || bwd_is_last2 || bwd_is_last3 ||
-       bwd_is_gld) &&
-      (flags & AOM_BWD_FLAG))
-    flags &= ~AOM_BWD_FLAG;
-
-  if ((alt2_is_last || alt2_is_alt || alt2_is_last2 || alt2_is_last3 ||
-       alt2_is_gld || alt2_is_bwd) &&
-      (flags & AOM_ALT2_FLAG))
-    flags &= ~AOM_ALT2_FLAG;
-
-  return flags;
-}
-
-static void set_ext_overrides(AV1_COMP *cpi) {
-  // Overrides the defaults with the externally supplied values with
-  // av1_update_reference() and av1_update_entropy() calls
-  // Note: The overrides are valid only for the next frame passed
-  // to encode_frame_to_data_rate() function
-  if (cpi->ext_use_s_frame) cpi->common.frame_type = S_FRAME;
-  cpi->common.force_primary_ref_none = cpi->ext_use_primary_ref_none;
-
-  if (cpi->ext_refresh_frame_context_pending) {
-    cpi->common.refresh_frame_context = cpi->ext_refresh_frame_context;
-    cpi->ext_refresh_frame_context_pending = 0;
-  }
-  if (cpi->ext_refresh_frame_flags_pending) {
-    cpi->refresh_last_frame = cpi->ext_refresh_last_frame;
-    cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame;
-    cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame;
-    cpi->refresh_bwd_ref_frame = cpi->ext_refresh_bwd_ref_frame;
-    cpi->refresh_alt2_ref_frame = cpi->ext_refresh_alt2_ref_frame;
-    cpi->ext_refresh_frame_flags_pending = 0;
-  }
-  cpi->common.allow_ref_frame_mvs = cpi->ext_use_ref_frame_mvs;
-  // A keyframe is already error resilient and keyframes with
-  // error_resilient_mode interferes with the use of show_existing_frame
-  // when forward reference keyframes are enabled.
-  cpi->common.error_resilient_mode =
-      cpi->ext_use_error_resilient && cpi->common.frame_type != KEY_FRAME;
-}
-
-#define DUMP_RECON_FRAMES 0
-
-#if DUMP_RECON_FRAMES == 1
-// NOTE(zoeliu): For debug - Output the filtered reconstructed video.
-static void dump_filtered_recon_frames(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const YV12_BUFFER_CONFIG *recon_buf = cm->frame_to_show;
-
-  if (recon_buf == NULL) {
-    printf("Frame %d is not ready.\n", cm->current_video_frame);
-    return;
-  }
-
-  static const int flag_list[REF_FRAMES] = { 0,
-                                             AOM_LAST_FLAG,
-                                             AOM_LAST2_FLAG,
-                                             AOM_LAST3_FLAG,
-                                             AOM_GOLD_FLAG,
-                                             AOM_BWD_FLAG,
-                                             AOM_ALT2_FLAG,
-                                             AOM_ALT_FLAG };
-  printf(
-      "\n***Frame=%d (frame_offset=%d, show_frame=%d, "
-      "show_existing_frame=%d) "
-      "[LAST LAST2 LAST3 GOLDEN BWD ALT2 ALT]=[",
-      cm->current_video_frame, cm->frame_offset, cm->show_frame,
-      cm->show_existing_frame);
-  for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-    const int buf_idx = cm->frame_refs[ref_frame - LAST_FRAME].idx;
-    const int ref_offset =
-        (buf_idx >= 0)
-            ? (int)cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset
-            : -1;
-    printf(
-        " %d(%c-%d-%4.2f)", ref_offset,
-        (cpi->ref_frame_flags & flag_list[ref_frame]) ? 'Y' : 'N',
-        (buf_idx >= 0) ? (int)cpi->frame_rf_level[buf_idx] : -1,
-        (buf_idx >= 0) ? rate_factor_deltas[cpi->frame_rf_level[buf_idx]] : -1);
-  }
-  printf(" ]\n");
-
-  if (!cm->show_frame) {
-    printf("Frame %d is a no show frame, so no image dump.\n",
-           cm->current_video_frame);
-    return;
-  }
-
-  int h;
-  char file_name[256] = "/tmp/enc_filtered_recon.yuv";
-  FILE *f_recon = NULL;
-
-  if (cm->current_video_frame == 0) {
-    if ((f_recon = fopen(file_name, "wb")) == NULL) {
-      printf("Unable to open file %s to write.\n", file_name);
-      return;
-    }
-  } else {
-    if ((f_recon = fopen(file_name, "ab")) == NULL) {
-      printf("Unable to open file %s to append.\n", file_name);
-      return;
-    }
-  }
-  printf(
-      "\nFrame=%5d, encode_update_type[%5d]=%1d, frame_offset=%d, "
-      "show_frame=%d, show_existing_frame=%d, source_alt_ref_active=%d, "
-      "refresh_alt_ref_frame=%d, rf_level=%d, "
-      "y_stride=%4d, uv_stride=%4d, cm->width=%4d, cm->height=%4d\n\n",
-      cm->current_video_frame, cpi->twopass.gf_group.index,
-      cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
-      cm->frame_offset, cm->show_frame, cm->show_existing_frame,
-      cpi->rc.source_alt_ref_active, cpi->refresh_alt_ref_frame,
-      cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index],
-      recon_buf->y_stride, recon_buf->uv_stride, cm->width, cm->height);
-#if 0
-  int ref_frame;
-  printf("get_ref_frame_map_idx: [");
-  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
-    printf(" %d", get_ref_frame_map_idx(cpi, ref_frame));
-  printf(" ]\n");
-  printf("cm->new_fb_idx = %d\n", cm->new_fb_idx);
-  printf("cm->ref_frame_map = [");
-  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-    printf(" %d", cm->ref_frame_map[ref_frame - LAST_FRAME]);
-  }
-  printf(" ]\n");
-#endif  // 0
-
-  // --- Y ---
-  for (h = 0; h < cm->height; ++h) {
-    fwrite(&recon_buf->y_buffer[h * recon_buf->y_stride], 1, cm->width,
-           f_recon);
-  }
-  // --- U ---
-  for (h = 0; h < (cm->height >> 1); ++h) {
-    fwrite(&recon_buf->u_buffer[h * recon_buf->uv_stride], 1, (cm->width >> 1),
-           f_recon);
-  }
-  // --- V ---
-  for (h = 0; h < (cm->height >> 1); ++h) {
-    fwrite(&recon_buf->v_buffer[h * recon_buf->uv_stride], 1, (cm->width >> 1),
-           f_recon);
-  }
-
-  fclose(f_recon);
-}
-#endif  // DUMP_RECON_FRAMES
-
-static INLINE int is_frame_droppable(AV1_COMP *cpi) {
-  return !(cpi->refresh_alt_ref_frame || cpi->refresh_alt2_ref_frame ||
-           cpi->refresh_bwd_ref_frame || cpi->refresh_golden_frame ||
-           cpi->refresh_last_frame);
-}
-
-static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, uint8_t *dest,
-                                     int skip_adapt,
-                                     unsigned int *frame_flags) {
-  AV1_COMMON *const cm = &cpi->common;
-  SequenceHeader *const seq_params = &cm->seq_params;
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-  struct segmentation *const seg = &cm->seg;
-
-  set_ext_overrides(cpi);
-  aom_clear_system_state();
-
-  // frame type has been decided outside of this function call
-  cm->cur_frame->intra_only = frame_is_intra_only(cm);
-  cm->cur_frame->frame_type = cm->frame_type;
-
-  // S_FRAMEs are always error resilient
-  cm->error_resilient_mode |= frame_is_sframe(cm);
-
-  cm->large_scale_tile = cpi->oxcf.large_scale_tile;
-  cm->single_tile_decoding = cpi->oxcf.single_tile_decoding;
-  if (cm->large_scale_tile) seq_params->frame_id_numbers_present_flag = 0;
-
-  cm->allow_ref_frame_mvs &= frame_might_allow_ref_frame_mvs(cm);
-  // cm->allow_ref_frame_mvs needs to be written into the frame header while
-  // cm->large_scale_tile is 1, therefore, "cm->large_scale_tile=1" case is
-  // separated from frame_might_allow_ref_frame_mvs().
-  cm->allow_ref_frame_mvs &= !cm->large_scale_tile;
-
-  cm->allow_warped_motion =
-      cpi->oxcf.allow_warped_motion && frame_might_allow_warped_motion(cm);
-
-  // Reset the frame packet stamp index.
-  if (cm->frame_type == KEY_FRAME && cm->show_frame)
-    cm->current_video_frame = 0;
-
-  // NOTE:
-  // (1) Move the setup of the ref_frame_flags upfront as it would be
-  //     determined by the current frame properties;
-  // (2) The setup of the ref_frame_flags applies to both
-  // show_existing_frame's
-  //     and the other cases.
-  if (cm->current_video_frame > 0)
-    cpi->ref_frame_flags = get_ref_frame_flags(cpi);
-
-  if (encode_show_existing_frame(cm)) {
-    // NOTE(zoeliu): In BIDIR_PRED, the existing frame to show is the current
-    //               BWDREF_FRAME in the reference frame buffer.
-    if (cm->frame_type == KEY_FRAME) {
-      cm->reset_decoder_state = 1;
-    } else {
-      cm->frame_type = INTER_FRAME;
-    }
-    cm->show_frame = 1;
-    cpi->frame_flags = *frame_flags;
-
-    restore_coding_context(cpi);
-
-    // Build the bitstream
-    if (av1_pack_bitstream(cpi, dest, size) != AOM_CODEC_OK)
-      return AOM_CODEC_ERROR;
-
-    cpi->seq_params_locked = 1;
-
-    // Set up frame to show to get ready for stats collection.
-    cm->frame_to_show = get_frame_new_buffer(cm);
-
-    // Update current frame offset.
-    cm->frame_offset =
-        cm->buffer_pool->frame_bufs[cm->new_fb_idx].cur_frame_offset;
-
-#if DUMP_RECON_FRAMES == 1
-    // NOTE(zoeliu): For debug - Output the filtered reconstructed video.
-    dump_filtered_recon_frames(cpi);
-#endif  // DUMP_RECON_FRAMES
-
-    // Update the LAST_FRAME in the reference frame buffer.
-    // NOTE:
-    // (1) For BWDREF_FRAME as the show_existing_frame, the reference frame
-    //     update has been done previously when handling the LAST_BIPRED_FRAME
-    //     right before BWDREF_FRAME (in the display order);
-    // (2) For INTNL_OVERLAY as the show_existing_frame, the reference frame
-    //     update will be done when the following is called, which will
-    //     exchange
-    //     the virtual indexes between LAST_FRAME and ALTREF2_FRAME, so that
-    //     LAST3 will get retired, LAST2 becomes LAST3, LAST becomes LAST2,
-    //     and
-    //     ALTREF2_FRAME will serve as the new LAST_FRAME.
-    update_reference_frames(cpi);
-
-    // Update frame flags
-    cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;
-    cpi->frame_flags &= ~FRAMEFLAGS_BWDREF;
-    cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
-
-    *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
-
-    // Update the frame type
-    cm->last_frame_type = cm->frame_type;
-
-    // Since we allocate a spot for the OVERLAY frame in the gf group, we need
-    // to do post-encoding update accordingly.
-    if (cpi->rc.is_src_frame_alt_ref) {
-      av1_set_target_rate(cpi, cm->width, cm->height);
-      av1_rc_postencode_update(cpi, *size);
-    }
-
-    ++cm->current_video_frame;
-
-    return AOM_CODEC_OK;
-  }
-
-  // Set default state for segment based loop filter update flags.
-  cm->lf.mode_ref_delta_update = 0;
-
-  // Set various flags etc to special state if it is a key frame.
-  if (frame_is_intra_only(cm) || frame_is_sframe(cm)) {
-    // Reset the loop filter deltas and segmentation map.
-    av1_reset_segment_features(cm);
-
-    // If segmentation is enabled force a map update for key frames.
-    if (seg->enabled) {
-      seg->update_map = 1;
-      seg->update_data = 1;
-    }
-
-    // The alternate reference frame cannot be active for a key frame.
-    cpi->rc.source_alt_ref_active = 0;
-  }
-  if (cpi->oxcf.mtu == 0) {
-    cm->num_tg = cpi->oxcf.num_tile_groups;
-  } else {
-    // Use a default value for the purposes of weighting costs in probability
-    // updates
-    cm->num_tg = DEFAULT_MAX_NUM_TG;
-  }
-
-  // For 1 pass CBR, check if we are dropping this frame.
-  // Never drop on key frame.
-  if (oxcf->pass == 0 && oxcf->rc_mode == AOM_CBR &&
-      cm->frame_type != KEY_FRAME) {
-    if (av1_rc_drop_frame(cpi)) {
-      av1_rc_postencode_update_drop_frame(cpi);
-      return AOM_CODEC_OK;
-    }
-  }
-
-  aom_clear_system_state();
-
-#if CONFIG_INTERNAL_STATS
-  memset(cpi->mode_chosen_counts, 0,
-         MAX_MODES * sizeof(*cpi->mode_chosen_counts));
-#endif
-
-  if (seq_params->frame_id_numbers_present_flag) {
-    /* Non-normative definition of current_frame_id ("frame counter" with
-     * wraparound) */
-    const int frame_id_length = FRAME_ID_LENGTH;
-    if (cm->current_frame_id == -1) {
-      int lsb, msb;
-      /* quasi-random initialization of current_frame_id for a key frame */
-      if (cpi->source->flags & YV12_FLAG_HIGHBITDEPTH) {
-        lsb = CONVERT_TO_SHORTPTR(cpi->source->y_buffer)[0] & 0xff;
-        msb = CONVERT_TO_SHORTPTR(cpi->source->y_buffer)[1] & 0xff;
-      } else {
-        lsb = cpi->source->y_buffer[0] & 0xff;
-        msb = cpi->source->y_buffer[1] & 0xff;
-      }
-      cm->current_frame_id = ((msb << 8) + lsb) % (1 << frame_id_length);
-
-      // S_frame is meant for stitching different streams of different
-      // resolutions together, so current_frame_id must be the
-      // same across different streams of the same content current_frame_id
-      // should be the same and not random. 0x37 is a chosen number as start
-      // point
-      if (cpi->oxcf.sframe_enabled) cm->current_frame_id = 0x37;
-    } else {
-      cm->current_frame_id =
-          (cm->current_frame_id + 1 + (1 << frame_id_length)) %
-          (1 << frame_id_length);
-    }
-  }
-
-  switch (cpi->oxcf.cdf_update_mode) {
-    case 0:  // No CDF update for any frames(4~6% compression loss).
-      cm->disable_cdf_update = 1;
-      break;
-    case 1:  // Enable CDF update for all frames.
-      cm->disable_cdf_update = 0;
-      break;
-    case 2:
-      // Strategically determine at which frames to do CDF update.
-      // Currently only enable CDF update for all-intra and no-show frames(1.5%
-      // compression loss).
-      // TODO(huisu@google.com): design schemes for various trade-offs between
-      // compression quality and decoding speed.
-      cm->disable_cdf_update =
-          (frame_is_intra_only(cm) || !cm->show_frame) ? 0 : 1;
-      break;
-  }
-  cm->timing_info_present &= !seq_params->reduced_still_picture_hdr;
-
-  if (cpi->sf.recode_loop == DISALLOW_RECODE) {
-    if (encode_without_recode_loop(cpi) != AOM_CODEC_OK) return AOM_CODEC_ERROR;
-  } else {
-    if (encode_with_recode_loop(cpi, size, dest) != AOM_CODEC_OK)
-      return AOM_CODEC_ERROR;
-  }
-
-  cm->last_tile_cols = cm->tile_cols;
-  cm->last_tile_rows = cm->tile_rows;
-
-#ifdef OUTPUT_YUV_SKINMAP
-  if (cpi->common.current_video_frame > 1) {
-    av1_compute_skin_map(cpi, yuv_skinmap_file);
-  }
-#endif  // OUTPUT_YUV_SKINMAP
-
-  // Special case code to reduce pulsing when key frames are forced at a
-  // fixed interval. Note the reconstruction error if it is the frame before
-  // the force key frame
-  if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {
-    if (seq_params->use_highbitdepth) {
-      cpi->ambient_err =
-          aom_highbd_get_y_sse(cpi->source, get_frame_new_buffer(cm));
-    } else {
-      cpi->ambient_err = aom_get_y_sse(cpi->source, get_frame_new_buffer(cm));
-    }
-  }
-
-  // If the encoder forced a KEY_FRAME decision or if frame is an S_FRAME
-  if ((cm->frame_type == KEY_FRAME && cm->show_frame) || frame_is_sframe(cm)) {
-    cpi->refresh_last_frame = 1;
-  }
-
-  cm->frame_to_show = get_frame_new_buffer(cm);
-  cm->frame_to_show->color_primaries = seq_params->color_primaries;
-  cm->frame_to_show->transfer_characteristics =
-      seq_params->transfer_characteristics;
-  cm->frame_to_show->matrix_coefficients = seq_params->matrix_coefficients;
-  cm->frame_to_show->monochrome = seq_params->monochrome;
-  cm->frame_to_show->chroma_sample_position =
-      seq_params->chroma_sample_position;
-  cm->frame_to_show->color_range = seq_params->color_range;
-  cm->frame_to_show->render_width = cm->render_width;
-  cm->frame_to_show->render_height = cm->render_height;
-
-  // TODO(zoeliu): For non-ref frames, loop filtering may need to be turned
-  // off.
-
-  // Pick the loop filter level for the frame.
-  if (!cm->allow_intrabc) {
-    loopfilter_frame(cpi, cm);
-  } else {
-    cm->lf.filter_level[0] = 0;
-    cm->lf.filter_level[1] = 0;
-    cm->cdef_bits = 0;
-    cm->cdef_strengths[0] = 0;
-    cm->nb_cdef_strengths = 1;
-    cm->cdef_uv_strengths[0] = 0;
-    cm->rst_info[0].frame_restoration_type = RESTORE_NONE;
-    cm->rst_info[1].frame_restoration_type = RESTORE_NONE;
-    cm->rst_info[2].frame_restoration_type = RESTORE_NONE;
-  }
-
-  // TODO(debargha): Fix mv search range on encoder side
-  // aom_extend_frame_inner_borders(cm->frame_to_show, av1_num_planes(cm));
-  aom_extend_frame_borders(cm->frame_to_show, av1_num_planes(cm));
-
-#ifdef OUTPUT_YUV_REC
-  aom_write_one_yuv_frame(cm, cm->frame_to_show);
-#endif
-
-  // Build the bitstream
-  if (av1_pack_bitstream(cpi, dest, size) != AOM_CODEC_OK)
-    return AOM_CODEC_ERROR;
-
-  cpi->seq_params_locked = 1;
-
-  if (skip_adapt) return AOM_CODEC_OK;
-
-  if (seq_params->frame_id_numbers_present_flag) {
-    int i;
-    // Update reference frame id values based on the value of refresh_frame_mask
-    for (i = 0; i < REF_FRAMES; i++) {
-      if ((cpi->refresh_frame_mask >> i) & 1) {
-        cm->ref_frame_id[i] = cm->current_frame_id;
-      }
-    }
-  }
-
-#if DUMP_RECON_FRAMES == 1
-  // NOTE(zoeliu): For debug - Output the filtered reconstructed video.
-  dump_filtered_recon_frames(cpi);
-#endif  // DUMP_RECON_FRAMES
-
-  if (cm->seg.enabled) {
-    if (cm->seg.update_map) {
-      update_reference_segmentation_map(cpi);
-    } else if (cm->last_frame_seg_map) {
-      memcpy(cm->current_frame_seg_map, cm->last_frame_seg_map,
-             cm->mi_cols * cm->mi_rows * sizeof(uint8_t));
-    }
-  }
-
-  if (frame_is_intra_only(cm) == 0) {
-    release_scaled_references(cpi);
-  }
-
-  update_reference_frames(cpi);
-
-#if CONFIG_ENTROPY_STATS
-  av1_accumulate_frame_counts(&aggregate_fc, &cpi->counts);
-#endif  // CONFIG_ENTROPY_STATS
-
-  if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
-    *cm->fc = cpi->tile_data[cm->largest_tile_id].tctx;
-    av1_reset_cdf_symbol_counters(cm->fc);
-  }
-
-  if (cpi->refresh_golden_frame == 1)
-    cpi->frame_flags |= FRAMEFLAGS_GOLDEN;
-  else
-    cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;
-
-  if (cpi->refresh_alt_ref_frame == 1)
-    cpi->frame_flags |= FRAMEFLAGS_ALTREF;
-  else
-    cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
-
-  if (cpi->refresh_bwd_ref_frame == 1)
-    cpi->frame_flags |= FRAMEFLAGS_BWDREF;
-  else
-    cpi->frame_flags &= ~FRAMEFLAGS_BWDREF;
-
-  cm->last_frame_type = cm->frame_type;
-
-  av1_rc_postencode_update(cpi, *size);
-
-  if (cm->frame_type == KEY_FRAME) {
-    // Tell the caller that the frame was coded as a key frame
-    *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY;
-  } else {
-    *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
-  }
-
-  // Clear the one shot update flags for segmentation map and mode/ref loop
-  // filter deltas.
-  cm->seg.update_map = 0;
-  cm->seg.update_data = 0;
-  cm->lf.mode_ref_delta_update = 0;
-
-  // A droppable frame might not be shown but it always
-  // takes a space in the gf group. Therefore, even when
-  // it is not shown, we still need update the count down.
-
-  if (cm->show_frame) {
-    // TODO(zoeliu): We may only swamp mi and prev_mi for those frames that
-    // are
-    // being used as reference.
-    swap_mi_and_prev_mi(cm);
-    // Don't increment frame counters if this was an altref buffer
-    // update not a real frame
-
-    ++cm->current_video_frame;
-  }
-
-  // NOTE: Shall not refer to any frame not used as reference.
-  if (cm->is_reference_frame) {
-    // keep track of the last coded dimensions
-    cm->last_width = cm->width;
-    cm->last_height = cm->height;
-
-    // reset to normal state now that we are done.
-    cm->last_show_frame = cm->show_frame;
-  }
-
-  return AOM_CODEC_OK;
-}
-
-static INLINE void update_keyframe_counters(AV1_COMP *cpi) {
-  // TODO(zoeliu): To investigate whether we should treat BWDREF_FRAME
-  //               differently here for rc->avg_frame_bandwidth.
-  if (cpi->common.show_frame || cpi->rc.is_bwd_ref_frame) {
-    if (!cpi->common.show_existing_frame || cpi->rc.is_src_frame_alt_ref ||
-        cpi->common.frame_type == KEY_FRAME) {
-      // If this is a show_existing_frame with a source other than altref,
-      // or if it is not a displayed forward keyframe, the keyframe update
-      // counters were incremented when it was originally encoded.
-      cpi->rc.frames_since_key++;
-      cpi->rc.frames_to_key--;
-    }
-  }
-}
-
-static INLINE void update_frames_till_gf_update(AV1_COMP *cpi) {
-  // TODO(weitinglin): Updating this counter for is_frame_droppable
-  // is a work-around to handle the condition when a frame is drop.
-  // We should fix the cpi->common.show_frame flag
-  // instead of checking the other condition to update the counter properly.
-  if (cpi->common.show_frame || is_frame_droppable(cpi)) {
-    // Decrement count down till next gf
-    if (cpi->rc.frames_till_gf_update_due > 0)
-      cpi->rc.frames_till_gf_update_due--;
-  }
-}
-
-static INLINE void update_twopass_gf_group_index(AV1_COMP *cpi) {
-  // Increment the gf group index ready for the next frame. If this is
-  // a show_existing_frame with a source other than altref, or if it is not
-  // a displayed forward keyframe, the index was incremented when it was
-  // originally encoded.
-  if (!cpi->common.show_existing_frame || cpi->rc.is_src_frame_alt_ref ||
-      cpi->common.frame_type == KEY_FRAME) {
-    ++cpi->twopass.gf_group.index;
-  }
-}
-
-static void update_rc_counts(AV1_COMP *cpi) {
-  update_keyframe_counters(cpi);
-  update_frames_till_gf_update(cpi);
-  if (cpi->oxcf.pass == 2) update_twopass_gf_group_index(cpi);
-}
-
-static int Pass0Encode(AV1_COMP *cpi, size_t *size, uint8_t *dest,
-                       int skip_adapt, unsigned int *frame_flags) {
-  if (cpi->oxcf.rc_mode == AOM_CBR) {
-    av1_rc_get_one_pass_cbr_params(cpi);
-  } else {
-    av1_rc_get_one_pass_vbr_params(cpi);
-  }
-  if (encode_frame_to_data_rate(cpi, size, dest, skip_adapt, frame_flags) !=
-      AOM_CODEC_OK) {
-    return AOM_CODEC_ERROR;
-  }
-  update_rc_counts(cpi);
-  check_show_existing_frame(cpi);
-  return AOM_CODEC_OK;
-}
-
-static int Pass2Encode(AV1_COMP *cpi, size_t *size, uint8_t *dest,
-                       unsigned int *frame_flags) {
-#if CONFIG_MISMATCH_DEBUG
-  mismatch_move_frame_idx_w();
-#endif
-#if TXCOEFF_COST_TIMER
-  AV1_COMMON *cm = &cpi->common;
-  cm->txcoeff_cost_timer = 0;
-  cm->txcoeff_cost_count = 0;
-#endif
-
-  if (encode_frame_to_data_rate(cpi, size, dest, 0, frame_flags) !=
-      AOM_CODEC_OK) {
-    return AOM_CODEC_ERROR;
-  }
-
-#if TXCOEFF_COST_TIMER
-  cm->cum_txcoeff_cost_timer += cm->txcoeff_cost_timer;
-  fprintf(stderr,
-          "\ntxb coeff cost block number: %ld, frame time: %ld, cum time %ld "
-          "in us\n",
-          cm->txcoeff_cost_count, cm->txcoeff_cost_timer,
-          cm->cum_txcoeff_cost_timer);
-#endif
-
-  av1_twopass_postencode_update(cpi);
-  update_rc_counts(cpi);
-  check_show_existing_frame(cpi);
-  return AOM_CODEC_OK;
-}
-
-#if CONFIG_DENOISE
-static int apply_denoise_2d(AV1_COMP *cpi, YV12_BUFFER_CONFIG *sd,
-                            int block_size, float noise_level,
-                            int64_t time_stamp, int64_t end_time) {
-  AV1_COMMON *const cm = &cpi->common;
-  if (!cpi->denoise_and_model) {
-    cpi->denoise_and_model = aom_denoise_and_model_alloc(
-        cm->seq_params.bit_depth, block_size, noise_level);
-    if (!cpi->denoise_and_model) {
-      aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                         "Error allocating denoise and model");
-      return -1;
-    }
-  }
-  if (!cpi->film_grain_table) {
-    cpi->film_grain_table = aom_malloc(sizeof(*cpi->film_grain_table));
-    if (!cpi->film_grain_table) {
-      aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                         "Error allocating grain table");
-      return -1;
-    }
-    memset(cpi->film_grain_table, 0, sizeof(*cpi->film_grain_table));
-  }
-  if (aom_denoise_and_model_run(cpi->denoise_and_model, sd,
-                                &cm->film_grain_params)) {
-    if (cm->film_grain_params.apply_grain) {
-      aom_film_grain_table_append(cpi->film_grain_table, time_stamp, end_time,
-                                  &cm->film_grain_params);
-    }
-  }
-  return 0;
-}
-#endif
-
-int av1_receive_raw_frame(AV1_COMP *cpi, aom_enc_frame_flags_t frame_flags,
-                          YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
-                          int64_t end_time) {
-  AV1_COMMON *const cm = &cpi->common;
-  const SequenceHeader *const seq_params = &cm->seq_params;
-  struct aom_usec_timer timer;
-  int res = 0;
-  const int subsampling_x = sd->subsampling_x;
-  const int subsampling_y = sd->subsampling_y;
-  const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
-
-  check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
-
-  aom_usec_timer_start(&timer);
-
-#if CONFIG_DENOISE
-  if (cpi->oxcf.noise_level > 0)
-    if (apply_denoise_2d(cpi, sd, cpi->oxcf.noise_block_size,
-                         cpi->oxcf.noise_level, time_stamp, end_time) < 0)
-      res = -1;
-#endif  //  CONFIG_DENOISE
-
-  if (av1_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
-                         use_highbitdepth, frame_flags))
-    res = -1;
-  aom_usec_timer_mark(&timer);
-  cpi->time_receive_data += aom_usec_timer_elapsed(&timer);
-
-  if ((seq_params->profile == PROFILE_0) && !seq_params->monochrome &&
-      (subsampling_x != 1 || subsampling_y != 1)) {
-    aom_internal_error(&cm->error, AOM_CODEC_INVALID_PARAM,
-                       "Non-4:2:0 color format requires profile 1 or 2");
-    res = -1;
-  }
-  if ((seq_params->profile == PROFILE_1) &&
-      !(subsampling_x == 0 && subsampling_y == 0)) {
-    aom_internal_error(&cm->error, AOM_CODEC_INVALID_PARAM,
-                       "Profile 1 requires 4:4:4 color format");
-    res = -1;
-  }
-  if ((seq_params->profile == PROFILE_2) &&
-      (seq_params->bit_depth <= AOM_BITS_10) &&
-      !(subsampling_x == 1 && subsampling_y == 0)) {
-    aom_internal_error(&cm->error, AOM_CODEC_INVALID_PARAM,
-                       "Profile 2 bit-depth < 10 requires 4:2:2 color format");
-    res = -1;
-  }
-
-  return res;
-}
-
-static int frame_is_reference(const AV1_COMP *cpi) {
-  const AV1_COMMON *cm = &cpi->common;
-
-  return cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
-         cpi->refresh_golden_frame || cpi->refresh_bwd_ref_frame ||
-         cpi->refresh_alt2_ref_frame || cpi->refresh_alt_ref_frame ||
-         !cm->error_resilient_mode || cm->lf.mode_ref_delta_update ||
-         cm->seg.update_map || cm->seg.update_data;
-}
-
-static void adjust_frame_rate(AV1_COMP *cpi,
-                              const struct lookahead_entry *source) {
-  int64_t this_duration;
-  int step = 0;
-
-  if (source->ts_start == cpi->first_time_stamp_ever) {
-    this_duration = source->ts_end - source->ts_start;
-    step = 1;
-  } else {
-    int64_t last_duration =
-        cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen;
-
-    this_duration = source->ts_end - cpi->last_end_time_stamp_seen;
-
-    // do a step update if the duration changes by 10%
-    if (last_duration)
-      step = (int)((this_duration - last_duration) * 10 / last_duration);
-  }
-
-  if (this_duration) {
-    if (step) {
-      av1_new_framerate(cpi, 10000000.0 / this_duration);
-    } else {
-      // Average this frame's rate into the last second's average
-      // frame rate. If we haven't seen 1 second yet, then average
-      // over the whole interval seen.
-      const double interval = AOMMIN(
-          (double)(source->ts_end - cpi->first_time_stamp_ever), 10000000.0);
-      double avg_duration = 10000000.0 / cpi->framerate;
-      avg_duration *= (interval - avg_duration + this_duration);
-      avg_duration /= interval;
-
-      av1_new_framerate(cpi, 10000000.0 / avg_duration);
-    }
-  }
-  cpi->last_time_stamp_seen = source->ts_start;
-  cpi->last_end_time_stamp_seen = source->ts_end;
-}
-
-// Returns 0 if this is not an alt ref else the offset of the source frame
-// used as the arf midpoint.
-static int get_arf_src_index(AV1_COMP *cpi) {
-  RATE_CONTROL *const rc = &cpi->rc;
-  int arf_src_index = 0;
-  if (is_altref_enabled(cpi)) {
-    if (cpi->oxcf.pass == 2) {
-      const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
-      if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
-        arf_src_index = gf_group->arf_src_offset[gf_group->index];
-      }
-    } else if (rc->source_alt_ref_pending) {
-      arf_src_index = rc->frames_till_gf_update_due;
-    }
-  }
-  return arf_src_index;
-}
-
-static int get_brf_src_index(AV1_COMP *cpi) {
-  int brf_src_index = 0;
-  const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
-
-  // TODO(zoeliu): We need to add the check on the -bwd_ref command line setup
-  //               flag.
-  if (gf_group->bidir_pred_enabled[gf_group->index]) {
-    if (cpi->oxcf.pass == 2) {
-      if (gf_group->update_type[gf_group->index] == BRF_UPDATE)
-        brf_src_index = gf_group->brf_src_offset[gf_group->index];
-    } else {
-      // TODO(zoeliu): To re-visit the setup for this scenario
-      brf_src_index = cpi->rc.bipred_group_interval - 1;
-    }
-  }
-
-  return brf_src_index;
-}
-
-// Returns 0 if this is not an alt ref else the offset of the source frame
-// used as the arf midpoint.
-static int get_arf2_src_index(AV1_COMP *cpi) {
-  int arf2_src_index = 0;
-  if (is_altref_enabled(cpi) && cpi->num_extra_arfs) {
-    if (cpi->oxcf.pass == 2) {
-      const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
-      if (gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) {
-        arf2_src_index = gf_group->arf_src_offset[gf_group->index];
-      }
-    }
-  }
-  return arf2_src_index;
-}
-
-static void check_src_altref(AV1_COMP *cpi,
-                             const struct lookahead_entry *source) {
-  RATE_CONTROL *const rc = &cpi->rc;
-
-  // If pass == 2, the parameters set here will be reset in
-  // av1_rc_get_second_pass_params()
-
-  if (cpi->oxcf.pass == 2) {
-    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
-    rc->is_src_frame_alt_ref =
-        (gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE) ||
-        (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE);
-    rc->is_src_frame_ext_arf =
-        gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE;
-  } else {
-    rc->is_src_frame_alt_ref =
-        cpi->alt_ref_source && (source == cpi->alt_ref_source);
-  }
-
-  if (rc->is_src_frame_alt_ref) {
-    // Current frame is an ARF overlay frame.
-    cpi->alt_ref_source = NULL;
-
-    if (rc->is_src_frame_ext_arf && !cpi->common.show_existing_frame) {
-      // For INTNL_OVERLAY, when show_existing_frame == 0, they do need to
-      // refresh the LAST_FRAME, i.e. LAST3 gets retired, LAST2 becomes LAST3,
-      // LAST becomes LAST2, and INTNL_OVERLAY becomes LAST.
-      cpi->refresh_last_frame = 1;
-    } else {
-      // Don't refresh the last buffer for an ARF overlay frame. It will
-      // become the GF so preserve last as an alternative prediction option.
-      cpi->refresh_last_frame = 0;
-    }
-  }
-}
-
-#if CONFIG_INTERNAL_STATS
-extern double av1_get_blockiness(const unsigned char *img1, int img1_pitch,
-                                 const unsigned char *img2, int img2_pitch,
-                                 int width, int height);
-
-static void adjust_image_stat(double y, double u, double v, double all,
-                              ImageStat *s) {
-  s->stat[STAT_Y] += y;
-  s->stat[STAT_U] += u;
-  s->stat[STAT_V] += v;
-  s->stat[STAT_ALL] += all;
-  s->worst = AOMMIN(s->worst, all);
-}
-
-static void compute_internal_stats(AV1_COMP *cpi, int frame_bytes) {
-  AV1_COMMON *const cm = &cpi->common;
-  double samples = 0.0;
-  uint32_t in_bit_depth = 8;
-  uint32_t bit_depth = 8;
-
-#if CONFIG_INTER_STATS_ONLY
-  if (cm->frame_type == KEY_FRAME) return;  // skip key frame
-#endif
-  cpi->bytes += frame_bytes;
-
-  if (cm->seq_params.use_highbitdepth) {
-    in_bit_depth = cpi->oxcf.input_bit_depth;
-    bit_depth = cm->seq_params.bit_depth;
-  }
-  if (cm->show_frame) {
-    const YV12_BUFFER_CONFIG *orig = cpi->source;
-    const YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
-    double y, u, v, frame_all;
-
-    cpi->count++;
-    if (cpi->b_calculate_psnr) {
-      PSNR_STATS psnr;
-      double frame_ssim2 = 0.0, weight = 0.0;
-      aom_clear_system_state();
-      // TODO(yaowu): unify these two versions into one.
-      aom_calc_highbd_psnr(orig, recon, &psnr, bit_depth, in_bit_depth);
-
-      adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3], psnr.psnr[0],
-                        &cpi->psnr);
-      cpi->total_sq_error += psnr.sse[0];
-      cpi->total_samples += psnr.samples[0];
-      samples = psnr.samples[0];
-      // TODO(yaowu): unify these two versions into one.
-      if (cm->seq_params.use_highbitdepth)
-        frame_ssim2 =
-            aom_highbd_calc_ssim(orig, recon, &weight, bit_depth, in_bit_depth);
-      else
-        frame_ssim2 = aom_calc_ssim(orig, recon, &weight);
-
-      cpi->worst_ssim = AOMMIN(cpi->worst_ssim, frame_ssim2);
-      cpi->summed_quality += frame_ssim2 * weight;
-      cpi->summed_weights += weight;
-
-#if 0
-      {
-        FILE *f = fopen("q_used.stt", "a");
-        double y2 = psnr.psnr[1];
-        double u2 = psnr.psnr[2];
-        double v2 = psnr.psnr[3];
-        double frame_psnr2 = psnr.psnr[0];
-        fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n",
-                cm->current_video_frame, y2, u2, v2,
-                frame_psnr2, frame_ssim2);
-        fclose(f);
-      }
-#endif
-    }
-    if (cpi->b_calculate_blockiness) {
-      if (!cm->seq_params.use_highbitdepth) {
-        const double frame_blockiness =
-            av1_get_blockiness(orig->y_buffer, orig->y_stride, recon->y_buffer,
-                               recon->y_stride, orig->y_width, orig->y_height);
-        cpi->worst_blockiness = AOMMAX(cpi->worst_blockiness, frame_blockiness);
-        cpi->total_blockiness += frame_blockiness;
-      }
-
-      if (cpi->b_calculate_consistency) {
-        if (!cm->seq_params.use_highbitdepth) {
-          const double this_inconsistency = aom_get_ssim_metrics(
-              orig->y_buffer, orig->y_stride, recon->y_buffer, recon->y_stride,
-              orig->y_width, orig->y_height, cpi->ssim_vars, &cpi->metrics, 1);
-
-          const double peak = (double)((1 << in_bit_depth) - 1);
-          const double consistency =
-              aom_sse_to_psnr(samples, peak, cpi->total_inconsistency);
-          if (consistency > 0.0)
-            cpi->worst_consistency =
-                AOMMIN(cpi->worst_consistency, consistency);
-          cpi->total_inconsistency += this_inconsistency;
-        }
-      }
-    }
-
-    frame_all =
-        aom_calc_fastssim(orig, recon, &y, &u, &v, bit_depth, in_bit_depth);
-    adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
-    frame_all = aom_psnrhvs(orig, recon, &y, &u, &v, bit_depth, in_bit_depth);
-    adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);
-  }
-}
-#endif  // CONFIG_INTERNAL_STATS
-
-static int is_integer_mv(AV1_COMP *cpi, const YV12_BUFFER_CONFIG *cur_picture,
-                         const YV12_BUFFER_CONFIG *last_picture,
-                         hash_table *last_hash_table) {
-  aom_clear_system_state();
-  // check use hash ME
-  int k;
-  uint32_t hash_value_1;
-  uint32_t hash_value_2;
-
-  const int block_size = 8;
-  const double threshold_current = 0.8;
-  const double threshold_average = 0.95;
-  const int max_history_size = 32;
-  int T = 0;  // total block
-  int C = 0;  // match with collocated block
-  int S = 0;  // smooth region but not match with collocated block
-  int M = 0;  // match with other block
-
-  const int pic_width = cur_picture->y_width;
-  const int pic_height = cur_picture->y_height;
-  for (int i = 0; i + block_size <= pic_height; i += block_size) {
-    for (int j = 0; j + block_size <= pic_width; j += block_size) {
-      const int x_pos = j;
-      const int y_pos = i;
-      int match = 1;
-      T++;
-
-      // check whether collocated block match with current
-      uint8_t *p_cur = cur_picture->y_buffer;
-      uint8_t *p_ref = last_picture->y_buffer;
-      int stride_cur = cur_picture->y_stride;
-      int stride_ref = last_picture->y_stride;
-      p_cur += (y_pos * stride_cur + x_pos);
-      p_ref += (y_pos * stride_ref + x_pos);
-
-      if (cur_picture->flags & YV12_FLAG_HIGHBITDEPTH) {
-        uint16_t *p16_cur = CONVERT_TO_SHORTPTR(p_cur);
-        uint16_t *p16_ref = CONVERT_TO_SHORTPTR(p_ref);
-        for (int tmpY = 0; tmpY < block_size && match; tmpY++) {
-          for (int tmpX = 0; tmpX < block_size && match; tmpX++) {
-            if (p16_cur[tmpX] != p16_ref[tmpX]) {
-              match = 0;
-            }
-          }
-          p16_cur += stride_cur;
-          p16_ref += stride_ref;
-        }
-      } else {
-        for (int tmpY = 0; tmpY < block_size && match; tmpY++) {
-          for (int tmpX = 0; tmpX < block_size && match; tmpX++) {
-            if (p_cur[tmpX] != p_ref[tmpX]) {
-              match = 0;
-            }
-          }
-          p_cur += stride_cur;
-          p_ref += stride_ref;
-        }
-      }
-
-      if (match) {
-        C++;
-        continue;
-      }
-
-      if (av1_hash_is_horizontal_perfect(cur_picture, block_size, x_pos,
-                                         y_pos) ||
-          av1_hash_is_vertical_perfect(cur_picture, block_size, x_pos, y_pos)) {
-        S++;
-        continue;
-      }
-
-      av1_get_block_hash_value(
-          cur_picture->y_buffer + y_pos * stride_cur + x_pos, stride_cur,
-          block_size, &hash_value_1, &hash_value_2,
-          (cur_picture->flags & YV12_FLAG_HIGHBITDEPTH), &cpi->td.mb);
-      // Hashing does not work for highbitdepth currently.
-      // TODO(Roger): Make it work for highbitdepth.
-      if (av1_use_hash_me(&cpi->common)) {
-        if (av1_has_exact_match(last_hash_table, hash_value_1, hash_value_2)) {
-          M++;
-        }
-      }
-    }
-  }
-
-  assert(T > 0);
-  double csm_rate = ((double)(C + S + M)) / ((double)(T));
-  double m_rate = ((double)(M)) / ((double)(T));
-
-  cpi->csm_rate_array[cpi->rate_index] = csm_rate;
-  cpi->m_rate_array[cpi->rate_index] = m_rate;
-
-  cpi->rate_index = (cpi->rate_index + 1) % max_history_size;
-  cpi->rate_size++;
-  cpi->rate_size = AOMMIN(cpi->rate_size, max_history_size);
-
-  if (csm_rate < threshold_current) {
-    return 0;
-  }
-
-  if (C == T) {
-    return 1;
-  }
-
-  double csm_average = 0.0;
-  double m_average = 0.0;
-
-  for (k = 0; k < cpi->rate_size; k++) {
-    csm_average += cpi->csm_rate_array[k];
-    m_average += cpi->m_rate_array[k];
-  }
-  csm_average /= cpi->rate_size;
-  m_average /= cpi->rate_size;
-
-  if (csm_average < threshold_average) {
-    return 0;
-  }
-
-  if (M > (T - C - S) / 3) {
-    return 1;
-  }
-
-  if (csm_rate > 0.99 && m_rate > 0.01) {
-    return 1;
-  }
-
-  if (csm_average + m_average > 1.01) {
-    return 1;
-  }
-
-  return 0;
-}
-
-int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
-                            size_t *size, uint8_t *dest, int64_t *time_stamp,
-                            int64_t *time_end, int flush,
-                            const aom_rational_t *timebase) {
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  BufferPool *const pool = cm->buffer_pool;
-  RATE_CONTROL *const rc = &cpi->rc;
-  struct aom_usec_timer cmptimer;
-  YV12_BUFFER_CONFIG *force_src_buffer = NULL;
-  struct lookahead_entry *last_source = NULL;
-  struct lookahead_entry *source = NULL;
-  int arf_src_index;
-  int brf_src_index;
-  int i;
-
-#if CONFIG_BITSTREAM_DEBUG
-  assert(cpi->oxcf.max_threads == 0 &&
-         "bitstream debug tool does not support multithreading");
-  bitstream_queue_record_write();
-  bitstream_queue_set_frame_write(cm->current_video_frame * 2 + cm->show_frame);
-#endif
-
-  cm->showable_frame = 0;
-  aom_usec_timer_start(&cmptimer);
-
-  set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV, 0);
-
-  // Normal defaults
-  cm->refresh_frame_context = oxcf->frame_parallel_decoding_mode
-                                  ? REFRESH_FRAME_CONTEXT_DISABLED
-                                  : REFRESH_FRAME_CONTEXT_BACKWARD;
-  if (oxcf->large_scale_tile)
-    cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_DISABLED;
-
-  // default reference buffers update config
-  av1_configure_buffer_updates_firstpass(cpi, LF_UPDATE);
-
-  // Initialize fields related to forward keyframes
-  cpi->no_show_kf = 0;
-  cm->reset_decoder_state = 0;
-
-  // Don't allow a show_existing_frame to coincide with an error resilient or
-  // S-Frame. An exception can be made in the case of a keyframe, since it
-  // does not depend on any previous frames. We must make this exception here
-  // because of the use of show_existing_frame with forward coded keyframes.
-  struct lookahead_entry *lookahead_src = NULL;
-  if (cm->current_video_frame > 0)
-    lookahead_src = av1_lookahead_peek(cpi->lookahead, 0);
-
-  int use_show_existing = 1;
-  if (lookahead_src != NULL) {
-    const int is_error_resilient =
-        cpi->oxcf.error_resilient_mode ||
-        (lookahead_src->flags & AOM_EFLAG_ERROR_RESILIENT);
-    const int is_s_frame = cpi->oxcf.s_frame_mode ||
-                           (lookahead_src->flags & AOM_EFLAG_SET_S_FRAME);
-    const int is_key_frame =
-        (rc->frames_to_key == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY);
-    use_show_existing = !(is_error_resilient || is_s_frame) || is_key_frame;
-  }
-
-  if (oxcf->pass == 2 && cm->show_existing_frame && use_show_existing) {
-    // Manage the source buffer and flush out the source frame that has been
-    // coded already; Also get prepared for PSNR calculation if needed.
-    if ((source = av1_lookahead_pop(cpi->lookahead, flush)) == NULL) {
-      *size = 0;
-      return -1;
-    }
-    av1_apply_encoding_flags(cpi, source->flags);
-    cpi->source = &source->img;
-    // TODO(zoeliu): To track down to determine whether it's needed to adjust
-    // the frame rate.
-    *time_stamp = source->ts_start;
-    *time_end = source->ts_end;
-
-    // We need to adjust frame rate for an overlay frame
-    if (cpi->rc.is_src_frame_alt_ref) adjust_frame_rate(cpi, source);
-
-    // Find a free buffer for the new frame, releasing the reference
-    // previously
-    // held.
-    if (cm->new_fb_idx != INVALID_IDX) {
-      --pool->frame_bufs[cm->new_fb_idx].ref_count;
-    }
-    cm->new_fb_idx = get_free_fb(cm);
-
-    if (cm->new_fb_idx == INVALID_IDX) return -1;
-
-    // Clear down mmx registers
-    aom_clear_system_state();
-
-    // Start with a 0 size frame.
-    *size = 0;
-
-    // We need to update the gf_group for show_existing overlay frame
-    if (cpi->rc.is_src_frame_alt_ref) av1_rc_get_second_pass_params(cpi);
-
-    if (Pass2Encode(cpi, size, dest, frame_flags) != AOM_CODEC_OK)
-      return AOM_CODEC_ERROR;
-
-    if (cpi->b_calculate_psnr) generate_psnr_packet(cpi);
-
-#if CONFIG_INTERNAL_STATS
-    compute_internal_stats(cpi, (int)(*size));
-#endif  // CONFIG_INTERNAL_STATS
-
-    // Clear down mmx registers
-    aom_clear_system_state();
-
-    cm->show_existing_frame = 0;
-    return 0;
-  }
-
-  // Should we encode an arf frame.
-  arf_src_index = get_arf_src_index(cpi);
-  if (arf_src_index) {
-    for (i = 0; i <= arf_src_index; ++i) {
-      struct lookahead_entry *e = av1_lookahead_peek(cpi->lookahead, i);
-      // Avoid creating an alt-ref if there's a forced keyframe pending.
-      if (e == NULL) {
-        break;
-      } else if (e->flags == AOM_EFLAG_FORCE_KF) {
-        arf_src_index = 0;
-        flush = 1;
-        break;
-      }
-    }
-  }
-
-  if (arf_src_index) {
-    assert(arf_src_index <= rc->frames_to_key);
-
-    if ((source = av1_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) {
-      cm->showable_frame = 1;
-      cpi->alt_ref_source = source;
-      // When arf_src_index == rc->frames_to_key, it indicates a fwd_kf
-      if (arf_src_index == rc->frames_to_key) {
-        // Skip temporal filtering and mark as intra_only if we have a fwd_kf
-        const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
-        int which_arf = gf_group->arf_update_idx[gf_group->index];
-        cpi->is_arf_filter_off[which_arf] = 1;
-        cpi->no_show_kf = 1;
-      } else {
-        if (oxcf->arnr_max_frames > 0) {
-          // Produce the filtered ARF frame.
-          av1_temporal_filter(cpi, arf_src_index);
-          aom_extend_frame_borders(&cpi->alt_ref_buffer, num_planes);
-          force_src_buffer = &cpi->alt_ref_buffer;
-        }
-      }
-      cm->show_frame = 0;
-      cm->intra_only = 0;
-
-      if (oxcf->pass < 2) {
-        // In second pass, the buffer updates configure will be set
-        // in the function av1_rc_get_second_pass_params
-        av1_configure_buffer_updates_firstpass(cpi, ARF_UPDATE);
-      }
-    }
-    rc->source_alt_ref_pending = 0;
-  }
-
-  // Should we encode an arf2 frame.
-  arf_src_index = get_arf2_src_index(cpi);
-  if (arf_src_index) {
-    for (i = 0; i <= arf_src_index; ++i) {
-      struct lookahead_entry *e = av1_lookahead_peek(cpi->lookahead, i);
-      // Avoid creating an alt-ref if there's a forced keyframe pending.
-      if (e == NULL) {
-        break;
-      } else if (e->flags == AOM_EFLAG_FORCE_KF) {
-        arf_src_index = 0;
-        flush = 1;
-        break;
-      }
-    }
-  }
-
-  if (arf_src_index) {
-    assert(arf_src_index <= rc->frames_to_key);
-
-    if ((source = av1_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) {
-      cm->showable_frame = 1;
-      cpi->alt_ref_source = source;
-
-      if (oxcf->arnr_max_frames > 0) {
-        // Produce the filtered ARF frame.
-        av1_temporal_filter(cpi, arf_src_index);
-        aom_extend_frame_borders(&cpi->alt_ref_buffer, num_planes);
-        force_src_buffer = &cpi->alt_ref_buffer;
-      }
-
-      cm->show_frame = 0;
-      cm->intra_only = 0;
-
-      if (oxcf->pass < 2) {
-        // In second pass, the buffer updates configure will be set
-        // in the function av1_rc_get_second_pass_params
-        av1_configure_buffer_updates_firstpass(cpi, INTNL_ARF_UPDATE);
-      }
-    }
-    rc->source_alt_ref_pending = 0;
-  }
-
-  rc->is_bwd_ref_frame = 0;
-  brf_src_index = get_brf_src_index(cpi);
-  if (brf_src_index) {
-    assert(brf_src_index <= rc->frames_to_key);
-    if ((source = av1_lookahead_peek(cpi->lookahead, brf_src_index)) != NULL) {
-      cm->showable_frame = 1;
-      cm->show_frame = 0;
-      cm->intra_only = 0;
-
-      if (oxcf->pass < 2) {
-        // In second pass, the buffer updates configure will be set
-        // in the function av1_rc_get_second_pass_params
-        av1_configure_buffer_updates_firstpass(cpi, BIPRED_UPDATE);
-      }
-    }
-  }
-
-  if (!source) {
-    // Get last frame source.
-    if (cm->current_video_frame > 0) {
-      if ((last_source = av1_lookahead_peek(cpi->lookahead, -1)) == NULL)
-        return -1;
-    }
-    if (cm->current_video_frame > 0) assert(last_source != NULL);
-    // Read in the source frame.
-    source = av1_lookahead_pop(cpi->lookahead, flush);
-
-    if (source != NULL) {
-      cm->show_frame = 1;
-      cm->intra_only = 0;
-
-      // Check to see if the frame should be encoded as an arf overlay.
-      check_src_altref(cpi, source);
-    }
-  }
-  if (source) {
-    cpi->unscaled_source = cpi->source =
-        force_src_buffer ? force_src_buffer : &source->img;
-    cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL;
-
-    *time_stamp = source->ts_start;
-    *time_end = source->ts_end;
-    av1_apply_encoding_flags(cpi, source->flags);
-    *frame_flags = (source->flags & AOM_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
-
-  } else {
-    *size = 0;
-    if (flush && oxcf->pass == 1 && !cpi->twopass.first_pass_done) {
-      av1_end_first_pass(cpi); /* get last stats packet */
-      cpi->twopass.first_pass_done = 1;
-    }
-    return -1;
-  }
-
-  if (source->ts_start < cpi->first_time_stamp_ever) {
-    cpi->first_time_stamp_ever = source->ts_start;
-    cpi->last_end_time_stamp_seen = source->ts_start;
-  }
-
-  // Clear down mmx registers
-  aom_clear_system_state();
-
-  // adjust frame rates based on timestamps given
-  if (cm->show_frame) adjust_frame_rate(cpi, source);
-
-  // Find a free buffer for the new frame, releasing the reference previously
-  // held.
-  if (cm->new_fb_idx != INVALID_IDX) {
-    --pool->frame_bufs[cm->new_fb_idx].ref_count;
-  }
-  cm->new_fb_idx = get_free_fb(cm);
-
-  if (cm->new_fb_idx == INVALID_IDX) return -1;
-
-  // Retain the RF_LEVEL for the current newly coded frame.
-  cpi->frame_rf_level[cm->new_fb_idx] =
-      cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index];
-
-  cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
-  cm->cur_frame->buf.buf_8bit_valid = 0;
-
-  if (cpi->film_grain_table) {
-    cm->seq_params.film_grain_params_present = aom_film_grain_table_lookup(
-        cpi->film_grain_table, *time_stamp, *time_end, 0 /* =erase */,
-        &cm->film_grain_params);
-  }
-  cm->cur_frame->film_grain_params_present =
-      cm->seq_params.film_grain_params_present;
-
-  // only one operating point supported now
-  const int64_t pts64 = ticks_to_timebase_units(timebase, *time_stamp);
-  if (pts64 < 0 || pts64 > UINT32_MAX) return AOM_CODEC_ERROR;
-  cpi->common.frame_presentation_time = (uint32_t)pts64;
-
-  // Start with a 0 size frame.
-  *size = 0;
-
-  cpi->frame_flags = *frame_flags;
-
-  if (oxcf->pass == 2) {
-    av1_rc_get_second_pass_params(cpi);
-  } else if (oxcf->pass == 1) {
-    setup_frame_size(cpi);
-  }
-
-  if (cpi->oxcf.pass != 0 || frame_is_intra_only(cm) == 1) {
-    for (i = 0; i < REF_FRAMES; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX;
-  }
-
-  cm->using_qmatrix = cpi->oxcf.using_qm;
-  cm->min_qmlevel = cpi->oxcf.qm_minlevel;
-  cm->max_qmlevel = cpi->oxcf.qm_maxlevel;
-
-  if (cm->seq_params.frame_id_numbers_present_flag) {
-    if (*time_stamp == 0) {
-      cpi->common.current_frame_id = -1;
-    }
-  }
-
-  cpi->cur_poc++;
-  if (oxcf->pass != 1 && cpi->common.allow_screen_content_tools &&
-      !frame_is_intra_only(cm)) {
-    if (cpi->common.seq_params.force_integer_mv == 2) {
-      struct lookahead_entry *previous_entry =
-          av1_lookahead_peek(cpi->lookahead, cpi->previous_index);
-      if (!previous_entry)
-        cpi->common.cur_frame_force_integer_mv = 0;
-      else
-        cpi->common.cur_frame_force_integer_mv = is_integer_mv(
-            cpi, cpi->source, &previous_entry->img, cpi->previous_hash_table);
-    } else {
-      cpi->common.cur_frame_force_integer_mv =
-          cpi->common.seq_params.force_integer_mv;
-    }
-  } else {
-    cpi->common.cur_frame_force_integer_mv = 0;
-  }
-
-  if (oxcf->pass == 1) {
-    cpi->td.mb.e_mbd.lossless[0] = is_lossless_requested(oxcf);
-    av1_first_pass(cpi, source);
-  } else if (oxcf->pass == 2) {
-    if (Pass2Encode(cpi, size, dest, frame_flags) != AOM_CODEC_OK)
-      return AOM_CODEC_ERROR;
-  } else {
-    // One pass encode
-    if (Pass0Encode(cpi, size, dest, 0, frame_flags) != AOM_CODEC_OK)
-      return AOM_CODEC_ERROR;
-  }
-  if (oxcf->pass != 1 && cpi->common.allow_screen_content_tools) {
-    cpi->previous_hash_table = &cm->cur_frame->hash_table;
-    {
-      int l;
-      for (l = -MAX_PRE_FRAMES; l < cpi->lookahead->max_sz; l++) {
-        if ((cpi->lookahead->buf + l) == source) {
-          cpi->previous_index = l;
-          break;
-        }
-      }
-
-      if (l == cpi->lookahead->max_sz) {
-        aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                           "Failed to find last frame original buffer");
-      }
-    }
-  }
-
-  if (!cm->large_scale_tile) {
-    cm->frame_contexts[cm->new_fb_idx] = *cm->fc;
-  }
-
-#define EXT_TILE_DEBUG 0
-#if EXT_TILE_DEBUG
-  if (cm->large_scale_tile && oxcf->pass == 2) {
-    char fn[20] = "./fc";
-    fn[4] = cm->current_video_frame / 100 + '0';
-    fn[5] = (cm->current_video_frame % 100) / 10 + '0';
-    fn[6] = (cm->current_video_frame % 10) + '0';
-    fn[7] = '\0';
-    av1_print_frame_contexts(cm->fc, fn);
-  }
-#endif  // EXT_TILE_DEBUG
-#undef EXT_TILE_DEBUG
-
-  cm->showable_frame = !cm->show_frame && cm->showable_frame;
-
-  // No frame encoded, or frame was dropped, release scaled references.
-  if ((*size == 0) && (frame_is_intra_only(cm) == 0)) {
-    release_scaled_references(cpi);
-  }
-
-  if (*size > 0) {
-    cpi->droppable = !frame_is_reference(cpi);
-  }
-
-  aom_usec_timer_mark(&cmptimer);
-  cpi->time_compress_data += aom_usec_timer_elapsed(&cmptimer);
-
-  if (cpi->b_calculate_psnr && oxcf->pass != 1 && cm->show_frame)
-    generate_psnr_packet(cpi);
-
-#if CONFIG_INTERNAL_STATS
-  if (oxcf->pass != 1) {
-    compute_internal_stats(cpi, (int)(*size));
-  }
-#endif  // CONFIG_INTERNAL_STATS
-
-  aom_clear_system_state();
-
-  return 0;
-}
-
-int av1_get_preview_raw_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *dest) {
-  AV1_COMMON *cm = &cpi->common;
-  if (!cm->show_frame) {
-    return -1;
-  } else {
-    int ret;
-    if (cm->frame_to_show) {
-      *dest = *cm->frame_to_show;
-      dest->y_width = cm->width;
-      dest->y_height = cm->height;
-      dest->uv_width = cm->width >> cm->seq_params.subsampling_x;
-      dest->uv_height = cm->height >> cm->seq_params.subsampling_y;
-      ret = 0;
-    } else {
-      ret = -1;
-    }
-    aom_clear_system_state();
-    return ret;
-  }
-}
-
-int av1_get_last_show_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *frame) {
-  if (cpi->last_show_frame_buf_idx == INVALID_IDX) return -1;
-
-  *frame =
-      cpi->common.buffer_pool->frame_bufs[cpi->last_show_frame_buf_idx].buf;
-  return 0;
-}
-
-static int equal_dimensions_and_border(const YV12_BUFFER_CONFIG *a,
-                                       const YV12_BUFFER_CONFIG *b) {
-  return a->y_height == b->y_height && a->y_width == b->y_width &&
-         a->uv_height == b->uv_height && a->uv_width == b->uv_width &&
-         a->y_stride == b->y_stride && a->uv_stride == b->uv_stride &&
-         a->border == b->border &&
-         (a->flags & YV12_FLAG_HIGHBITDEPTH) ==
-             (b->flags & YV12_FLAG_HIGHBITDEPTH);
-}
-
-aom_codec_err_t av1_copy_new_frame_enc(AV1_COMMON *cm,
-                                       YV12_BUFFER_CONFIG *new_frame,
-                                       YV12_BUFFER_CONFIG *sd) {
-  const int num_planes = av1_num_planes(cm);
-  if (!equal_dimensions_and_border(new_frame, sd))
-    aom_internal_error(&cm->error, AOM_CODEC_ERROR,
-                       "Incorrect buffer dimensions");
-  else
-    aom_yv12_copy_frame(new_frame, sd, num_planes);
-
-  return cm->error.error_code;
-}
-
-int av1_set_internal_size(AV1_COMP *cpi, AOM_SCALING horiz_mode,
-                          AOM_SCALING vert_mode) {
-  int hr = 0, hs = 0, vr = 0, vs = 0;
-
-  if (horiz_mode > ONETWO || vert_mode > ONETWO) return -1;
-
-  Scale2Ratio(horiz_mode, &hr, &hs);
-  Scale2Ratio(vert_mode, &vr, &vs);
-
-  // always go to the next whole number
-  cpi->resize_pending_width = (hs - 1 + cpi->oxcf.width * hr) / hs;
-  cpi->resize_pending_height = (vs - 1 + cpi->oxcf.height * vr) / vs;
-
-  return 0;
-}
-
-int av1_get_quantizer(AV1_COMP *cpi) { return cpi->common.base_qindex; }
-
-int av1_convert_sect5obus_to_annexb(uint8_t *buffer, size_t *frame_size) {
-  size_t output_size = 0;
-  size_t total_bytes_read = 0;
-  size_t remaining_size = *frame_size;
-  uint8_t *buff_ptr = buffer;
-
-  // go through each OBUs
-  while (total_bytes_read < *frame_size) {
-    uint8_t saved_obu_header[2];
-    uint64_t obu_payload_size;
-    size_t length_of_payload_size;
-    size_t length_of_obu_size;
-    uint32_t obu_header_size = (buff_ptr[0] >> 2) & 0x1 ? 2 : 1;
-    size_t obu_bytes_read = obu_header_size;  // bytes read for current obu
-
-    // save the obu header (1 or 2 bytes)
-    memmove(saved_obu_header, buff_ptr, obu_header_size);
-    // clear the obu_has_size_field
-    saved_obu_header[0] = saved_obu_header[0] & (~0x2);
-
-    // get the payload_size and length of payload_size
-    if (aom_uleb_decode(buff_ptr + obu_header_size, remaining_size,
-                        &obu_payload_size, &length_of_payload_size) != 0) {
-      return AOM_CODEC_ERROR;
-    }
-    obu_bytes_read += length_of_payload_size;
-
-    // calculate the length of size of the obu header plus payload
-    length_of_obu_size =
-        aom_uleb_size_in_bytes((uint64_t)(obu_header_size + obu_payload_size));
-
-    // move the rest of data to new location
-    memmove(buff_ptr + length_of_obu_size + obu_header_size,
-            buff_ptr + obu_bytes_read, remaining_size - obu_bytes_read);
-    obu_bytes_read += (size_t)obu_payload_size;
-
-    // write the new obu size
-    const uint64_t obu_size = obu_header_size + obu_payload_size;
-    size_t coded_obu_size;
-    if (aom_uleb_encode(obu_size, sizeof(obu_size), buff_ptr,
-                        &coded_obu_size) != 0) {
-      return AOM_CODEC_ERROR;
-    }
-
-    // write the saved (modified) obu_header following obu size
-    memmove(buff_ptr + length_of_obu_size, saved_obu_header, obu_header_size);
-
-    total_bytes_read += obu_bytes_read;
-    remaining_size -= obu_bytes_read;
-    buff_ptr += length_of_obu_size + obu_size;
-    output_size += length_of_obu_size + (size_t)obu_size;
-  }
-
-  *frame_size = output_size;
-  return AOM_CODEC_OK;
-}
-
-void av1_apply_encoding_flags(AV1_COMP *cpi, aom_enc_frame_flags_t flags) {
-  // TODO(yunqingwang): For what references to use, external encoding flags
-  // should be consistent with internal reference frame selection. Need to
-  // ensure that there is not conflict between the two. In AV1 encoder, the
-  // priority rank for 7 reference frames are: LAST, ALTREF, LAST2, LAST3,
-  // GOLDEN, BWDREF, ALTREF2. If only one reference frame is used, it must be
-  // LAST.
-  cpi->ext_ref_frame_flags = AOM_REFFRAME_ALL;
-  if (flags &
-      (AOM_EFLAG_NO_REF_LAST | AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 |
-       AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | AOM_EFLAG_NO_REF_BWD |
-       AOM_EFLAG_NO_REF_ARF2)) {
-    if (flags & AOM_EFLAG_NO_REF_LAST) {
-      cpi->ext_ref_frame_flags = 0;
-    } else {
-      int ref = AOM_REFFRAME_ALL;
-
-      if (flags & AOM_EFLAG_NO_REF_LAST2) ref ^= AOM_LAST2_FLAG;
-      if (flags & AOM_EFLAG_NO_REF_LAST3) ref ^= AOM_LAST3_FLAG;
-
-      if (flags & AOM_EFLAG_NO_REF_GF) ref ^= AOM_GOLD_FLAG;
-
-      if (flags & AOM_EFLAG_NO_REF_ARF) {
-        ref ^= AOM_ALT_FLAG;
-        ref ^= AOM_BWD_FLAG;
-        ref ^= AOM_ALT2_FLAG;
-      } else {
-        if (flags & AOM_EFLAG_NO_REF_BWD) ref ^= AOM_BWD_FLAG;
-        if (flags & AOM_EFLAG_NO_REF_ARF2) ref ^= AOM_ALT2_FLAG;
-      }
-
-      av1_use_as_reference(cpi, ref);
-    }
-  }
-
-  if (flags &
-      (AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF)) {
-    int upd = AOM_REFFRAME_ALL;
-
-    // Refreshing LAST/LAST2/LAST3 is handled by 1 common flag.
-    if (flags & AOM_EFLAG_NO_UPD_LAST) upd ^= AOM_LAST_FLAG;
-
-    if (flags & AOM_EFLAG_NO_UPD_GF) upd ^= AOM_GOLD_FLAG;
-
-    if (flags & AOM_EFLAG_NO_UPD_ARF) {
-      upd ^= AOM_ALT_FLAG;
-      upd ^= AOM_BWD_FLAG;
-      upd ^= AOM_ALT2_FLAG;
-    }
-
-    av1_update_reference(cpi, upd);
-  }
-
-  cpi->ext_use_ref_frame_mvs = cpi->oxcf.allow_ref_frame_mvs &
-                               ((flags & AOM_EFLAG_NO_REF_FRAME_MVS) == 0);
-  cpi->ext_use_error_resilient = cpi->oxcf.error_resilient_mode |
-                                 ((flags & AOM_EFLAG_ERROR_RESILIENT) != 0);
-  cpi->ext_use_s_frame =
-      cpi->oxcf.s_frame_mode | ((flags & AOM_EFLAG_SET_S_FRAME) != 0);
-  cpi->ext_use_primary_ref_none = (flags & AOM_EFLAG_SET_PRIMARY_REF_NONE) != 0;
-
-  if (flags & AOM_EFLAG_NO_UPD_ENTROPY) {
-    av1_update_entropy(cpi, 0);
-  }
-}
-
-int64_t timebase_units_to_ticks(const aom_rational_t *timebase, int64_t n) {
-  return n * TICKS_PER_SEC * timebase->num / timebase->den;
-}
-
-int64_t ticks_to_timebase_units(const aom_rational_t *timebase, int64_t n) {
-  const int64_t round = TICKS_PER_SEC * timebase->num / 2 - 1;
-  return (n * timebase->den + round) / timebase->num / TICKS_PER_SEC;
-}
-
-aom_fixed_buf_t *av1_get_global_headers(AV1_COMP *cpi) {
-  if (!cpi) return NULL;
-
-  uint8_t header_buf[512] = { 0 };
-  const uint32_t sequence_header_size =
-      write_sequence_header_obu(cpi, &header_buf[0]);
-  assert(sequence_header_size <= sizeof(header_buf));
-  if (sequence_header_size == 0) return NULL;
-
-  const size_t obu_header_size = 1;
-  const size_t size_field_size = aom_uleb_size_in_bytes(sequence_header_size);
-  const size_t payload_offset = obu_header_size + size_field_size;
-
-  if (payload_offset + sequence_header_size > sizeof(header_buf)) return NULL;
-  memmove(&header_buf[payload_offset], &header_buf[0], sequence_header_size);
-
-  if (write_obu_header(OBU_SEQUENCE_HEADER, 0, &header_buf[0]) !=
-      obu_header_size) {
-    return NULL;
-  }
-
-  size_t coded_size_field_size = 0;
-  if (aom_uleb_encode(sequence_header_size, size_field_size,
-                      &header_buf[obu_header_size],
-                      &coded_size_field_size) != 0) {
-    return NULL;
-  }
-  assert(coded_size_field_size == size_field_size);
-
-  aom_fixed_buf_t *global_headers =
-      (aom_fixed_buf_t *)malloc(sizeof(*global_headers));
-  if (!global_headers) return NULL;
-
-  const size_t global_header_buf_size =
-      obu_header_size + size_field_size + sequence_header_size;
-
-  global_headers->buf = malloc(global_header_buf_size);
-  if (!global_headers->buf) {
-    free(global_headers);
-    return NULL;
-  }
-
-  memcpy(global_headers->buf, &header_buf[0], global_header_buf_size);
-  global_headers->sz = global_header_buf_size;
-  return global_headers;
-}
diff --git a/third_party/aom/av1/encoder/encoder.h b/third_party/aom/av1/encoder/encoder.h
deleted file mode 100644
index ee7fc4637..000000000
--- a/third_party/aom/av1/encoder/encoder.h
+++ /dev/null
@@ -1,985 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_ENCODER_H_
-#define AOM_AV1_ENCODER_ENCODER_H_
-
-#include <stdio.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aomcx.h"
-
-#include "av1/common/alloccommon.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/thread_common.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/resize.h"
-#include "av1/common/timing.h"
-#include "av1/encoder/aq_cyclicrefresh.h"
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/context_tree.h"
-#include "av1/encoder/encodemb.h"
-#include "av1/encoder/firstpass.h"
-#include "av1/encoder/lookahead.h"
-#include "av1/encoder/mbgraph.h"
-#include "av1/encoder/mcomp.h"
-#include "av1/encoder/ratectrl.h"
-#include "av1/encoder/rd.h"
-#include "av1/encoder/speed_features.h"
-#include "av1/encoder/tokenize.h"
-
-#if CONFIG_INTERNAL_STATS
-#include "aom_dsp/ssim.h"
-#endif
-#include "aom_dsp/variance.h"
-#if CONFIG_DENOISE
-#include "aom_dsp/noise_model.h"
-#endif
-#include "aom/internal/aom_codec_internal.h"
-#include "aom_util/aom_thread.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct {
-  int nmv_vec_cost[MV_JOINTS];
-  int nmv_costs[2][MV_VALS];
-  int nmv_costs_hp[2][MV_VALS];
-
-  FRAME_CONTEXT fc;
-} CODING_CONTEXT;
-
-typedef enum {
-  // regular inter frame
-  REGULAR_FRAME = 0,
-  // alternate reference frame
-  ARF_FRAME = 1,
-  // overlay frame
-  OVERLAY_FRAME = 2,
-  // golden frame
-  GLD_FRAME = 3,
-  // backward reference frame
-  BRF_FRAME = 4,
-  // extra alternate reference frame
-  EXT_ARF_FRAME = 5,
-  FRAME_CONTEXT_INDEXES
-} FRAME_CONTEXT_INDEX;
-
-typedef enum {
-  NORMAL = 0,
-  FOURFIVE = 1,
-  THREEFIVE = 2,
-  ONETWO = 3
-} AOM_SCALING;
-
-typedef enum {
-  // Good Quality Fast Encoding. The encoder balances quality with the amount of
-  // time it takes to encode the output. Speed setting controls how fast.
-  GOOD
-} MODE;
-
-typedef enum {
-  FRAMEFLAGS_KEY = 1 << 0,
-  FRAMEFLAGS_GOLDEN = 1 << 1,
-  FRAMEFLAGS_BWDREF = 1 << 2,
-  // TODO(zoeliu): To determine whether a frame flag is needed for ALTREF2_FRAME
-  FRAMEFLAGS_ALTREF = 1 << 3,
-} FRAMETYPE_FLAGS;
-
-typedef enum {
-  NO_AQ = 0,
-  VARIANCE_AQ = 1,
-  COMPLEXITY_AQ = 2,
-  CYCLIC_REFRESH_AQ = 3,
-  AQ_MODE_COUNT  // This should always be the last member of the enum
-} AQ_MODE;
-typedef enum {
-  NO_DELTA_Q = 0,
-  DELTA_Q_ONLY = 1,
-  DELTA_Q_LF = 2,
-  DELTAQ_MODE_COUNT  // This should always be the last member of the enum
-} DELTAQ_MODE;
-
-typedef enum {
-  RESIZE_NONE = 0,    // No frame resizing allowed.
-  RESIZE_FIXED = 1,   // All frames are coded at the specified scale.
-  RESIZE_RANDOM = 2,  // All frames are coded at a random scale.
-  RESIZE_MODES
-} RESIZE_MODE;
-
-typedef enum {
-  SUPERRES_NONE = 0,     // No frame superres allowed
-  SUPERRES_FIXED = 1,    // All frames are coded at the specified scale,
-                         // and super-resolved.
-  SUPERRES_RANDOM = 2,   // All frames are coded at a random scale,
-                         // and super-resolved.
-  SUPERRES_QTHRESH = 3,  // Superres scale for a frame is determined based on
-                         // q_index
-  SUPERRES_MODES
-} SUPERRES_MODE;
-
-typedef struct AV1EncoderConfig {
-  BITSTREAM_PROFILE profile;
-  aom_bit_depth_t bit_depth;     // Codec bit-depth.
-  int width;                     // width of data passed to the compressor
-  int height;                    // height of data passed to the compressor
-  int forced_max_frame_width;    // forced maximum width of frame (if != 0)
-  int forced_max_frame_height;   // forced maximum height of frame (if != 0)
-  unsigned int input_bit_depth;  // Input bit depth.
-  double init_framerate;         // set to passed in framerate
-  int64_t target_bandwidth;      // bandwidth to be used in bits per second
-
-  int noise_sensitivity;  // pre processing blur: recommendation 0
-  int sharpness;          // sharpening output: recommendation 0:
-  int speed;
-  // maximum allowed bitrate for any intra frame in % of bitrate target.
-  unsigned int rc_max_intra_bitrate_pct;
-  // maximum allowed bitrate for any inter frame in % of bitrate target.
-  unsigned int rc_max_inter_bitrate_pct;
-  // percent of rate boost for golden frame in CBR mode.
-  unsigned int gf_cbr_boost_pct;
-
-  MODE mode;
-  int pass;
-
-  // Key Framing Operations
-  int auto_key;  // autodetect cut scenes and set the keyframes
-  int key_freq;  // maximum distance to key frame.
-  int sframe_dist;
-  int sframe_mode;
-  int sframe_enabled;
-  int lag_in_frames;  // how many frames lag before we start encoding
-  int fwd_kf_enabled;
-
-  // ----------------------------------------------------------------
-  // DATARATE CONTROL OPTIONS
-
-  // vbr, cbr, constrained quality or constant quality
-  enum aom_rc_mode rc_mode;
-
-  // buffer targeting aggressiveness
-  int under_shoot_pct;
-  int over_shoot_pct;
-
-  // buffering parameters
-  int64_t starting_buffer_level_ms;
-  int64_t optimal_buffer_level_ms;
-  int64_t maximum_buffer_size_ms;
-
-  // Frame drop threshold.
-  int drop_frames_water_mark;
-
-  // controlling quality
-  int fixed_q;
-  int worst_allowed_q;
-  int best_allowed_q;
-  int cq_level;
-  AQ_MODE aq_mode;  // Adaptive Quantization mode
-  DELTAQ_MODE deltaq_mode;
-  int enable_cdef;
-  int enable_restoration;
-  int disable_trellis_quant;
-  int using_qm;
-  int qm_y;
-  int qm_u;
-  int qm_v;
-  int qm_minlevel;
-  int qm_maxlevel;
-#if CONFIG_DIST_8X8
-  int using_dist_8x8;
-#endif
-  unsigned int num_tile_groups;
-  unsigned int mtu;
-
-  // Internal frame size scaling.
-  RESIZE_MODE resize_mode;
-  uint8_t resize_scale_denominator;
-  uint8_t resize_kf_scale_denominator;
-
-  // Frame Super-Resolution size scaling.
-  SUPERRES_MODE superres_mode;
-  uint8_t superres_scale_denominator;
-  uint8_t superres_kf_scale_denominator;
-  int superres_qthresh;
-  int superres_kf_qthresh;
-
-  // Enable feature to reduce the frame quantization every x frames.
-  int frame_periodic_boost;
-
-  // two pass datarate control
-  int two_pass_vbrbias;  // two pass datarate control tweaks
-  int two_pass_vbrmin_section;
-  int two_pass_vbrmax_section;
-  // END DATARATE CONTROL OPTIONS
-  // ----------------------------------------------------------------
-
-  int enable_auto_arf;
-  int enable_auto_brf;  // (b)ackward (r)ef (f)rame
-
-  /* Bitfield defining the error resiliency features to enable.
-   * Can provide decodable frames after losses in previous
-   * frames and decodable partitions after losses in the same frame.
-   */
-  unsigned int error_resilient_mode;
-
-  unsigned int s_frame_mode;
-
-  /* Bitfield defining the parallel decoding mode where the
-   * decoding in successive frames may be conducted in parallel
-   * just by decoding the frame headers.
-   */
-  unsigned int frame_parallel_decoding_mode;
-
-  unsigned int limit;
-
-  int arnr_max_frames;
-  int arnr_strength;
-
-  int min_gf_interval;
-  int max_gf_interval;
-
-  int row_mt;
-  int tile_columns;
-  int tile_rows;
-  int tile_width_count;
-  int tile_height_count;
-  int tile_widths[MAX_TILE_COLS];
-  int tile_heights[MAX_TILE_ROWS];
-
-  int max_threads;
-
-  aom_fixed_buf_t two_pass_stats_in;
-  struct aom_codec_pkt_list *output_pkt_list;
-
-#if CONFIG_FP_MB_STATS
-  aom_fixed_buf_t firstpass_mb_stats_in;
-#endif
-
-  aom_tune_metric tuning;
-  aom_tune_content content;
-  int use_highbitdepth;
-  aom_color_primaries_t color_primaries;
-  aom_transfer_characteristics_t transfer_characteristics;
-  aom_matrix_coefficients_t matrix_coefficients;
-  aom_chroma_sample_position_t chroma_sample_position;
-  int color_range;
-  int render_width;
-  int render_height;
-  aom_timing_info_type_t timing_info_type;
-  int timing_info_present;
-  aom_timing_info_t timing_info;
-  int decoder_model_info_present_flag;
-  int display_model_info_present_flag;
-  int buffer_removal_time_present;
-  aom_dec_model_info_t buffer_model;
-  aom_dec_model_op_parameters_t op_params[MAX_NUM_OPERATING_POINTS + 1];
-  aom_op_timing_info_t op_frame_timing[MAX_NUM_OPERATING_POINTS + 1];
-  int film_grain_test_vector;
-  const char *film_grain_table_filename;
-
-  uint8_t cdf_update_mode;
-  aom_superblock_size_t superblock_size;
-  unsigned int large_scale_tile;
-  unsigned int single_tile_decoding;
-  int monochrome;
-  unsigned int full_still_picture_hdr;
-  int enable_dual_filter;
-  unsigned int motion_vector_unit_test;
-  const cfg_options_t *cfg;
-  int enable_order_hint;
-  int enable_jnt_comp;
-  int enable_ref_frame_mvs;
-  unsigned int allow_ref_frame_mvs;
-  int enable_warped_motion;
-  int allow_warped_motion;
-  int enable_superres;
-  unsigned int save_as_annexb;
-
-#if CONFIG_DENOISE
-  float noise_level;
-  int noise_block_size;
-#endif
-
-  unsigned int chroma_subsampling_x;
-  unsigned int chroma_subsampling_y;
-} AV1EncoderConfig;
-
-static INLINE int is_lossless_requested(const AV1EncoderConfig *cfg) {
-  return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0;
-}
-
-typedef struct FRAME_COUNTS {
-// Note: This structure should only contain 'unsigned int' fields, or
-// aggregates built solely from 'unsigned int' fields/elements
-#if CONFIG_ENTROPY_STATS
-  unsigned int kf_y_mode[KF_MODE_CONTEXTS][KF_MODE_CONTEXTS][INTRA_MODES];
-  unsigned int angle_delta[DIRECTIONAL_MODES][2 * MAX_ANGLE_DELTA + 1];
-  unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES];
-  unsigned int uv_mode[CFL_ALLOWED_TYPES][INTRA_MODES][UV_INTRA_MODES];
-  unsigned int cfl_sign[CFL_JOINT_SIGNS];
-  unsigned int cfl_alpha[CFL_ALPHA_CONTEXTS][CFL_ALPHABET_SIZE];
-  unsigned int palette_y_mode[PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS][2];
-  unsigned int palette_uv_mode[PALETTE_UV_MODE_CONTEXTS][2];
-  unsigned int palette_y_size[PALATTE_BSIZE_CTXS][PALETTE_SIZES];
-  unsigned int palette_uv_size[PALATTE_BSIZE_CTXS][PALETTE_SIZES];
-  unsigned int palette_y_color_index[PALETTE_SIZES]
-                                    [PALETTE_COLOR_INDEX_CONTEXTS]
-                                    [PALETTE_COLORS];
-  unsigned int palette_uv_color_index[PALETTE_SIZES]
-                                     [PALETTE_COLOR_INDEX_CONTEXTS]
-                                     [PALETTE_COLORS];
-  unsigned int partition[PARTITION_CONTEXTS][EXT_PARTITION_TYPES];
-  unsigned int txb_skip[TOKEN_CDF_Q_CTXS][TX_SIZES][TXB_SKIP_CONTEXTS][2];
-  unsigned int eob_extra[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES]
-                        [EOB_COEF_CONTEXTS][2];
-  unsigned int dc_sign[PLANE_TYPES][DC_SIGN_CONTEXTS][2];
-  unsigned int coeff_lps[TX_SIZES][PLANE_TYPES][BR_CDF_SIZE - 1][LEVEL_CONTEXTS]
-                        [2];
-  unsigned int eob_flag[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS][2];
-  unsigned int eob_multi16[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][5];
-  unsigned int eob_multi32[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][6];
-  unsigned int eob_multi64[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][7];
-  unsigned int eob_multi128[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][8];
-  unsigned int eob_multi256[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][9];
-  unsigned int eob_multi512[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][10];
-  unsigned int eob_multi1024[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][11];
-  unsigned int coeff_lps_multi[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES]
-                              [LEVEL_CONTEXTS][BR_CDF_SIZE];
-  unsigned int coeff_base_multi[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES]
-                               [SIG_COEF_CONTEXTS][NUM_BASE_LEVELS + 2];
-  unsigned int coeff_base_eob_multi[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES]
-                                   [SIG_COEF_CONTEXTS_EOB][NUM_BASE_LEVELS + 1];
-  unsigned int newmv_mode[NEWMV_MODE_CONTEXTS][2];
-  unsigned int zeromv_mode[GLOBALMV_MODE_CONTEXTS][2];
-  unsigned int refmv_mode[REFMV_MODE_CONTEXTS][2];
-  unsigned int drl_mode[DRL_MODE_CONTEXTS][2];
-  unsigned int inter_compound_mode[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
-  unsigned int wedge_idx[BLOCK_SIZES_ALL][16];
-  unsigned int interintra[BLOCK_SIZE_GROUPS][2];
-  unsigned int interintra_mode[BLOCK_SIZE_GROUPS][INTERINTRA_MODES];
-  unsigned int wedge_interintra[BLOCK_SIZES_ALL][2];
-  unsigned int compound_type[BLOCK_SIZES_ALL][COMPOUND_TYPES - 1];
-  unsigned int motion_mode[BLOCK_SIZES_ALL][MOTION_MODES];
-  unsigned int obmc[BLOCK_SIZES_ALL][2];
-  unsigned int intra_inter[INTRA_INTER_CONTEXTS][2];
-  unsigned int comp_inter[COMP_INTER_CONTEXTS][2];
-  unsigned int comp_ref_type[COMP_REF_TYPE_CONTEXTS][2];
-  unsigned int uni_comp_ref[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - 1][2];
-  unsigned int single_ref[REF_CONTEXTS][SINGLE_REFS - 1][2];
-  unsigned int comp_ref[REF_CONTEXTS][FWD_REFS - 1][2];
-  unsigned int comp_bwdref[REF_CONTEXTS][BWD_REFS - 1][2];
-  unsigned int intrabc[2];
-
-  unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2];
-  unsigned int intra_tx_size[MAX_TX_CATS][TX_SIZE_CONTEXTS][MAX_TX_DEPTH + 1];
-  unsigned int skip_mode[SKIP_MODE_CONTEXTS][2];
-  unsigned int skip[SKIP_CONTEXTS][2];
-  unsigned int compound_index[COMP_INDEX_CONTEXTS][2];
-  unsigned int comp_group_idx[COMP_GROUP_IDX_CONTEXTS][2];
-  unsigned int delta_q[DELTA_Q_PROBS][2];
-  unsigned int delta_lf_multi[FRAME_LF_COUNT][DELTA_LF_PROBS][2];
-  unsigned int delta_lf[DELTA_LF_PROBS][2];
-
-  unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
-  unsigned int intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
-                           [TX_TYPES];
-  unsigned int filter_intra_mode[FILTER_INTRA_MODES];
-  unsigned int filter_intra[BLOCK_SIZES_ALL][2];
-  unsigned int switchable_restore[RESTORE_SWITCHABLE_TYPES];
-  unsigned int wiener_restore[2];
-  unsigned int sgrproj_restore[2];
-#endif  // CONFIG_ENTROPY_STATS
-
-  unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS]
-                                [SWITCHABLE_FILTERS];
-} FRAME_COUNTS;
-
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-#define INTER_MODE_RD_DATA_OVERALL_SIZE 6400
-
-typedef struct {
-  int ready;
-  double a;
-  double b;
-  double dist_mean;
-  double ld_mean;
-  double sse_mean;
-  double sse_sse_mean;
-  double sse_ld_mean;
-  int num;
-  double dist_sum;
-  double ld_sum;
-  double sse_sum;
-  double sse_sse_sum;
-  double sse_ld_sum;
-} InterModeRdModel;
-
-typedef struct {
-  int idx;
-  int64_t rd;
-} RdIdxPair;
-// TODO(angiebird): This is an estimated size. We still need to figure what is
-// the maximum number of modes.
-#define MAX_INTER_MODES 1024
-typedef struct inter_modes_info {
-  int num;
-  MB_MODE_INFO mbmi_arr[MAX_INTER_MODES];
-  int mode_rate_arr[MAX_INTER_MODES];
-  int64_t sse_arr[MAX_INTER_MODES];
-  int64_t est_rd_arr[MAX_INTER_MODES];
-  RdIdxPair rd_idx_pair_arr[MAX_INTER_MODES];
-} InterModesInfo;
-#endif
-
-// TODO(jingning) All spatially adaptive variables should go to TileDataEnc.
-typedef struct TileDataEnc {
-  TileInfo tile_info;
-  int thresh_freq_fact[BLOCK_SIZES_ALL][MAX_MODES];
-  int mode_map[BLOCK_SIZES_ALL][MAX_MODES];
-  int m_search_count;
-  int ex_search_count;
-  CFL_CTX cfl;
-  DECLARE_ALIGNED(16, FRAME_CONTEXT, tctx);
-  uint8_t allow_update_cdf;
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-  InterModeRdModel inter_mode_rd_models[BLOCK_SIZES_ALL];
-  InterModesInfo inter_modes_info;
-#endif
-} TileDataEnc;
-
-typedef struct {
-  TOKENEXTRA *start;
-  TOKENEXTRA *stop;
-  unsigned int count;
-} TOKENLIST;
-
-typedef struct RD_COUNTS {
-  int64_t comp_pred_diff[REFERENCE_MODES];
-  // Stores number of 4x4 blocks using global motion per reference frame.
-  int global_motion_used[REF_FRAMES];
-  int compound_ref_used_flag;
-  int skip_mode_used_flag;
-} RD_COUNTS;
-
-typedef struct ThreadData {
-  MACROBLOCK mb;
-  RD_COUNTS rd_counts;
-  FRAME_COUNTS *counts;
-  PC_TREE *pc_tree;
-  PC_TREE *pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1];
-  uint32_t *hash_value_buffer[2][2];
-  int32_t *wsrc_buf;
-  int32_t *mask_buf;
-  uint8_t *above_pred_buf;
-  uint8_t *left_pred_buf;
-  PALETTE_BUFFER *palette_buffer;
-  CONV_BUF_TYPE *tmp_conv_dst;
-  uint8_t *tmp_obmc_bufs[2];
-  int intrabc_used_this_tile;
-} ThreadData;
-
-struct EncWorkerData;
-
-typedef struct ActiveMap {
-  int enabled;
-  int update;
-  unsigned char *map;
-} ActiveMap;
-
-#if CONFIG_INTERNAL_STATS
-// types of stats
-typedef enum {
-  STAT_Y,
-  STAT_U,
-  STAT_V,
-  STAT_ALL,
-  NUM_STAT_TYPES  // This should always be the last member of the enum
-} StatType;
-
-typedef struct IMAGE_STAT {
-  double stat[NUM_STAT_TYPES];
-  double worst;
-} ImageStat;
-#endif  // CONFIG_INTERNAL_STATS
-
-typedef struct {
-  int ref_count;
-  YV12_BUFFER_CONFIG buf;
-} EncRefCntBuffer;
-
-typedef struct TileBufferEnc {
-  uint8_t *data;
-  size_t size;
-} TileBufferEnc;
-
-typedef struct AV1_COMP {
-  QUANTS quants;
-  ThreadData td;
-  FRAME_COUNTS counts;
-  MB_MODE_INFO_EXT *mbmi_ext_base;
-  CB_COEFF_BUFFER *coeff_buffer_base;
-  Dequants dequants;
-  AV1_COMMON common;
-  AV1EncoderConfig oxcf;
-  struct lookahead_ctx *lookahead;
-  struct lookahead_entry *alt_ref_source;
-  int no_show_kf;
-
-  int optimize_speed_feature;
-  int optimize_seg_arr[MAX_SEGMENTS];
-
-  YV12_BUFFER_CONFIG *source;
-  YV12_BUFFER_CONFIG *last_source;  // NULL for first frame and alt_ref frames
-  YV12_BUFFER_CONFIG *unscaled_source;
-  YV12_BUFFER_CONFIG scaled_source;
-  YV12_BUFFER_CONFIG *unscaled_last_source;
-  YV12_BUFFER_CONFIG scaled_last_source;
-
-  // For a still frame, this flag is set to 1 to skip partition search.
-  int partition_search_skippable_frame;
-  double csm_rate_array[32];
-  double m_rate_array[32];
-  int rate_size;
-  int rate_index;
-  hash_table *previous_hash_table;
-  int previous_index;
-  int cur_poc;  // DebugInfo
-
-  unsigned int row_mt;
-  int scaled_ref_idx[REF_FRAMES];
-  int ref_fb_idx[REF_FRAMES];
-  int refresh_fb_idx;  // ref frame buffer index to refresh
-
-  int last_show_frame_buf_idx;  // last show frame buffer index
-
-  int refresh_last_frame;
-  int refresh_golden_frame;
-  int refresh_bwd_ref_frame;
-  int refresh_alt2_ref_frame;
-  int refresh_alt_ref_frame;
-#if USE_SYMM_MULTI_LAYER
-  int new_bwdref_update_rule;
-#endif
-
-  int ext_refresh_frame_flags_pending;
-  int ext_refresh_last_frame;
-  int ext_refresh_golden_frame;
-  int ext_refresh_bwd_ref_frame;
-  int ext_refresh_alt2_ref_frame;
-  int ext_refresh_alt_ref_frame;
-
-  int ext_refresh_frame_context_pending;
-  int ext_refresh_frame_context;
-  int ext_use_ref_frame_mvs;
-  int ext_use_error_resilient;
-  int ext_use_s_frame;
-  int ext_use_primary_ref_none;
-
-  YV12_BUFFER_CONFIG last_frame_uf;
-  YV12_BUFFER_CONFIG trial_frame_rst;
-
-  // Ambient reconstruction err target for force key frames
-  int64_t ambient_err;
-
-  RD_OPT rd;
-
-  CODING_CONTEXT coding_context;
-
-  int gmtype_cost[TRANS_TYPES];
-  int gmparams_cost[REF_FRAMES];
-
-  int nmv_costs[2][MV_VALS];
-  int nmv_costs_hp[2][MV_VALS];
-
-  int64_t last_time_stamp_seen;
-  int64_t last_end_time_stamp_seen;
-  int64_t first_time_stamp_ever;
-
-  RATE_CONTROL rc;
-  double framerate;
-
-  // NOTE(zoeliu): Any inter frame allows maximum of REF_FRAMES inter
-  // references; Plus the currently coded frame itself, it is needed to allocate
-  // sufficient space to the size of the maximum possible number of frames.
-  int interp_filter_selected[REF_FRAMES + 1][SWITCHABLE];
-
-  struct aom_codec_pkt_list *output_pkt_list;
-
-  MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS];
-  int mbgraph_n_frames;  // number of frames filled in the above
-  int static_mb_pct;     // % forced skip mbs by segmentation
-  int ref_frame_flags;
-  int ext_ref_frame_flags;
-  RATE_FACTOR_LEVEL frame_rf_level[FRAME_BUFFERS];
-
-  SPEED_FEATURES sf;
-
-  unsigned int max_mv_magnitude;
-  int mv_step_param;
-
-  int allow_comp_inter_inter;
-  int all_one_sided_refs;
-
-  uint8_t *segmentation_map;
-
-  CYCLIC_REFRESH *cyclic_refresh;
-  ActiveMap active_map;
-
-  fractional_mv_step_fp *find_fractional_mv_step;
-  av1_diamond_search_fn_t diamond_search_sad;
-  aom_variance_fn_ptr_t fn_ptr[BLOCK_SIZES_ALL];
-  uint64_t time_receive_data;
-  uint64_t time_compress_data;
-  uint64_t time_pick_lpf;
-  uint64_t time_encode_sb_row;
-
-#if CONFIG_FP_MB_STATS
-  int use_fp_mb_stats;
-#endif
-
-  TWO_PASS twopass;
-
-  YV12_BUFFER_CONFIG alt_ref_buffer;
-
-#if CONFIG_INTERNAL_STATS
-  unsigned int mode_chosen_counts[MAX_MODES];
-
-  int count;
-  uint64_t total_sq_error;
-  uint64_t total_samples;
-  ImageStat psnr;
-
-  double total_blockiness;
-  double worst_blockiness;
-
-  int bytes;
-  double summed_quality;
-  double summed_weights;
-  unsigned int tot_recode_hits;
-  double worst_ssim;
-
-  ImageStat fastssim;
-  ImageStat psnrhvs;
-
-  int b_calculate_blockiness;
-  int b_calculate_consistency;
-
-  double total_inconsistency;
-  double worst_consistency;
-  Ssimv *ssim_vars;
-  Metrics metrics;
-#endif
-  int b_calculate_psnr;
-
-  int droppable;
-
-  int initial_width;
-  int initial_height;
-  int initial_mbs;  // Number of MBs in the full-size frame; to be used to
-                    // normalize the firstpass stats. This will differ from the
-                    // number of MBs in the current frame when the frame is
-                    // scaled.
-
-  // When resize is triggered through external control, the desired width/height
-  // are stored here until use in the next frame coded. They are effective only
-  // for
-  // one frame and are reset after use.
-  int resize_pending_width;
-  int resize_pending_height;
-
-  int frame_flags;
-
-  search_site_config ss_cfg;
-
-  TileDataEnc *tile_data;
-  int allocated_tiles;  // Keep track of memory allocated for tiles.
-
-  TOKENEXTRA *tile_tok[MAX_TILE_ROWS][MAX_TILE_COLS];
-  unsigned int tok_count[MAX_TILE_ROWS][MAX_TILE_COLS];
-  TOKENLIST *tplist[MAX_TILE_ROWS][MAX_TILE_COLS];
-
-  TileBufferEnc tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS];
-
-  int resize_state;
-  int resize_avg_qp;
-  int resize_buffer_underflow;
-  int resize_count;
-
-  // Sequence parameters have been transmitted already and locked
-  // or not. Once locked av1_change_config cannot change the seq
-  // parameters.
-  int seq_params_locked;
-
-  // VARIANCE_AQ segment map refresh
-  int vaq_refresh;
-
-  // Multi-threading
-  int num_workers;
-  AVxWorker *workers;
-  struct EncWorkerData *tile_thr_data;
-  int refresh_frame_mask;
-  int existing_fb_idx_to_show;
-  int is_arf_filter_off[MAX_EXT_ARFS + 1];
-  int num_extra_arfs;
-  int arf_pos_in_gf[MAX_EXT_ARFS + 1];
-  int arf_pos_for_ovrly[MAX_EXT_ARFS + 1];
-  int global_motion_search_done;
-  tran_low_t *tcoeff_buf[MAX_MB_PLANE];
-  int extra_arf_allowed;
-  // A flag to indicate if intrabc is ever used in current frame.
-  int intrabc_used;
-  int dv_cost[2][MV_VALS];
-  // TODO(huisu@google.com): we can update dv_joint_cost per SB.
-  int dv_joint_cost[MV_JOINTS];
-  int has_lossless_segment;
-
-  // For frame refs short signaling:
-  //   A mapping of each reference frame from its encoder side value to the
-  //   decoder side value obtained following the short signaling procedure.
-  int ref_conv[REF_FRAMES];
-
-  AV1LfSync lf_row_sync;
-  AV1LrSync lr_row_sync;
-  AV1LrStruct lr_ctxt;
-
-  aom_film_grain_table_t *film_grain_table;
-#if CONFIG_DENOISE
-  struct aom_denoise_and_model_t *denoise_and_model;
-#endif
-  // Stores the default value of skip flag depending on chroma format
-  // Set as 1 for monochrome and 3 for other color formats
-  int default_interp_skip_flags;
-  int preserve_arf_as_gld;
-} AV1_COMP;
-
-// Must not be called more than once.
-void av1_initialize_enc(void);
-
-struct AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
-                                       BufferPool *const pool);
-void av1_remove_compressor(AV1_COMP *cpi);
-
-void av1_change_config(AV1_COMP *cpi, const AV1EncoderConfig *oxcf);
-
-// receive a frames worth of data. caller can assume that a copy of this
-// frame is made and not just a copy of the pointer..
-int av1_receive_raw_frame(AV1_COMP *cpi, aom_enc_frame_flags_t frame_flags,
-                          YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
-                          int64_t end_time_stamp);
-
-int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
-                            size_t *size, uint8_t *dest, int64_t *time_stamp,
-                            int64_t *time_end, int flush,
-                            const aom_rational_t *timebase);
-
-int av1_get_preview_raw_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *dest);
-
-int av1_get_last_show_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *frame);
-
-aom_codec_err_t av1_copy_new_frame_enc(AV1_COMMON *cm,
-                                       YV12_BUFFER_CONFIG *new_frame,
-                                       YV12_BUFFER_CONFIG *sd);
-
-int av1_use_as_reference(AV1_COMP *cpi, int ref_frame_flags);
-
-void av1_update_reference(AV1_COMP *cpi, int ref_frame_flags);
-
-int av1_copy_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd);
-
-int av1_set_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd);
-
-int av1_update_entropy(AV1_COMP *cpi, int update);
-
-int av1_set_active_map(AV1_COMP *cpi, unsigned char *map, int rows, int cols);
-
-int av1_get_active_map(AV1_COMP *cpi, unsigned char *map, int rows, int cols);
-
-int av1_set_internal_size(AV1_COMP *cpi, AOM_SCALING horiz_mode,
-                          AOM_SCALING vert_mode);
-
-int av1_get_quantizer(struct AV1_COMP *cpi);
-
-int av1_convert_sect5obus_to_annexb(uint8_t *buffer, size_t *input_size);
-
-int64_t timebase_units_to_ticks(const aom_rational_t *timebase, int64_t n);
-int64_t ticks_to_timebase_units(const aom_rational_t *timebase, int64_t n);
-
-static INLINE int frame_is_kf_gf_arf(const AV1_COMP *cpi) {
-  return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
-         (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref);
-}
-
-static INLINE int get_ref_frame_map_idx(const AV1_COMP *cpi,
-                                        MV_REFERENCE_FRAME ref_frame) {
-  return (ref_frame >= 1) ? cpi->ref_fb_idx[ref_frame - 1] : INVALID_IDX;
-}
-
-static INLINE int get_ref_frame_buf_idx(const AV1_COMP *cpi,
-                                        MV_REFERENCE_FRAME ref_frame) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int map_idx = get_ref_frame_map_idx(cpi, ref_frame);
-  return (map_idx != INVALID_IDX) ? cm->ref_frame_map[map_idx] : INVALID_IDX;
-}
-
-// TODO(huisu@google.com, youzhou@microsoft.com): enable hash-me for HBD.
-static INLINE int av1_use_hash_me(const AV1_COMMON *const cm) {
-  return cm->allow_screen_content_tools;
-}
-
-static INLINE hash_table *av1_get_ref_frame_hash_map(
-    const AV1_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
-  return buf_idx != INVALID_IDX
-             ? &cm->buffer_pool->frame_bufs[buf_idx].hash_table
-             : NULL;
-}
-
-static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
-    const AV1_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
-  return buf_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[buf_idx].buf
-                                : NULL;
-}
-
-static INLINE int enc_is_ref_frame_buf(AV1_COMP *cpi, RefCntBuffer *frame_buf) {
-  MV_REFERENCE_FRAME ref_frame;
-  AV1_COMMON *const cm = &cpi->common;
-  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-    const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
-    if (buf_idx == INVALID_IDX) continue;
-    if (frame_buf == &cm->buffer_pool->frame_bufs[buf_idx]) break;
-  }
-  return (ref_frame <= ALTREF_FRAME);
-}
-
-// Token buffer is only used for palette tokens.
-static INLINE unsigned int get_token_alloc(int mb_rows, int mb_cols,
-                                           int sb_size_log2,
-                                           const int num_planes) {
-  // Calculate the maximum number of max superblocks in the image.
-  const int shift = sb_size_log2 - 4;
-  const int sb_size = 1 << sb_size_log2;
-  const int sb_size_square = sb_size * sb_size;
-  const int sb_rows = ALIGN_POWER_OF_TWO(mb_rows, shift) >> shift;
-  const int sb_cols = ALIGN_POWER_OF_TWO(mb_cols, shift) >> shift;
-
-  // One palette token for each pixel. There can be palettes on two planes.
-  const int sb_palette_toks = AOMMIN(2, num_planes) * sb_size_square;
-
-  return sb_rows * sb_cols * sb_palette_toks;
-}
-
-// Get the allocated token size for a tile. It does the same calculation as in
-// the frame token allocation.
-static INLINE unsigned int allocated_tokens(TileInfo tile, int sb_size_log2,
-                                            int num_planes) {
-  int tile_mb_rows = (tile.mi_row_end - tile.mi_row_start + 2) >> 2;
-  int tile_mb_cols = (tile.mi_col_end - tile.mi_col_start + 2) >> 2;
-
-  return get_token_alloc(tile_mb_rows, tile_mb_cols, sb_size_log2, num_planes);
-}
-
-static INLINE void get_start_tok(AV1_COMP *cpi, int tile_row, int tile_col,
-                                 int mi_row, TOKENEXTRA **tok, int sb_size_log2,
-                                 int num_planes) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int tile_cols = cm->tile_cols;
-  TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
-  const TileInfo *const tile_info = &this_tile->tile_info;
-
-  const int tile_mb_cols =
-      (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
-  const int tile_mb_row = (mi_row - tile_info->mi_row_start + 2) >> 2;
-
-  *tok = cpi->tile_tok[tile_row][tile_col] +
-         get_token_alloc(tile_mb_row, tile_mb_cols, sb_size_log2, num_planes);
-}
-
-void av1_apply_encoding_flags(AV1_COMP *cpi, aom_enc_frame_flags_t flags);
-
-#define ALT_MIN_LAG 3
-static INLINE int is_altref_enabled(const AV1_COMP *const cpi) {
-  return cpi->oxcf.lag_in_frames >= ALT_MIN_LAG && cpi->oxcf.enable_auto_arf;
-}
-
-// TODO(zoeliu): To set up cpi->oxcf.enable_auto_brf
-
-static INLINE void set_ref_ptrs(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                MV_REFERENCE_FRAME ref0,
-                                MV_REFERENCE_FRAME ref1) {
-  xd->block_refs[0] =
-      &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME : 0];
-  xd->block_refs[1] =
-      &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME : 0];
-}
-
-static INLINE int get_chessboard_index(int frame_index) {
-  return frame_index & 0x1;
-}
-
-static INLINE int *cond_cost_list(const struct AV1_COMP *cpi, int *cost_list) {
-  return cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL;
-}
-
-void av1_new_framerate(AV1_COMP *cpi, double framerate);
-
-#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
-
-// Update up-sampled reference frame index.
-static INLINE void uref_cnt_fb(EncRefCntBuffer *ubufs, int *uidx,
-                               int new_uidx) {
-  const int ref_index = *uidx;
-
-  if (ref_index >= 0 && ubufs[ref_index].ref_count > 0)
-    ubufs[ref_index].ref_count--;
-
-  *uidx = new_uidx;
-  ubufs[new_uidx].ref_count++;
-}
-
-// Returns 1 if a frame is scaled and 0 otherwise.
-static INLINE int av1_resize_scaled(const AV1_COMMON *cm) {
-  return !(cm->superres_upscaled_width == cm->render_width &&
-           cm->superres_upscaled_height == cm->render_height);
-}
-
-static INLINE int av1_frame_scaled(const AV1_COMMON *cm) {
-  return !av1_superres_scaled(cm) && av1_resize_scaled(cm);
-}
-
-// Don't allow a show_existing_frame to coincide with an error resilient
-// frame. An exception can be made for a forward keyframe since it has no
-// previous dependencies.
-static INLINE int encode_show_existing_frame(const AV1_COMMON *cm) {
-  return cm->show_existing_frame &&
-         (!cm->error_resilient_mode || cm->frame_type == KEY_FRAME);
-}
-
-// Returns a Sequence Header OBU stored in an aom_fixed_buf_t, or NULL upon
-// failure. When a non-NULL aom_fixed_buf_t pointer is returned by this
-// function, the memory must be freed by the caller. Both the buf member of the
-// aom_fixed_buf_t, and the aom_fixed_buf_t pointer itself must be freed. Memory
-// returned must be freed via call to free().
-//
-// Note: The OBU returned is in Low Overhead Bitstream Format. Specifically,
-// the obu_has_size_field bit is set, and the buffer contains the obu_size
-// field.
-aom_fixed_buf_t *av1_get_global_headers(AV1_COMP *cpi);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_ENCODER_H_
diff --git a/third_party/aom/av1/encoder/encodetxb.c b/third_party/aom/av1/encoder/encodetxb.c
deleted file mode 100644
index 5a31d93d7..000000000
--- a/third_party/aom/av1/encoder/encodetxb.c
+++ /dev/null
@@ -1,2062 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/encoder/encodetxb.h"
-
-#include "aom_ports/mem.h"
-#include "av1/common/blockd.h"
-#include "av1/common/idct.h"
-#include "av1/common/pred_common.h"
-#include "av1/common/scan.h"
-#include "av1/encoder/bitstream.h"
-#include "av1/encoder/cost.h"
-#include "av1/encoder/encodeframe.h"
-#include "av1/encoder/hash.h"
-#include "av1/encoder/rdopt.h"
-#include "av1/encoder/tokenize.h"
-
-static int hbt_needs_init = 1;
-static CRC32C crc_calculator;
-static const int HBT_EOB = 16;            // also the length in opt_qcoeff
-static const int HBT_TABLE_SIZE = 65536;  // 16 bit: holds 65536 'arrays'
-static const int HBT_ARRAY_LENGTH = 256;  // 8 bit: 256 entries
-// If removed in hbt_create_hashes or increased beyond int8_t, widen deltas type
-static const int HBT_KICKOUT = 3;
-
-typedef struct OptTxbQcoeff {
-  // Use larger type if larger/no kickout value is used in hbt_create_hashes
-  int8_t deltas[16];
-  uint32_t hbt_qc_hash;
-  uint32_t hbt_ctx_hash;
-  int init;
-  int rate_cost;
-} OptTxbQcoeff;
-
-OptTxbQcoeff *hbt_hash_table;
-
-typedef struct LevelDownStats {
-  int update;
-  tran_low_t low_qc;
-  tran_low_t low_dqc;
-  int64_t dist0;
-  int rate;
-  int rate_low;
-  int64_t dist;
-  int64_t dist_low;
-  int64_t rd;
-  int64_t rd_low;
-  int64_t nz_rd;
-  int64_t rd_diff;
-  int cost_diff;
-  int64_t dist_diff;
-  int new_eob;
-} LevelDownStats;
-
-void av1_alloc_txb_buf(AV1_COMP *cpi) {
-  AV1_COMMON *cm = &cpi->common;
-  int size = ((cm->mi_rows >> cm->seq_params.mib_size_log2) + 1) *
-             ((cm->mi_cols >> cm->seq_params.mib_size_log2) + 1);
-
-  av1_free_txb_buf(cpi);
-  // TODO(jingning): This should be further reduced.
-  CHECK_MEM_ERROR(cm, cpi->coeff_buffer_base,
-                  aom_memalign(32, sizeof(*cpi->coeff_buffer_base) * size));
-}
-
-void av1_free_txb_buf(AV1_COMP *cpi) { aom_free(cpi->coeff_buffer_base); }
-
-void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x,
-                          int mi_row, int mi_col) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  int mib_size_log2 = cm->seq_params.mib_size_log2;
-  int stride = (cm->mi_cols >> mib_size_log2) + 1;
-  int offset = (mi_row >> mib_size_log2) * stride + (mi_col >> mib_size_log2);
-  CB_COEFF_BUFFER *coeff_buf = &cpi->coeff_buffer_base[offset];
-  const int txb_offset = x->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
-  assert(x->cb_offset < (1 << num_pels_log2_lookup[cm->seq_params.sb_size]));
-  for (int plane = 0; plane < num_planes; ++plane) {
-    x->mbmi_ext->tcoeff[plane] = coeff_buf->tcoeff[plane] + x->cb_offset;
-    x->mbmi_ext->eobs[plane] = coeff_buf->eobs[plane] + txb_offset;
-    x->mbmi_ext->txb_skip_ctx[plane] =
-        coeff_buf->txb_skip_ctx[plane] + txb_offset;
-    x->mbmi_ext->dc_sign_ctx[plane] =
-        coeff_buf->dc_sign_ctx[plane] + txb_offset;
-  }
-}
-
-static void write_golomb(aom_writer *w, int level) {
-  int x = level + 1;
-  int i = x;
-  int length = 0;
-
-  while (i) {
-    i >>= 1;
-    ++length;
-  }
-  assert(length > 0);
-
-  for (i = 0; i < length - 1; ++i) aom_write_bit(w, 0);
-
-  for (i = length - 1; i >= 0; --i) aom_write_bit(w, (x >> i) & 0x01);
-}
-
-static INLINE tran_low_t get_lower_coeff(tran_low_t qc) {
-  if (qc == 0) {
-    return 0;
-  }
-  return qc > 0 ? qc - 1 : qc + 1;
-}
-
-static INLINE tran_low_t qcoeff_to_dqcoeff(tran_low_t qc, int coeff_idx,
-                                           int dqv, int shift,
-                                           const qm_val_t *iqmatrix) {
-  int sign = qc < 0 ? -1 : 1;
-  if (iqmatrix != NULL)
-    dqv =
-        ((iqmatrix[coeff_idx] * dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
-  return sign * ((abs(qc) * dqv) >> shift);
-}
-
-static INLINE int64_t get_coeff_dist(tran_low_t tcoeff, tran_low_t dqcoeff,
-                                     int shift) {
-  const int64_t diff = (tcoeff - dqcoeff) * (1 << shift);
-  const int64_t error = diff * diff;
-  return error;
-}
-
-static const int8_t eob_to_pos_small[33] = {
-  0, 1, 2,                                        // 0-2
-  3, 3,                                           // 3-4
-  4, 4, 4, 4,                                     // 5-8
-  5, 5, 5, 5, 5, 5, 5, 5,                         // 9-16
-  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6  // 17-32
-};
-
-static const int8_t eob_to_pos_large[17] = {
-  6,                               // place holder
-  7,                               // 33-64
-  8,  8,                           // 65-128
-  9,  9,  9,  9,                   // 129-256
-  10, 10, 10, 10, 10, 10, 10, 10,  // 257-512
-  11                               // 513-
-};
-
-static INLINE int get_eob_pos_token(const int eob, int *const extra) {
-  int t;
-
-  if (eob < 33) {
-    t = eob_to_pos_small[eob];
-  } else {
-    const int e = AOMMIN((eob - 1) >> 5, 16);
-    t = eob_to_pos_large[e];
-  }
-
-  *extra = eob - k_eob_group_start[t];
-
-  return t;
-}
-
-#if CONFIG_ENTROPY_STATS
-void av1_update_eob_context(int cdf_idx, int eob, TX_SIZE tx_size,
-                            TX_CLASS tx_class, PLANE_TYPE plane,
-                            FRAME_CONTEXT *ec_ctx, FRAME_COUNTS *counts,
-                            uint8_t allow_update_cdf) {
-#else
-void av1_update_eob_context(int eob, TX_SIZE tx_size, TX_CLASS tx_class,
-                            PLANE_TYPE plane, FRAME_CONTEXT *ec_ctx,
-                            uint8_t allow_update_cdf) {
-#endif
-  int eob_extra;
-  const int eob_pt = get_eob_pos_token(eob, &eob_extra);
-  TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
-
-  const int eob_multi_size = txsize_log2_minus4[tx_size];
-  const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
-
-  switch (eob_multi_size) {
-    case 0:
-#if CONFIG_ENTROPY_STATS
-      ++counts->eob_multi16[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
-#endif
-      if (allow_update_cdf)
-        update_cdf(ec_ctx->eob_flag_cdf16[plane][eob_multi_ctx], eob_pt - 1, 5);
-      break;
-    case 1:
-#if CONFIG_ENTROPY_STATS
-      ++counts->eob_multi32[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
-#endif
-      if (allow_update_cdf)
-        update_cdf(ec_ctx->eob_flag_cdf32[plane][eob_multi_ctx], eob_pt - 1, 6);
-      break;
-    case 2:
-#if CONFIG_ENTROPY_STATS
-      ++counts->eob_multi64[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
-#endif
-      if (allow_update_cdf)
-        update_cdf(ec_ctx->eob_flag_cdf64[plane][eob_multi_ctx], eob_pt - 1, 7);
-      break;
-    case 3:
-#if CONFIG_ENTROPY_STATS
-      ++counts->eob_multi128[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
-#endif
-      if (allow_update_cdf) {
-        update_cdf(ec_ctx->eob_flag_cdf128[plane][eob_multi_ctx], eob_pt - 1,
-                   8);
-      }
-      break;
-    case 4:
-#if CONFIG_ENTROPY_STATS
-      ++counts->eob_multi256[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
-#endif
-      if (allow_update_cdf) {
-        update_cdf(ec_ctx->eob_flag_cdf256[plane][eob_multi_ctx], eob_pt - 1,
-                   9);
-      }
-      break;
-    case 5:
-#if CONFIG_ENTROPY_STATS
-      ++counts->eob_multi512[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
-#endif
-      if (allow_update_cdf) {
-        update_cdf(ec_ctx->eob_flag_cdf512[plane][eob_multi_ctx], eob_pt - 1,
-                   10);
-      }
-      break;
-    case 6:
-    default:
-#if CONFIG_ENTROPY_STATS
-      ++counts->eob_multi1024[cdf_idx][plane][eob_multi_ctx][eob_pt - 1];
-#endif
-      if (allow_update_cdf) {
-        update_cdf(ec_ctx->eob_flag_cdf1024[plane][eob_multi_ctx], eob_pt - 1,
-                   11);
-      }
-      break;
-  }
-
-  if (k_eob_offset_bits[eob_pt] > 0) {
-    int eob_ctx = eob_pt - 3;
-    int eob_shift = k_eob_offset_bits[eob_pt] - 1;
-    int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
-#if CONFIG_ENTROPY_STATS
-    counts->eob_extra[cdf_idx][txs_ctx][plane][eob_pt][bit]++;
-#endif  // CONFIG_ENTROPY_STATS
-    if (allow_update_cdf)
-      update_cdf(ec_ctx->eob_extra_cdf[txs_ctx][plane][eob_ctx], bit, 2);
-  }
-}
-
-static int get_eob_cost(int eob, const LV_MAP_EOB_COST *txb_eob_costs,
-                        const LV_MAP_COEFF_COST *txb_costs, TX_CLASS tx_class) {
-  int eob_extra;
-  const int eob_pt = get_eob_pos_token(eob, &eob_extra);
-  int eob_cost = 0;
-  const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
-  eob_cost = txb_eob_costs->eob_cost[eob_multi_ctx][eob_pt - 1];
-
-  if (k_eob_offset_bits[eob_pt] > 0) {
-    const int eob_ctx = eob_pt - 3;
-    const int eob_shift = k_eob_offset_bits[eob_pt] - 1;
-    const int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
-    eob_cost += txb_costs->eob_extra_cost[eob_ctx][bit];
-    const int offset_bits = k_eob_offset_bits[eob_pt];
-    if (offset_bits > 1) eob_cost += av1_cost_literal(offset_bits - 1);
-  }
-  return eob_cost;
-}
-
-static INLINE int get_sign_bit_cost(tran_low_t qc, int coeff_idx,
-                                    const int (*dc_sign_cost)[2],
-                                    int dc_sign_ctx) {
-  if (coeff_idx == 0) {
-    const int sign = (qc < 0) ? 1 : 0;
-    return dc_sign_cost[dc_sign_ctx][sign];
-  }
-  return av1_cost_literal(1);
-}
-
-static INLINE int get_br_cost(tran_low_t abs_qc, int ctx,
-                              const int *coeff_lps) {
-  const tran_low_t min_level = 1 + NUM_BASE_LEVELS;
-  const tran_low_t max_level = 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE;
-  (void)ctx;
-  if (abs_qc >= min_level) {
-    if (abs_qc >= max_level) {
-      return coeff_lps[COEFF_BASE_RANGE];  // COEFF_BASE_RANGE * cost0;
-    } else {
-      return coeff_lps[(abs_qc - min_level)];  //  * cost0 + cost1;
-    }
-  }
-  return 0;
-}
-
-static INLINE int get_golomb_cost(int abs_qc) {
-  if (abs_qc >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
-    const int r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
-    const int length = get_msb(r) + 1;
-    return av1_cost_literal(2 * length - 1);
-  }
-  return 0;
-}
-
-static int get_coeff_cost(const tran_low_t qc, const int scan_idx,
-                          const int is_eob, const TxbInfo *const txb_info,
-                          const LV_MAP_COEFF_COST *const txb_costs,
-                          const int coeff_ctx, const TX_CLASS tx_class) {
-  const TXB_CTX *const txb_ctx = txb_info->txb_ctx;
-  const int is_nz = (qc != 0);
-  const tran_low_t abs_qc = abs(qc);
-  int cost = 0;
-  const int16_t *const scan = txb_info->scan_order->scan;
-  const int pos = scan[scan_idx];
-
-  if (is_eob) {
-    cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
-  } else {
-    cost += txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
-  }
-  if (is_nz) {
-    cost += get_sign_bit_cost(qc, scan_idx, txb_costs->dc_sign_cost,
-                              txb_ctx->dc_sign_ctx);
-
-    if (abs_qc > NUM_BASE_LEVELS) {
-      const int ctx =
-          get_br_ctx(txb_info->levels, pos, txb_info->bwl, tx_class);
-      cost += get_br_cost(abs_qc, ctx, txb_costs->lps_cost[ctx]);
-      cost += get_golomb_cost(abs_qc);
-    }
-  }
-  return cost;
-}
-
-static INLINE int get_nz_map_ctx(const uint8_t *const levels,
-                                 const int coeff_idx, const int bwl,
-                                 const int height, const int scan_idx,
-                                 const int is_eob, const TX_SIZE tx_size,
-                                 const TX_CLASS tx_class) {
-  if (is_eob) {
-    if (scan_idx == 0) return 0;
-    if (scan_idx <= (height << bwl) / 8) return 1;
-    if (scan_idx <= (height << bwl) / 4) return 2;
-    return 3;
-  }
-  const int stats =
-      get_nz_mag(levels + get_padded_idx(coeff_idx, bwl), bwl, tx_class);
-  return get_nz_map_ctx_from_stats(stats, coeff_idx, bwl, tx_size, tx_class);
-}
-
-static void get_dist_cost_stats(LevelDownStats *const stats, const int scan_idx,
-                                const int is_eob,
-                                const LV_MAP_COEFF_COST *const txb_costs,
-                                const TxbInfo *const txb_info,
-                                const TX_CLASS tx_class) {
-  const int16_t *const scan = txb_info->scan_order->scan;
-  const int coeff_idx = scan[scan_idx];
-  const tran_low_t qc = txb_info->qcoeff[coeff_idx];
-  const uint8_t *const levels = txb_info->levels;
-  stats->new_eob = -1;
-  stats->update = 0;
-  stats->rd_low = 0;
-  stats->rd = 0;
-  stats->nz_rd = 0;
-  stats->dist_low = 0;
-  stats->rate_low = 0;
-  stats->low_qc = 0;
-
-  const tran_low_t tqc = txb_info->tcoeff[coeff_idx];
-  const int dqv = txb_info->dequant[coeff_idx != 0];
-  const int coeff_ctx =
-      get_nz_map_ctx(levels, coeff_idx, txb_info->bwl, txb_info->height,
-                     scan_idx, is_eob, txb_info->tx_size, tx_class);
-  const int qc_cost = get_coeff_cost(qc, scan_idx, is_eob, txb_info, txb_costs,
-                                     coeff_ctx, tx_class);
-  assert(qc != 0);
-  const tran_low_t dqc = qcoeff_to_dqcoeff(qc, coeff_idx, dqv, txb_info->shift,
-                                           txb_info->iqmatrix);
-  const int64_t dqc_dist = get_coeff_dist(tqc, dqc, txb_info->shift);
-
-  // distortion difference when coefficient is quantized to 0
-  const tran_low_t dqc0 =
-      qcoeff_to_dqcoeff(0, coeff_idx, dqv, txb_info->shift, txb_info->iqmatrix);
-
-  stats->dist0 = get_coeff_dist(tqc, dqc0, txb_info->shift);
-  stats->dist = dqc_dist - stats->dist0;
-  stats->rate = qc_cost;
-
-  stats->rd = RDCOST(txb_info->rdmult, stats->rate, stats->dist);
-
-  stats->low_qc = get_lower_coeff(qc);
-
-  if (is_eob && stats->low_qc == 0) {
-    stats->rd_low = stats->rd;  // disable selection of low_qc in this case.
-  } else {
-    if (stats->low_qc == 0) {
-      stats->dist_low = 0;
-    } else {
-      stats->low_dqc = qcoeff_to_dqcoeff(stats->low_qc, coeff_idx, dqv,
-                                         txb_info->shift, txb_info->iqmatrix);
-      const int64_t low_dqc_dist =
-          get_coeff_dist(tqc, stats->low_dqc, txb_info->shift);
-      stats->dist_low = low_dqc_dist - stats->dist0;
-    }
-    const int low_qc_cost =
-        get_coeff_cost(stats->low_qc, scan_idx, is_eob, txb_info, txb_costs,
-                       coeff_ctx, tx_class);
-    stats->rate_low = low_qc_cost;
-    stats->rd_low = RDCOST(txb_info->rdmult, stats->rate_low, stats->dist_low);
-  }
-}
-
-static void get_dist_cost_stats_with_eob(
-    LevelDownStats *const stats, const int scan_idx,
-    const LV_MAP_COEFF_COST *const txb_costs, const TxbInfo *const txb_info,
-    const TX_CLASS tx_class) {
-  const int is_eob = 0;
-  get_dist_cost_stats(stats, scan_idx, is_eob, txb_costs, txb_info, tx_class);
-
-  const int16_t *const scan = txb_info->scan_order->scan;
-  const int coeff_idx = scan[scan_idx];
-  const tran_low_t qc = txb_info->qcoeff[coeff_idx];
-  const int coeff_ctx_temp = get_nz_map_ctx(
-      txb_info->levels, coeff_idx, txb_info->bwl, txb_info->height, scan_idx, 1,
-      txb_info->tx_size, tx_class);
-  const int qc_eob_cost = get_coeff_cost(qc, scan_idx, 1, txb_info, txb_costs,
-                                         coeff_ctx_temp, tx_class);
-  int64_t rd_eob = RDCOST(txb_info->rdmult, qc_eob_cost, stats->dist);
-  if (stats->low_qc != 0) {
-    const int low_qc_eob_cost =
-        get_coeff_cost(stats->low_qc, scan_idx, 1, txb_info, txb_costs,
-                       coeff_ctx_temp, tx_class);
-    int64_t rd_eob_low =
-        RDCOST(txb_info->rdmult, low_qc_eob_cost, stats->dist_low);
-    rd_eob = (rd_eob > rd_eob_low) ? rd_eob_low : rd_eob;
-  }
-
-  stats->nz_rd = AOMMIN(stats->rd_low, stats->rd) - rd_eob;
-}
-
-static INLINE void update_qcoeff(const int coeff_idx, const tran_low_t qc,
-                                 const TxbInfo *const txb_info) {
-  txb_info->qcoeff[coeff_idx] = qc;
-  txb_info->levels[get_padded_idx(coeff_idx, txb_info->bwl)] =
-      (uint8_t)clamp(abs(qc), 0, INT8_MAX);
-}
-
-static INLINE void update_coeff(const int coeff_idx, const tran_low_t qc,
-                                const TxbInfo *const txb_info) {
-  update_qcoeff(coeff_idx, qc, txb_info);
-  const int dqv = txb_info->dequant[coeff_idx != 0];
-  txb_info->dqcoeff[coeff_idx] = qcoeff_to_dqcoeff(
-      qc, coeff_idx, dqv, txb_info->shift, txb_info->iqmatrix);
-}
-
-void av1_txb_init_levels_c(const tran_low_t *const coeff, const int width,
-                           const int height, uint8_t *const levels) {
-  const int stride = width + TX_PAD_HOR;
-  uint8_t *ls = levels;
-
-  memset(levels - TX_PAD_TOP * stride, 0,
-         sizeof(*levels) * TX_PAD_TOP * stride);
-  memset(levels + stride * height, 0,
-         sizeof(*levels) * (TX_PAD_BOTTOM * stride + TX_PAD_END));
-
-  for (int i = 0; i < height; i++) {
-    for (int j = 0; j < width; j++) {
-      *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, INT8_MAX);
-    }
-    for (int j = 0; j < TX_PAD_HOR; j++) {
-      *ls++ = 0;
-    }
-  }
-}
-
-void av1_get_nz_map_contexts_c(const uint8_t *const levels,
-                               const int16_t *const scan, const uint16_t eob,
-                               const TX_SIZE tx_size, const TX_CLASS tx_class,
-                               int8_t *const coeff_contexts) {
-  const int bwl = get_txb_bwl(tx_size);
-  const int height = get_txb_high(tx_size);
-  for (int i = 0; i < eob; ++i) {
-    const int pos = scan[i];
-    coeff_contexts[pos] = get_nz_map_ctx(levels, pos, bwl, height, i,
-                                         i == eob - 1, tx_size, tx_class);
-  }
-}
-
-void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
-                          aom_writer *w, int blk_row, int blk_col, int plane,
-                          TX_SIZE tx_size, const tran_low_t *tcoeff,
-                          uint16_t eob, TXB_CTX *txb_ctx) {
-  const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
-  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-  aom_write_symbol(w, eob == 0,
-                   ec_ctx->txb_skip_cdf[txs_ctx][txb_ctx->txb_skip_ctx], 2);
-  if (eob == 0) return;
-  const PLANE_TYPE plane_type = get_plane_type(plane);
-  const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
-                                          tx_size, cm->reduced_tx_set_used);
-  const TX_CLASS tx_class = tx_type_to_class[tx_type];
-  const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
-  const int16_t *const scan = scan_order->scan;
-  int c;
-  const int bwl = get_txb_bwl(tx_size);
-  const int width = get_txb_wide(tx_size);
-  const int height = get_txb_high(tx_size);
-
-  uint8_t levels_buf[TX_PAD_2D];
-  uint8_t *const levels = set_levels(levels_buf, width);
-  DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
-  av1_txb_init_levels(tcoeff, width, height, levels);
-
-  av1_write_tx_type(cm, xd, blk_row, blk_col, plane, tx_size, w);
-
-  int eob_extra;
-  const int eob_pt = get_eob_pos_token(eob, &eob_extra);
-  const int eob_multi_size = txsize_log2_minus4[tx_size];
-  const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
-  switch (eob_multi_size) {
-    case 0:
-      aom_write_symbol(w, eob_pt - 1,
-                       ec_ctx->eob_flag_cdf16[plane_type][eob_multi_ctx], 5);
-      break;
-    case 1:
-      aom_write_symbol(w, eob_pt - 1,
-                       ec_ctx->eob_flag_cdf32[plane_type][eob_multi_ctx], 6);
-      break;
-    case 2:
-      aom_write_symbol(w, eob_pt - 1,
-                       ec_ctx->eob_flag_cdf64[plane_type][eob_multi_ctx], 7);
-      break;
-    case 3:
-      aom_write_symbol(w, eob_pt - 1,
-                       ec_ctx->eob_flag_cdf128[plane_type][eob_multi_ctx], 8);
-      break;
-    case 4:
-      aom_write_symbol(w, eob_pt - 1,
-                       ec_ctx->eob_flag_cdf256[plane_type][eob_multi_ctx], 9);
-      break;
-    case 5:
-      aom_write_symbol(w, eob_pt - 1,
-                       ec_ctx->eob_flag_cdf512[plane_type][eob_multi_ctx], 10);
-      break;
-    default:
-      aom_write_symbol(w, eob_pt - 1,
-                       ec_ctx->eob_flag_cdf1024[plane_type][eob_multi_ctx], 11);
-      break;
-  }
-
-  if (k_eob_offset_bits[eob_pt] > 0) {
-    const int eob_ctx = eob_pt - 3;
-    int eob_shift = k_eob_offset_bits[eob_pt] - 1;
-    int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
-    aom_write_symbol(w, bit,
-                     ec_ctx->eob_extra_cdf[txs_ctx][plane_type][eob_ctx], 2);
-    for (int i = 1; i < k_eob_offset_bits[eob_pt]; i++) {
-      eob_shift = k_eob_offset_bits[eob_pt] - 1 - i;
-      bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
-      aom_write_bit(w, bit);
-    }
-  }
-
-  av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts);
-
-  for (c = eob - 1; c >= 0; --c) {
-    const int pos = scan[c];
-    const int coeff_ctx = coeff_contexts[pos];
-    const tran_low_t v = tcoeff[pos];
-    const tran_low_t level = abs(v);
-
-    if (c == eob - 1) {
-      aom_write_symbol(
-          w, AOMMIN(level, 3) - 1,
-          ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx], 3);
-    } else {
-      aom_write_symbol(w, AOMMIN(level, 3),
-                       ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx],
-                       4);
-    }
-    if (level > NUM_BASE_LEVELS) {
-      // level is above 1.
-      const int base_range = level - 1 - NUM_BASE_LEVELS;
-      const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class);
-      for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
-        const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1);
-        aom_write_symbol(
-            w, k,
-            ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx],
-            BR_CDF_SIZE);
-        if (k < BR_CDF_SIZE - 1) break;
-      }
-    }
-  }
-
-  // Loop to code all signs in the transform block,
-  // starting with the sign of DC (if applicable)
-  for (c = 0; c < eob; ++c) {
-    const tran_low_t v = tcoeff[scan[c]];
-    const tran_low_t level = abs(v);
-    const int sign = (v < 0) ? 1 : 0;
-    if (level) {
-      if (c == 0) {
-        aom_write_symbol(
-            w, sign, ec_ctx->dc_sign_cdf[plane_type][txb_ctx->dc_sign_ctx], 2);
-      } else {
-        aom_write_bit(w, sign);
-      }
-      if (level > COEFF_BASE_RANGE + NUM_BASE_LEVELS)
-        write_golomb(w, level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS);
-    }
-  }
-}
-
-typedef struct encode_txb_args {
-  const AV1_COMMON *cm;
-  MACROBLOCK *x;
-  aom_writer *w;
-} ENCODE_TXB_ARGS;
-
-static void write_coeffs_txb_wrap(const AV1_COMMON *cm, MACROBLOCK *x,
-                                  aom_writer *w, int plane, int block,
-                                  int blk_row, int blk_col, TX_SIZE tx_size) {
-  MACROBLOCKD *xd = &x->e_mbd;
-  tran_low_t *tcoeff = BLOCK_OFFSET(x->mbmi_ext->tcoeff[plane], block);
-  uint16_t eob = x->mbmi_ext->eobs[plane][block];
-  TXB_CTX txb_ctx = { x->mbmi_ext->txb_skip_ctx[plane][block],
-                      x->mbmi_ext->dc_sign_ctx[plane][block] };
-  av1_write_coeffs_txb(cm, xd, w, blk_row, blk_col, plane, tx_size, tcoeff, eob,
-                       &txb_ctx);
-}
-
-void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row,
-                         int mi_col, aom_writer *w, BLOCK_SIZE bsize) {
-  MACROBLOCKD *xd = &x->e_mbd;
-  const int num_planes = av1_num_planes(cm);
-  int block[MAX_MB_PLANE] = { 0 };
-  int row, col;
-  assert(bsize == get_plane_block_size(bsize, xd->plane[0].subsampling_x,
-                                       xd->plane[0].subsampling_y));
-  const int max_blocks_wide = max_block_wide(xd, bsize, 0);
-  const int max_blocks_high = max_block_high(xd, bsize, 0);
-  const BLOCK_SIZE max_unit_bsize = BLOCK_64X64;
-  int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
-  int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
-  mu_blocks_wide = AOMMIN(max_blocks_wide, mu_blocks_wide);
-  mu_blocks_high = AOMMIN(max_blocks_high, mu_blocks_high);
-
-  for (row = 0; row < max_blocks_high; row += mu_blocks_high) {
-    for (col = 0; col < max_blocks_wide; col += mu_blocks_wide) {
-      for (int plane = 0; plane < num_planes; ++plane) {
-        const struct macroblockd_plane *const pd = &xd->plane[plane];
-        if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
-                                 pd->subsampling_y))
-          continue;
-        const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
-        const int stepr = tx_size_high_unit[tx_size];
-        const int stepc = tx_size_wide_unit[tx_size];
-        const int step = stepr * stepc;
-
-        const int unit_height = ROUND_POWER_OF_TWO(
-            AOMMIN(mu_blocks_high + row, max_blocks_high), pd->subsampling_y);
-        const int unit_width = ROUND_POWER_OF_TWO(
-            AOMMIN(mu_blocks_wide + col, max_blocks_wide), pd->subsampling_x);
-        for (int blk_row = row >> pd->subsampling_y; blk_row < unit_height;
-             blk_row += stepr) {
-          for (int blk_col = col >> pd->subsampling_x; blk_col < unit_width;
-               blk_col += stepc) {
-            write_coeffs_txb_wrap(cm, x, w, plane, block[plane], blk_row,
-                                  blk_col, tx_size);
-            block[plane] += step;
-          }
-        }
-      }
-    }
-  }
-}
-
-// TODO(angiebird): use this function whenever it's possible
-// Looks up the rate cost of signalling `tx_type` for the current block.
-// Returns 0 whenever no lookup applies: for plane > 0, when
-// get_ext_tx_types() reports at most one type, when the block's segment is
-// lossless, or when the selected extended transform set index is not positive.
-static int get_tx_type_cost(const AV1_COMMON *cm, const MACROBLOCK *x,
-                            const MACROBLOCKD *xd, int plane, TX_SIZE tx_size,
-                            TX_TYPE tx_type) {
-  if (plane > 0) return 0;
-
-  const MB_MODE_INFO *const mode_info = xd->mi[0];
-  const int inter_coded = is_inter_block(mode_info);
-
-  if (get_ext_tx_types(tx_size, inter_coded, cm->reduced_tx_set_used) <= 1)
-    return 0;
-  if (xd->lossless[mode_info->segment_id]) return 0;
-
-  const int tx_set =
-      get_ext_tx_set(tx_size, inter_coded, cm->reduced_tx_set_used);
-  if (tx_set <= 0) return 0;
-
-  // The cost tables are indexed by the square-mapped transform size.
-  const TX_SIZE sqr_size = txsize_sqr_map[tx_size];
-  if (inter_coded) return x->inter_tx_type_costs[tx_set][sqr_size][tx_type];
-
-  // Intra costs also depend on the prediction direction; filter-intra blocks
-  // map their filter mode to a direction first.
-  const PREDICTION_MODE direction =
-      mode_info->filter_intra_mode_info.use_filter_intra
-          ? fimode_to_intradir[mode_info->filter_intra_mode_info
-                                   .filter_intra_mode]
-          : mode_info->mode;
-  return x->intra_tx_type_costs[tx_set][sqr_size][direction][tx_type];
-}
-
-// Computes the rate cost of one transform block whose eob is non-zero (the
-// visible caller, av1_cost_coeffs_txb, handles eob == 0 itself).
-//
-// The total is built from: the txb_skip cost entry [ctx][0], the transform
-// type cost, the eob cost, and then one cost per coefficient, visited in
-// reverse scan order from scan index eob - 1 down to 0. The last coefficient
-// (eob - 1) uses base_eob_cost, the others use base_cost; scan index 0 pays
-// dc_sign_cost for its sign, every other non-zero coefficient pays a 1-bit
-// literal.
-//
-// NOTE(review): tx_class is a separate parameter although it could be derived
-// from tx_type; presumably so the force-inlined body can be specialised per
-// class by the caller's switch -- confirm.
-static AOM_FORCE_INLINE int warehouse_efficients_txb(
-    const AV1_COMMON *const cm, const MACROBLOCK *x, const int plane,
-    const int block, const TX_SIZE tx_size, const TXB_CTX *const txb_ctx,
-    const struct macroblock_plane *p, const int eob,
-    const PLANE_TYPE plane_type, const LV_MAP_COEFF_COST *const coeff_costs,
-    const MACROBLOCKD *const xd, const TX_TYPE tx_type,
-    const TX_CLASS tx_class) {
-  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
-  const int txb_skip_ctx = txb_ctx->txb_skip_ctx;
-  const int bwl = get_txb_bwl(tx_size);
-  const int width = get_txb_wide(tx_size);
-  const int height = get_txb_high(tx_size);
-  const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
-  const int16_t *const scan = scan_order->scan;
-  uint8_t levels_buf[TX_PAD_2D];
-  uint8_t *const levels = set_levels(levels_buf, width);
-  DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
-  const int eob_multi_size = txsize_log2_minus4[tx_size];
-  const LV_MAP_EOB_COST *const eob_costs =
-      &x->eob_costs[eob_multi_size][plane_type];
-  // Start from the cost of signalling "this block is not skipped".
-  int cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0];
-
-  av1_txb_init_levels(qcoeff, width, height, levels);
-
-  cost += get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type);
-
-  cost += get_eob_cost(eob, eob_costs, coeff_costs, tx_class);
-
-  av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts);
-
-  const int(*lps_cost)[COEFF_BASE_RANGE + 1] = coeff_costs->lps_cost;
-  int c = eob - 1;
-  // Last coefficient in scan order (scan index eob - 1).
-  {
-    const int pos = scan[c];
-    const tran_low_t v = qcoeff[pos];
-    // 0 for v >= 0, all-ones (-1) for v < 0.
-    // NOTE(review): assumes a 32-bit tran_low_t and an arithmetic right
-    // shift of negative values -- confirm.
-    const int sign = v >> 31;
-    // Branch-free absolute value of v.
-    const int level = (v ^ sign) - sign;
-    const int coeff_ctx = coeff_contexts[pos];
-    // base_eob_cost has no entry for level 0, hence the "- 1"; this relies on
-    // the coefficient at eob - 1 being non-zero.
-    cost += coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1];
-
-    if (v) {
-      // Range and golomb cost for levels above the base levels, followed by
-      // the sign cost.
-      if (level > NUM_BASE_LEVELS) {
-        const int ctx = get_br_ctx(levels, pos, bwl, tx_class);
-        const int base_range =
-            AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
-        cost += lps_cost[ctx][base_range];
-        cost += get_golomb_cost(level);
-      }
-      if (c) {
-        // Not at scan index 0: the sign costs one literal bit.
-        cost += av1_cost_literal(1);
-      } else {
-        // eob == 1: this is the only coefficient, so finish here.
-        // (sign ^ sign) is always 0, so sign01 is -sign: 0 when v > 0 and
-        // 1 when v < 0.
-        const int sign01 = (sign ^ sign) - sign;
-        const int dc_sign_ctx = txb_ctx->dc_sign_ctx;
-        cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign01];
-        return cost;
-      }
-    }
-  }
-  const int(*base_cost)[4] = coeff_costs->base_cost;
-  // Middle coefficients: scan indices eob - 2 down to 1.
-  for (c = eob - 2; c >= 1; --c) {
-    const int pos = scan[c];
-    const int coeff_ctx = coeff_contexts[pos];
-    const tran_low_t v = qcoeff[pos];
-    const int level = abs(v);
-    const int cost0 = base_cost[coeff_ctx][AOMMIN(level, 3)];
-    if (v) {
-      // sign bit cost
-      cost += av1_cost_literal(1);
-      if (level > NUM_BASE_LEVELS) {
-        const int ctx = get_br_ctx(levels, pos, bwl, tx_class);
-        const int base_range =
-            AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
-        cost += lps_cost[ctx][base_range];
-        cost += get_golomb_cost(level);
-      }
-    }
-    cost += cost0;
-  }
-  // The loop above leaves c == 0 whenever eob >= 2; scan index 0 is costed
-  // separately because its sign uses dc_sign_cost instead of a literal bit.
-  if (c == 0) {
-    const int pos = scan[c];
-    const tran_low_t v = qcoeff[pos];
-    const int coeff_ctx = coeff_contexts[pos];
-    const int sign = v >> 31;
-    const int level = (v ^ sign) - sign;
-    cost += base_cost[coeff_ctx][AOMMIN(level, 3)];
-
-    if (v) {
-      // sign cost; sign01 is -sign, i.e. 0 for positive and 1 for negative
-      const int sign01 = (sign ^ sign) - sign;
-      const int dc_sign_ctx = txb_ctx->dc_sign_ctx;
-      cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign01];
-      if (level > NUM_BASE_LEVELS) {
-        const int ctx = get_br_ctx(levels, pos, bwl, tx_class);
-        const int base_range =
-            AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE);
-        cost += lps_cost[ctx][base_range];
-        cost += get_golomb_cost(level);
-      }
-    }
-  }
-  return cost;
-}
-
-// Returns the rate cost of the coefficients of one transform block.
-// An empty block (eob == 0) costs only the txb_skip entry [ctx][1]; otherwise
-// the work is delegated to warehouse_efficients_txb(), called with a literal
-// transform class in each switch case.
-int av1_cost_coeffs_txb(const AV1_COMMON *const cm, const MACROBLOCK *x,
-                        const int plane, const int block, const TX_SIZE tx_size,
-                        const TX_TYPE tx_type, const TXB_CTX *const txb_ctx) {
-  const struct macroblock_plane *p = &x->plane[plane];
-  const PLANE_TYPE plane_type = get_plane_type(plane);
-  const LV_MAP_COEFF_COST *const coeff_costs =
-      &x->coeff_costs[get_txsize_entropy_ctx(tx_size)][plane_type];
-  const int eob = p->eobs[block];
-
-  if (eob == 0) return coeff_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1];
-
-  const MACROBLOCKD *const xd = &x->e_mbd;
-
-  switch (tx_type_to_class[tx_type]) {
-    case TX_CLASS_2D:
-      return warehouse_efficients_txb(cm, x, plane, block, tx_size, txb_ctx, p,
-                                      eob, plane_type, coeff_costs, xd,
-                                      tx_type, TX_CLASS_2D);
-    case TX_CLASS_HORIZ:
-      return warehouse_efficients_txb(cm, x, plane, block, tx_size, txb_ctx, p,
-                                      eob, plane_type, coeff_costs, xd,
-                                      tx_type, TX_CLASS_HORIZ);
-    case TX_CLASS_VERT:
-      return warehouse_efficients_txb(cm, x, plane, block, tx_size, txb_ctx, p,
-                                      eob, plane_type, coeff_costs, xd,
-                                      tx_type, TX_CLASS_VERT);
-    default: assert(false); return 0;
-  }
-}
-
-// Greedy rate-distortion refinement of the quantized coefficients described
-// by txb_info.
-//
-// Walks the scan backwards from the last coefficient. For each non-zero
-// coefficient it compares the RD cost of keeping the level (stats.rd) against
-// the candidate in stats.low_qc (stats.rd_low), and while the eob is still
-// movable it also checks whether ending the block at the current position is
-// cheaper than everything accumulated so far. Finally it compares the whole
-// block against coding it as all-zero.
-//
-// Side effects: coefficients are rewritten through update_coeff() and
-// txb_info->eob may shrink (possibly to 0).
-//
-// *rate_cost receives the rate of the chosen result, plus
-// txb_info->tx_type_cost when the final eob is non-zero. If txb_info->eob is
-// already 0 on entry the function returns immediately and leaves *rate_cost
-// untouched.
-//
-// Returns 1 if any coefficient or the eob was changed, 0 otherwise.
-static int optimize_txb(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
-                        const LV_MAP_EOB_COST *txb_eob_costs, int *rate_cost) {
-  int update = 0;
-  if (txb_info->eob == 0) return update;
-  const int16_t *const scan = txb_info->scan_order->scan;
-  // forward optimize the nz_map
-  const int init_eob = txb_info->eob;
-  const TX_CLASS tx_class = tx_type_to_class[txb_info->tx_type];
-  const int eob_cost =
-      get_eob_cost(init_eob, txb_eob_costs, txb_costs, tx_class);
-
-  // backward optimize the level-k map
-  int accu_rate = eob_cost;
-  int64_t accu_dist = 0;
-  int64_t prev_eob_rd_cost = INT64_MAX;
-  int64_t cur_eob_rd_cost = 0;
-
-  // Last coefficient (scan index init_eob - 1): it may be lowered, but never
-  // to zero, since that would change the eob.
-  {
-    const int si = init_eob - 1;
-    const int coeff_idx = scan[si];
-    LevelDownStats stats;
-    get_dist_cost_stats(&stats, si, si == init_eob - 1, txb_costs, txb_info,
-                        tx_class);
-    if ((stats.rd_low < stats.rd) && (stats.low_qc != 0)) {
-      update = 1;
-      update_coeff(coeff_idx, stats.low_qc, txb_info);
-      accu_rate += stats.rate_low;
-      accu_dist += stats.dist_low;
-    } else {
-      accu_rate += stats.rate;
-      accu_dist += stats.dist;
-    }
-  }
-
-  int si = init_eob - 2;
-  // Counts non-zero coefficients (since the last eob move) for which the
-  // lowered level was not better; once it reaches 2 the eob search stops.
-  int8_t has_nz_tail = 0;
-  // eob is not fixed
-  for (; si >= 0 && has_nz_tail < 2; --si) {
-    assert(si != init_eob - 1);
-    const int coeff_idx = scan[si];
-    tran_low_t qc = txb_info->qcoeff[coeff_idx];
-
-    if (qc == 0) {
-      const int coeff_ctx =
-          get_lower_levels_ctx(txb_info->levels, coeff_idx, txb_info->bwl,
-                               txb_info->tx_size, tx_class);
-      accu_rate += txb_costs->base_cost[coeff_ctx][0];
-    } else {
-      LevelDownStats stats;
-      get_dist_cost_stats_with_eob(&stats, si, txb_costs, txb_info, tx_class);
-      // check if it is better to make this the last significant coefficient
-      int cur_eob_rate =
-          get_eob_cost(si + 1, txb_eob_costs, txb_costs, tx_class);
-      cur_eob_rd_cost = RDCOST(txb_info->rdmult, cur_eob_rate, 0);
-      prev_eob_rd_cost =
-          RDCOST(txb_info->rdmult, accu_rate, accu_dist) + stats.nz_rd;
-      if (cur_eob_rd_cost <= prev_eob_rd_cost) {
-        update = 1;
-        // Zero every coefficient after the new last position.
-        for (int j = si + 1; j < txb_info->eob; j++) {
-          const int coeff_pos_j = scan[j];
-          update_coeff(coeff_pos_j, 0, txb_info);
-        }
-        txb_info->eob = si + 1;
-
-        // rerun cost calculation due to change of eob
-        accu_rate = cur_eob_rate;
-        accu_dist = 0;
-        get_dist_cost_stats(&stats, si, 1, txb_costs, txb_info, tx_class);
-        if ((stats.rd_low < stats.rd) && (stats.low_qc != 0)) {
-          update = 1;
-          update_coeff(coeff_idx, stats.low_qc, txb_info);
-          accu_rate += stats.rate_low;
-          accu_dist += stats.dist_low;
-        } else {
-          accu_rate += stats.rate;
-          accu_dist += stats.dist;
-        }
-
-        // reset non zero tail when new eob is found
-        has_nz_tail = 0;
-      } else {
-        // Keep the eob; only consider lowering this coefficient, and only if
-        // it is not the current last one.
-        int bUpdCoeff = 0;
-        if (stats.rd_low < stats.rd) {
-          if ((si < txb_info->eob - 1)) {
-            bUpdCoeff = 1;
-            update = 1;
-          }
-        } else {
-          ++has_nz_tail;
-        }
-
-        if (bUpdCoeff) {
-          update_coeff(coeff_idx, stats.low_qc, txb_info);
-          accu_rate += stats.rate_low;
-          accu_dist += stats.dist_low;
-        } else {
-          accu_rate += stats.rate;
-          accu_dist += stats.dist;
-        }
-      }
-    }
-  }  // for (si)
-
-  // eob is fixed
-  // Continues from wherever the loop above stopped; only level lowering is
-  // considered from here on.
-  for (; si >= 0; --si) {
-    assert(si != init_eob - 1);
-    const int coeff_idx = scan[si];
-    tran_low_t qc = txb_info->qcoeff[coeff_idx];
-
-    if (qc == 0) {
-      const int coeff_ctx =
-          get_lower_levels_ctx(txb_info->levels, coeff_idx, txb_info->bwl,
-                               txb_info->tx_size, tx_class);
-      accu_rate += txb_costs->base_cost[coeff_ctx][0];
-    } else {
-      LevelDownStats stats;
-      get_dist_cost_stats(&stats, si, 0, txb_costs, txb_info, tx_class);
-
-      int bUpdCoeff = 0;
-      if (stats.rd_low < stats.rd) {
-        if ((si < txb_info->eob - 1)) {
-          bUpdCoeff = 1;
-          update = 1;
-        }
-      }
-      if (bUpdCoeff) {
-        update_coeff(coeff_idx, stats.low_qc, txb_info);
-        accu_rate += stats.rate_low;
-        accu_dist += stats.dist_low;
-      } else {
-        accu_rate += stats.rate;
-        accu_dist += stats.dist;
-      }
-    }
-  }  // for (si)
-
-  // Compare the optimized block (with the "not skipped" flag cost) against
-  // signalling the block as all-zero.
-  int non_zero_blk_rate =
-      txb_costs->txb_skip_cost[txb_info->txb_ctx->txb_skip_ctx][0];
-  prev_eob_rd_cost =
-      RDCOST(txb_info->rdmult, accu_rate + non_zero_blk_rate, accu_dist);
-
-  int zero_blk_rate =
-      txb_costs->txb_skip_cost[txb_info->txb_ctx->txb_skip_ctx][1];
-  int64_t zero_blk_rd_cost = RDCOST(txb_info->rdmult, zero_blk_rate, 0);
-  if (zero_blk_rd_cost <= prev_eob_rd_cost) {
-    update = 1;
-    for (int j = 0; j < txb_info->eob; j++) {
-      const int coeff_pos_j = scan[j];
-      update_coeff(coeff_pos_j, 0, txb_info);
-    }
-    txb_info->eob = 0;
-  }
-
-  // record total rate cost
-  *rate_cost = zero_blk_rd_cost <= prev_eob_rd_cost
-                   ? zero_blk_rate
-                   : accu_rate + non_zero_blk_rate;
-
-  // The transform type is only charged when something is left to code.
-  if (txb_info->eob > 0) {
-    *rate_cost += txb_info->tx_type_cost;
-  }
-
-  return update;
-}
-
-// These numbers are empirically obtained.
-// The table is dimensioned [REF_TYPES][PLANE_TYPES], i.e. one row per
-// reference type and one column per plane type.
-// NOTE(review): presumably a per-plane scale applied to the RD multiplier;
-// the code that reads it is outside this section -- confirm.
-static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
-  { 17, 13 },
-  { 16, 10 },
-};
-
-// Allocates and zero-fills the file-level hash table (HBT_TABLE_SIZE *
-// HBT_ARRAY_LENGTH entries of OptTxbQcoeff) and initialises the CRC32C
-// calculator used to build the hashes.
-//
-// On success hbt_needs_init is cleared. If the allocation fails the function
-// returns with hbt_hash_table == NULL and hbt_needs_init left set, so nothing
-// is written through a null pointer and a later call can try again.
-void hbt_init() {
-  const size_t table_bytes =
-      sizeof(OptTxbQcoeff) * HBT_TABLE_SIZE * HBT_ARRAY_LENGTH;
-
-  hbt_hash_table = aom_malloc(table_bytes);
-  // Passing a null pointer to memset is undefined behaviour; bail out instead
-  // of marking an unusable table as initialised.
-  if (hbt_hash_table == NULL) return;
-
-  memset(hbt_hash_table, 0, table_bytes);
-  av1_crc32c_calculator_init(&crc_calculator);  // 31 bit: qc & ctx
-
-  hbt_needs_init = 0;
-}
-
-// Releases the hash table and returns the module to its uninitialised state.
-// Clearing the pointer and setting hbt_needs_init again makes a repeated
-// destroy harmless and makes the next hbt_create_hashes() call rebuild the
-// table instead of reading freed memory.
-void hbt_destroy() {
-  aom_free(hbt_hash_table);
-  hbt_hash_table = NULL;
-  hbt_needs_init = 1;
-}
-
-// Handles a hash-table miss: runs optimize_txb() on the block and stores the
-// outcome (rate cost, both hashes and per-coefficient deltas) in the table
-// slot selected by the two hashes, overwriting whatever was there.
-//
-// Deltas are stored per scan index with the convention "towards zero is
-// negative": a delta of -k means the coefficient's magnitude shrank by k.
-// The direction is taken from the sign of the coefficient *before*
-// optimisation, so a coefficient that was optimised all the way to zero is
-// recorded with a negative delta regardless of its original sign (taking the
-// sign from the new value would record a positive delta for positive
-// coefficients that became zero, and hbt_hash_hit() only replays negative
-// deltas).
-//
-// When optimize_txb() reports a change, p->eobs[block] and
-// p->txb_entropy_ctx[block] are refreshed. Returns the resulting eob.
-// fast_mode is unused.
-int hbt_hash_miss(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash,
-                  TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
-                  const LV_MAP_EOB_COST *txb_eob_costs,
-                  const struct macroblock_plane *p, int block, int fast_mode,
-                  int *rate_cost) {
-  (void)fast_mode;
-  const int16_t *scan = txb_info->scan_order->scan;
-  const int prev_eob = txb_info->eob;
-  assert(HBT_EOB <= 16);       // Lengthen array if allowing longer eob.
-  assert(prev_eob <= HBT_EOB);  // prev_coeff and deltas hold HBT_EOB entries.
-
-  // Snapshot of the coefficients before optimisation, in scan order.
-  int32_t prev_coeff[16];
-  for (int i = 0; i < prev_eob; i++) {
-    prev_coeff[i] = txb_info->qcoeff[scan[i]];
-  }
-  for (int i = prev_eob; i < HBT_EOB; i++) {
-    prev_coeff[i] = 0;  // For compiler peace of mind.
-  }
-
-  av1_txb_init_levels(txb_info->qcoeff, txb_info->width, txb_info->height,
-                      txb_info->levels);
-
-  const int update =
-      optimize_txb(txb_info, txb_costs, txb_eob_costs, rate_cost);
-
-  // Overwrite old entry. The index types match the lookup in
-  // hbt_search_match() so both always address the same slot.
-  const uint32_t hbt_table_index = hbt_ctx_hash % HBT_TABLE_SIZE;
-  const uint32_t hbt_array_index = hbt_qc_hash % HBT_ARRAY_LENGTH;
-  OptTxbQcoeff *const entry =
-      &hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index];
-  entry->rate_cost = *rate_cost;
-  entry->init = 1;
-  entry->hbt_qc_hash = hbt_qc_hash;
-  entry->hbt_ctx_hash = hbt_ctx_hash;
-
-  assert(prev_eob >= txb_info->eob);  // eob can't get longer
-  for (int i = 0; i < prev_eob; i++) {
-    // Positions at or beyond the new eob were changed to zero.
-    const int32_t new_coeff =
-        (i < txb_info->eob) ? txb_info->qcoeff[scan[i]] : 0;
-    // Record how coeff changed. Convention: towards zero is negative.
-    if (prev_coeff[i] > 0)
-      entry->deltas[i] = new_coeff - prev_coeff[i];
-    else
-      entry->deltas[i] = prev_coeff[i] - new_coeff;
-  }
-  for (int i = prev_eob; i < HBT_EOB; i++) {
-    // Record 'no change' after optimized coefficients run out.
-    entry->deltas[i] = 0;
-  }
-
-  if (update) {
-    p->eobs[block] = txb_info->eob;
-    p->txb_entropy_ctx[block] = av1_get_txb_entropy_context(
-        txb_info->qcoeff, txb_info->scan_order, txb_info->eob);
-  }
-  return txb_info->eob;
-}
-
-// Handles a hash-table hit: replays the stored deltas of the addressed entry
-// onto the block's coefficients instead of re-running the optimisation.
-// Only negative deltas (magnitude moves towards zero) are applied. The rate
-// cost is copied from the entry rather than recomputed. If anything changed,
-// the eob is set to one past the last non-zero scan index and
-// p->eobs[block] / p->txb_entropy_ctx[block] are refreshed.
-// Returns the (possibly updated) eob.
-int hbt_hash_hit(uint32_t hbt_table_index, int hbt_array_index,
-                 TxbInfo *txb_info, const struct macroblock_plane *p, int block,
-                 int *rate_cost) {
-  const OptTxbQcoeff *const entry =
-      &hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index];
-  const int16_t *scan = txb_info->scan_order->scan;
-  int changed = 0;
-  int last_nonzero_end = 0;
-
-  for (int i = 0; i < txb_info->eob; i++) {
-    const int pos = scan[i];
-    // Delta convention is negatives go towards zero, so only apply those ones.
-    if (entry->deltas[i] < 0) {
-      if (txb_info->qcoeff[pos] > 0) {
-        txb_info->qcoeff[pos] += entry->deltas[i];
-      } else {
-        txb_info->qcoeff[pos] -= entry->deltas[i];
-      }
-      changed = 1;
-      update_coeff(pos, txb_info->qcoeff[pos], txb_info);
-    }
-    if (txb_info->qcoeff[pos]) last_nonzero_end = i + 1;
-  }
-
-  // Rate_cost can be calculated here instead (av1_cost_coeffs_txb), but
-  // it is expensive and gives little benefit as long as qc_hash is high bit
-  *rate_cost = entry->rate_cost;
-
-  if (changed) {
-    txb_info->eob = last_nonzero_end;
-    p->eobs[block] = txb_info->eob;
-    p->txb_entropy_ctx[block] = av1_get_txb_entropy_context(
-        txb_info->qcoeff, txb_info->scan_order, txb_info->eob);
-  }
-
-  return txb_info->eob;
-}
-
-// Looks up the table slot addressed by the two hashes. The slot counts as a
-// hit only if both stored hashes match and the slot has been initialised;
-// a hit is replayed through hbt_hash_hit(), anything else is computed and
-// stored through hbt_hash_miss(). Returns the eob reported by the callee.
-int hbt_search_match(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash,
-                     TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
-                     const LV_MAP_EOB_COST *txb_eob_costs,
-                     const struct macroblock_plane *p, int block, int fast_mode,
-                     int *rate_cost) {
-  const int array_idx = hbt_qc_hash % HBT_ARRAY_LENGTH;
-  const int table_idx = hbt_ctx_hash % HBT_TABLE_SIZE;
-  const OptTxbQcoeff *const slot =
-      &hbt_hash_table[table_idx * HBT_ARRAY_LENGTH + array_idx];
-
-  // Check for qcoeff match
-  const int is_hit = slot->hbt_qc_hash == hbt_qc_hash &&
-                     slot->hbt_ctx_hash == hbt_ctx_hash && slot->init;
-  if (!is_hit) {
-    return hbt_hash_miss(hbt_ctx_hash, hbt_qc_hash, txb_info, txb_costs,
-                         txb_eob_costs, p, block, fast_mode, rate_cost);
-  }
-  return hbt_hash_hit(table_idx, array_idx, txb_info, p, block, rate_cost);
-}
-
-// Optimises the block directly with optimize_txb(), without consulting or
-// updating the hash table. Refreshes p->eobs[block] and
-// p->txb_entropy_ctx[block] when something changed and returns the eob.
-static int hbt_optimize_uncached(TxbInfo *txb_info,
-                                 const LV_MAP_COEFF_COST *txb_costs,
-                                 const LV_MAP_EOB_COST *txb_eob_costs,
-                                 const struct macroblock_plane *p, int block,
-                                 int *rate_cost) {
-  av1_txb_init_levels(txb_info->qcoeff, txb_info->width, txb_info->height,
-                      txb_info->levels);
-
-  const int update =
-      optimize_txb(txb_info, txb_costs, txb_eob_costs, rate_cost);
-
-  if (update) {
-    p->eobs[block] = txb_info->eob;
-    p->txb_entropy_ctx[block] = av1_get_txb_entropy_context(
-        txb_info->qcoeff, txb_info->scan_order, txb_info->eob);
-  }
-  return txb_info->eob;
-}
-
-// Builds the two CRC32C hashes that address the hash table and dispatches to
-// hbt_search_match():
-//  - qc_hash:  the coefficient magnitudes in scan order, 2 bits each.
-//  - ctx_hash: per-coefficient quantisation error (low byte), dequant values,
-//              entropy contexts, eob, rdmult, tx_type and the high bytes of
-//              several cost tables.
-// If any magnitude exceeds HBT_KICKOUT, or no table is available, the block
-// is optimised directly without using the table. Returns the resulting eob.
-int hbt_create_hashes(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
-                      const LV_MAP_EOB_COST *txb_eob_costs,
-                      const struct macroblock_plane *p, int block,
-                      int fast_mode, int *rate_cost) {
-  // Initialize hash table if needed.
-  if (hbt_needs_init) {
-    hbt_init();
-  }
-  // Without a table (e.g. its allocation did not succeed) caching is not
-  // possible; optimise the block directly.
-  if (hbt_hash_table == NULL) {
-    return hbt_optimize_uncached(txb_info, txb_costs, txb_eob_costs, p, block,
-                                 rate_cost);
-  }
-
-  //// Hash creation
-  uint8_t txb_hash_data[256];  // Asserts below to ensure enough space.
-  const int16_t *scan = txb_info->scan_order->scan;
-  uint8_t chunk = 0;
-  int hash_data_index = 0;
-
-  // If this kickout value is removed or raised beyond int8_t,
-  // widen deltas type in OptTxbQcoeff struct.
-  assert((int8_t)HBT_KICKOUT == HBT_KICKOUT);  // If not, widen types.
-
-  // Make qc_hash.
-  int packing_index = 0;  // needed for packing.
-  for (int i = 0; i < txb_info->eob; i++) {
-    tran_low_t prechunk = txb_info->qcoeff[scan[i]];
-
-    // Softening: Improves speed. Aligns with signed deltas.
-    if (prechunk < 0) prechunk *= -1;
-
-    // Early kick out: Don't apply feature if there are large coeffs.
-    if (prechunk > HBT_KICKOUT) {
-      return hbt_optimize_uncached(txb_info, txb_costs, txb_eob_costs, p,
-                                   block, rate_cost);
-    }
-
-    // Since coeffs are 0 to 3, only 2 bits are needed: pack into bytes
-    if (packing_index == 0) txb_hash_data[hash_data_index] = 0;
-    chunk = prechunk << packing_index;
-    packing_index += 2;
-    txb_hash_data[hash_data_index] |= chunk;
-
-    // Full byte:
-    if (packing_index == 8) {
-      packing_index = 0;
-      hash_data_index++;
-    }
-  }
-  // The hash always covers one byte past the last full one. When the
-  // coefficients filled their bytes exactly (packing_index == 0) that byte
-  // has not been written yet, so clear it to keep the hash deterministic.
-  if (packing_index == 0) txb_hash_data[hash_data_index] = 0;
-  hash_data_index++;
-  assert(hash_data_index <= 64);
-  // 31 bit qc_hash: index to array
-  uint32_t hbt_qc_hash =
-      av1_get_crc32c_value(&crc_calculator, txb_hash_data, hash_data_index);
-
-  // Make ctx_hash.
-  hash_data_index = 0;
-  tran_low_t prechunk;
-
-  for (int i = 0; i < txb_info->eob; i++) {
-    // Save as magnitudes towards or away from zero.
-    if (txb_info->tcoeff[scan[i]] >= 0)
-      prechunk = txb_info->tcoeff[scan[i]] - txb_info->dqcoeff[scan[i]];
-    else
-      prechunk = txb_info->dqcoeff[scan[i]] - txb_info->tcoeff[scan[i]];
-
-    chunk = prechunk & 0xff;
-    txb_hash_data[hash_data_index++] = chunk;
-  }
-
-  // Extra ctx data:
-  // Include dequants.
-  txb_hash_data[hash_data_index++] = txb_info->dequant[0] & 0xff;
-  txb_hash_data[hash_data_index++] = txb_info->dequant[1] & 0xff;
-  chunk = txb_info->txb_ctx->txb_skip_ctx & 0xff;
-  txb_hash_data[hash_data_index++] = chunk;
-  chunk = txb_info->txb_ctx->dc_sign_ctx & 0xff;
-  txb_hash_data[hash_data_index++] = chunk;
-  // eob
-  chunk = txb_info->eob & 0xff;
-  txb_hash_data[hash_data_index++] = chunk;
-  // rdmult (int64)
-  chunk = txb_info->rdmult & 0xff;
-  txb_hash_data[hash_data_index++] = chunk;
-  // tx_type
-  chunk = txb_info->tx_type & 0xff;
-  txb_hash_data[hash_data_index++] = chunk;
-  // base_eob_cost
-  for (int i = 1; i < 3; i++) {  // i = 0 are softened away
-    for (int j = 0; j < SIG_COEF_CONTEXTS_EOB; j++) {
-      chunk = (txb_costs->base_eob_cost[j][i] & 0xff00) >> 8;
-      txb_hash_data[hash_data_index++] = chunk;
-    }
-  }
-  // eob_cost
-  for (int i = 0; i < 11; i++) {
-    for (int j = 0; j < 2; j++) {
-      chunk = (txb_eob_costs->eob_cost[j][i] & 0xff00) >> 8;
-      txb_hash_data[hash_data_index++] = chunk;
-    }
-  }
-  // dc_sign_cost
-  for (int i = 0; i < 2; i++) {
-    for (int j = 0; j < DC_SIGN_CONTEXTS; j++) {
-      chunk = (txb_costs->dc_sign_cost[j][i] & 0xff00) >> 8;
-      txb_hash_data[hash_data_index++] = chunk;
-    }
-  }
-
-  assert(hash_data_index <= 256);
-  // 31 bit ctx_hash: used to index table
-  uint32_t hbt_ctx_hash =
-      av1_get_crc32c_value(&crc_calculator, txb_hash_data, hash_data_index);
-  //// End hash creation
-
-  return hbt_search_match(hbt_ctx_hash, hbt_qc_hash, txb_info, txb_costs,
-                          txb_eob_costs, p, block, fast_mode, rate_cost);
-}
-
-// Rate cost of one coefficient that is neither at position 0 nor the last
-// one in scan order: base-level cost, plus a 1-bit sign for non-zero values,
-// plus range and golomb costs when the magnitude exceeds NUM_BASE_LEVELS.
-static AOM_FORCE_INLINE int get_coeff_cost_simple(
-    int ci, tran_low_t abs_qc, int coeff_ctx,
-    const LV_MAP_COEFF_COST *txb_costs, int bwl, TX_CLASS tx_class,
-    const uint8_t *levels) {
-  // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
-  // and not the last (scan_idx != eob - 1)
-  assert(ci > 0);
-  const int base = txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
-  if (!abs_qc) return base;
-
-  const int with_sign = base + av1_cost_literal(1);
-  if (abs_qc <= NUM_BASE_LEVELS) return with_sign;
-
-  const int range_ctx = get_br_ctx(levels, ci, bwl, tx_class);
-  const int range_cost =
-      get_br_cost(abs_qc, range_ctx, txb_costs->lps_cost[range_ctx]);
-  return with_sign + range_cost + get_golomb_cost(abs_qc);
-}
-
-// Rate cost of one coefficient in the general case. The last coefficient in
-// scan order (is_last) is priced from base_eob_cost, every other one from
-// base_cost; position 0 pays dc_sign_cost for its sign, other non-zero
-// positions pay a 1-bit literal. Magnitudes above NUM_BASE_LEVELS add range
-// and golomb costs.
-// Note: with is_last set, base_eob_cost is indexed with AOMMIN(abs_qc, 3) - 1,
-// so abs_qc must be non-zero in that case.
-static INLINE int get_coeff_cost_general(int is_last, int ci, tran_low_t abs_qc,
-                                         int sign, int coeff_ctx,
-                                         int dc_sign_ctx,
-                                         const LV_MAP_COEFF_COST *txb_costs,
-                                         int bwl, TX_CLASS tx_class,
-                                         const uint8_t *levels) {
-  int total = is_last
-                  ? txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1]
-                  : txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
-  if (abs_qc == 0) return total;
-
-  // Sign cost.
-  total += (ci == 0) ? txb_costs->dc_sign_cost[dc_sign_ctx][sign]
-                     : av1_cost_literal(1);
-
-  if (abs_qc > NUM_BASE_LEVELS) {
-    const int range_ctx = get_br_ctx(levels, ci, bwl, tx_class);
-    total += get_br_cost(abs_qc, range_ctx, txb_costs->lps_cost[range_ctx]);
-    total += get_golomb_cost(abs_qc);
-  }
-  return total;
-}
-
-// Produces the "one level lower" candidate for a coefficient of magnitude
-// abs_qc: *qc_low receives the signed quantized value with magnitude
-// abs_qc - 1, and *dqc_low the matching dequantized value
-// ((abs_qc - 1) * dqv) >> shift with the same sign. `sign` is expected to be
-// 0 (non-negative) or 1 (negative), as checked by the asserts.
-static INLINE void get_qc_dqc_low(tran_low_t abs_qc, int sign, int dqv,
-                                  int shift, tran_low_t *qc_low,
-                                  tran_low_t *dqc_low) {
-  const tran_low_t mag_low = abs_qc - 1;
-  const tran_low_t deq_mag_low = (mag_low * dqv) >> shift;
-
-  // (-sign ^ m) + sign negates m when sign is 1 and leaves it alone when 0.
-  *qc_low = (-sign ^ mag_low) + sign;
-  assert((sign ? -mag_low : mag_low) == *qc_low);
-
-  *dqc_low = (-sign ^ deq_mag_low) + sign;
-  assert((sign ? -deq_mag_low : deq_mag_low) == *dqc_low);
-}
-
-// Processes the coefficient at scan index si in the general case (it may be
-// the last coefficient and/or at scan index 0). A zero coefficient only adds
-// its base cost to *accu_rate. For a non-zero one, the RD cost of the current
-// level is compared with that of the level lowered by one; if lowering wins,
-// qcoeff, dqcoeff and levels are rewritten. *accu_rate receives the rate of
-// the kept choice and *accu_dist its distortion relative to coding a zero.
-static INLINE void update_coeff_general(
-    int *accu_rate, int64_t *accu_dist, int si, int eob, TX_SIZE tx_size,
-    TX_CLASS tx_class, int bwl, int height, int64_t rdmult, int shift,
-    int dc_sign_ctx, const int16_t *dequant, const int16_t *scan,
-    const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff,
-    tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels) {
-  // dequant[0] applies to scan index 0, dequant[1] to every other index.
-  const int dqv = dequant[si != 0];
-  const int ci = scan[si];
-  const tran_low_t qc = qcoeff[ci];
-  const int is_last = si == (eob - 1);
-  const int coeff_ctx = get_lower_levels_ctx_general(
-      is_last, si, bwl, height, levels, ci, tx_size, tx_class);
-
-  if (qc == 0) {
-    *accu_rate += txb_costs->base_cost[coeff_ctx][0];
-    return;
-  }
-
-  const int sign = (qc < 0) ? 1 : 0;
-  const tran_low_t abs_qc = abs(qc);
-  const tran_low_t tqc = tcoeff[ci];
-  const tran_low_t dqc = dqcoeff[ci];
-
-  // Candidate A: keep the current level.
-  const int64_t dist_keep = get_coeff_dist(tqc, dqc, shift);
-  const int64_t dist_zero = get_coeff_dist(tqc, 0, shift);
-  const int rate_keep =
-      get_coeff_cost_general(is_last, ci, abs_qc, sign, coeff_ctx,
-                             dc_sign_ctx, txb_costs, bwl, tx_class, levels);
-  const int64_t rd_keep = RDCOST(rdmult, rate_keep, dist_keep);
-
-  // Candidate B: lower the magnitude by one.
-  tran_low_t qc_low, dqc_low;
-  get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
-  const tran_low_t abs_qc_low = abs_qc - 1;
-  const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift);
-  const int rate_low =
-      get_coeff_cost_general(is_last, ci, abs_qc_low, sign, coeff_ctx,
-                             dc_sign_ctx, txb_costs, bwl, tx_class, levels);
-  const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low);
-
-  const int take_low = rd_low < rd_keep;
-  if (take_low) {
-    qcoeff[ci] = qc_low;
-    dqcoeff[ci] = dqc_low;
-    levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
-  }
-  *accu_rate += take_low ? rate_low : rate_keep;
-  *accu_dist += (take_low ? dist_low : dist_keep) - dist_zero;
-}
-
-// Processes a coefficient that is neither at scan index 0 nor the last one.
-// Only the rate is accumulated. A zero coefficient adds its base cost. A
-// non-zero one whose dequantized magnitude is below the original transform
-// coefficient's magnitude is kept as is; otherwise the level lowered by one
-// is tried and adopted (rewriting qcoeff, dqcoeff and levels) when its RD
-// cost is smaller.
-static AOM_FORCE_INLINE void update_coeff_simple(
-    int *accu_rate, int si, int eob, TX_SIZE tx_size, TX_CLASS tx_class,
-    int bwl, int64_t rdmult, int shift, const int16_t *dequant,
-    const int16_t *scan, const LV_MAP_COEFF_COST *txb_costs,
-    const tran_low_t *tcoeff, tran_low_t *qcoeff, tran_low_t *dqcoeff,
-    uint8_t *levels) {
-  const int dqv = dequant[1];
-  (void)eob;
-  // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
-  // and not the last (scan_idx != eob - 1)
-  assert(si != eob - 1);
-  assert(si > 0);
-  const int ci = scan[si];
-  const tran_low_t qc = qcoeff[ci];
-  const int coeff_ctx =
-      get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);
-
-  if (qc == 0) {
-    *accu_rate += txb_costs->base_cost[coeff_ctx][0];
-    return;
-  }
-
-  const tran_low_t abs_qc = abs(qc);
-  const tran_low_t tqc = tcoeff[ci];
-  const tran_low_t dqc = dqcoeff[ci];
-  const int rate_keep = get_coeff_cost_simple(ci, abs_qc, coeff_ctx, txb_costs,
-                                              bwl, tx_class, levels);
-  int chosen_rate = rate_keep;
-
-  // Lowering is only evaluated when the reconstruction does not undershoot
-  // the original coefficient.
-  if (abs(dqc) >= abs(tqc)) {
-    const int64_t dist_keep = get_coeff_dist(tqc, dqc, shift);
-    const int64_t rd_keep = RDCOST(rdmult, rate_keep, dist_keep);
-
-    const int sign = (qc < 0) ? 1 : 0;
-    tran_low_t qc_low, dqc_low;
-    get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
-    const tran_low_t abs_qc_low = abs_qc - 1;
-    const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift);
-    const int rate_low = get_coeff_cost_simple(
-        ci, abs_qc_low, coeff_ctx, txb_costs, bwl, tx_class, levels);
-    const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low);
-
-    if (rd_low < rd_keep) {
-      qcoeff[ci] = qc_low;
-      dqcoeff[ci] = dqc_low;
-      levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
-      chosen_rate = rate_low;
-    }
-  }
-  *accu_rate += chosen_rate;
-}
-
-// Processes the coefficient at scan index si while the eob may still move.
-//
-// A zero coefficient only adds its base cost to *accu_rate. For a non-zero
-// coefficient four candidates are priced:
-//   - keep the level / lower it by one, with the current eob
-//     (accumulated on top of *accu_rate and *accu_dist);
-//   - keep the level / lower it by one, with this coefficient becoming the
-//     new last one (eob = si + 1), which discards everything accumulated so
-//     far and zeroes the coefficients listed in nz_ci.
-// The new-eob option is only taken when sharpness == 0 and its RD cost is
-// smaller than the best current-eob option.
-//
-// nz_ci / *nz_num list the positions of the non-zero coefficients seen since
-// the last eob move; the list is cleared when the eob moves and the current
-// position is appended if it stays non-zero.
-// NOTE(review): tmp_levels has 3 slots, so *nz_num must never exceed 3 on
-// entry; that bound is not enforced here and must come from the caller --
-// confirm.
-static AOM_FORCE_INLINE void update_coeff_eob(
-    int *accu_rate, int64_t *accu_dist, int *eob, int *nz_num, int *nz_ci,
-    int si, TX_SIZE tx_size, TX_CLASS tx_class, int bwl, int height,
-    int dc_sign_ctx, int64_t rdmult, int shift, const int16_t *dequant,
-    const int16_t *scan, const LV_MAP_EOB_COST *txb_eob_costs,
-    const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff,
-    tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels, int sharpness) {
-  // dequant[0] applies to scan index 0, dequant[1] to every other index.
-  const int dqv = dequant[si != 0];
-  assert(si != *eob - 1);
-  const int ci = scan[si];
-  const tran_low_t qc = qcoeff[ci];
-  const int coeff_ctx =
-      get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);
-  if (qc == 0) {
-    *accu_rate += txb_costs->base_cost[coeff_ctx][0];
-  } else {
-    int lower_level = 0;
-    const tran_low_t abs_qc = abs(qc);
-    const tran_low_t tqc = tcoeff[ci];
-    const tran_low_t dqc = dqcoeff[ci];
-    const int sign = (qc < 0) ? 1 : 0;
-    // Distortions are measured relative to coding this coefficient as zero.
-    const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
-    int64_t dist = get_coeff_dist(tqc, dqc, shift) - dist0;
-    int rate =
-        get_coeff_cost_general(0, ci, abs_qc, sign, coeff_ctx, dc_sign_ctx,
-                               txb_costs, bwl, tx_class, levels);
-    int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist);
-
-    tran_low_t qc_low, dqc_low;
-    get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
-    const tran_low_t abs_qc_low = abs_qc - 1;
-    const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift) - dist0;
-    const int rate_low =
-        get_coeff_cost_general(0, ci, abs_qc_low, sign, coeff_ctx, dc_sign_ctx,
-                               txb_costs, bwl, tx_class, levels);
-    const int64_t rd_low =
-        RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low);
-
-    int lower_level_new_eob = 0;
-    const int new_eob = si + 1;
-    // Temporarily clear the levels of the tracked non-zero coefficients so
-    // the context below is computed as if this were the last coefficient.
-    // The saved values are restored if the eob does not move.
-    uint8_t tmp_levels[3];
-    for (int ni = 0; ni < *nz_num; ++ni) {
-      const int last_ci = nz_ci[ni];
-      tmp_levels[ni] = levels[get_padded_idx(last_ci, bwl)];
-      levels[get_padded_idx(last_ci, bwl)] = 0;
-    }
-
-    const int coeff_ctx_new_eob = get_lower_levels_ctx_general(
-        1, si, bwl, height, levels, ci, tx_size, tx_class);
-    const int new_eob_cost =
-        get_eob_cost(new_eob, txb_eob_costs, txb_costs, tx_class);
-    int rate_coeff_eob =
-        new_eob_cost + get_coeff_cost_general(1, ci, abs_qc, sign,
-                                              coeff_ctx_new_eob, dc_sign_ctx,
-                                              txb_costs, bwl, tx_class, levels);
-    int64_t dist_new_eob = dist;
-    int64_t rd_new_eob = RDCOST(rdmult, rate_coeff_eob, dist_new_eob);
-
-    // A last coefficient cannot be zero, so the lowered level is only a
-    // new-eob candidate when it stays non-zero.
-    if (abs_qc_low > 0) {
-      const int rate_coeff_eob_low =
-          new_eob_cost +
-          get_coeff_cost_general(1, ci, abs_qc_low, sign, coeff_ctx_new_eob,
-                                 dc_sign_ctx, txb_costs, bwl, tx_class, levels);
-      const int64_t dist_new_eob_low = dist_low;
-      const int64_t rd_new_eob_low =
-          RDCOST(rdmult, rate_coeff_eob_low, dist_new_eob_low);
-      if (rd_new_eob_low < rd_new_eob) {
-        lower_level_new_eob = 1;
-        rd_new_eob = rd_new_eob_low;
-        rate_coeff_eob = rate_coeff_eob_low;
-        dist_new_eob = dist_new_eob_low;
-      }
-    }
-
-    // Best option that keeps the current eob.
-    if (rd_low < rd) {
-      lower_level = 1;
-      rd = rd_low;
-      rate = rate_low;
-      dist = dist_low;
-    }
-
-    if (sharpness == 0 && rd_new_eob < rd) {
-      // Move the eob here: drop the tracked coefficients and restart the
-      // accumulators from this coefficient alone. Their levels were already
-      // cleared above.
-      for (int ni = 0; ni < *nz_num; ++ni) {
-        int last_ci = nz_ci[ni];
-        // levels[get_padded_idx(last_ci, bwl)] = 0;
-        qcoeff[last_ci] = 0;
-        dqcoeff[last_ci] = 0;
-      }
-      *eob = new_eob;
-      *nz_num = 0;
-      *accu_rate = rate_coeff_eob;
-      *accu_dist = dist_new_eob;
-      lower_level = lower_level_new_eob;
-    } else {
-      // Keep the eob: undo the temporary clearing of levels.
-      for (int ni = 0; ni < *nz_num; ++ni) {
-        const int last_ci = nz_ci[ni];
-        levels[get_padded_idx(last_ci, bwl)] = tmp_levels[ni];
-      }
-      *accu_rate += rate;
-      *accu_dist += dist;
-    }
-
-    if (lower_level) {
-      qcoeff[ci] = qc_low;
-      dqcoeff[ci] = dqc_low;
-      levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX);
-    }
-    // Track this position if it is still non-zero after the decision.
-    if (qcoeff[ci]) {
-      nz_ci[*nz_num] = ci;
-      ++*nz_num;
-    }
-  }
-}
-
-static INLINE void update_skip(int *accu_rate, int64_t accu_dist, int *eob,
-                               int nz_num, int *nz_ci, int64_t rdmult,
-                               int skip_cost, int non_skip_cost,
-                               tran_low_t *qcoeff, tran_low_t *dqcoeff,
-                               int sharpness) {
-  const int64_t rd = RDCOST(rdmult, *accu_rate + non_skip_cost, accu_dist);
-  const int64_t rd_new_eob = RDCOST(rdmult, skip_cost, 0);
-  if (sharpness == 0 && rd_new_eob < rd) {
-    for (int i = 0; i < nz_num; ++i) {
-      const int ci = nz_ci[i];
-      qcoeff[ci] = 0;
-      dqcoeff[ci] = 0;
-      // no need to set up levels because this is the last step
-      // levels[get_padded_idx(ci, bwl)] = 0;
-    }
-    *accu_rate = 0;
-    *eob = 0;
-  }
-}
-
-int av1_optimize_txb_new(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
-                         int block, TX_SIZE tx_size, TX_TYPE tx_type,
-                         const TXB_CTX *const txb_ctx, int *rate_cost,
-                         int sharpness) {
-  const AV1_COMMON *cm = &cpi->common;
-  MACROBLOCKD *xd = &x->e_mbd;
-  const PLANE_TYPE plane_type = get_plane_type(plane);
-  const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
-  const TX_CLASS tx_class = tx_type_to_class[tx_type];
-  const MB_MODE_INFO *mbmi = xd->mi[0];
-  const struct macroblock_plane *p = &x->plane[plane];
-  struct macroblockd_plane *pd = &xd->plane[plane];
-  tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
-  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-  const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block);
-  const int16_t *dequant = p->dequant_QTX;
-  const int bwl = get_txb_bwl(tx_size);
-  const int width = get_txb_wide(tx_size);
-  const int height = get_txb_high(tx_size);
-  assert(width == (1 << bwl));
-  const int is_inter = is_inter_block(mbmi);
-  const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type);
-  const int16_t *scan = scan_order->scan;
-  const LV_MAP_COEFF_COST *txb_costs = &x->coeff_costs[txs_ctx][plane_type];
-  const int eob_multi_size = txsize_log2_minus4[tx_size];
-  const LV_MAP_EOB_COST *txb_eob_costs =
-      &x->eob_costs[eob_multi_size][plane_type];
-
-  const int shift = av1_get_tx_scale(tx_size);
-  const int64_t rdmult =
-      ((x->rdmult * plane_rd_mult[is_inter][plane_type] << (2 * (xd->bd - 8))) +
-       2) >>
-      (sharpness +
-       (cpi->oxcf.aq_mode == VARIANCE_AQ && mbmi->segment_id < 4
-            ? 7 - mbmi->segment_id
-            : 2) +
-       (cpi->oxcf.aq_mode != VARIANCE_AQ &&
-                cpi->oxcf.deltaq_mode > NO_DELTA_Q && x->sb_energy_level < 0
-            ? (3 - x->sb_energy_level)
-            : 0));
-
-  uint8_t levels_buf[TX_PAD_2D];
-  uint8_t *const levels = set_levels(levels_buf, width);
-
-  av1_txb_init_levels(qcoeff, width, height, levels);
-
-  // TODO(angirbird): check iqmatrix
-
-  const int non_skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][0];
-  const int skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1];
-  int eob = p->eobs[block];
-  const int eob_cost = get_eob_cost(eob, txb_eob_costs, txb_costs, tx_class);
-  int accu_rate = eob_cost;
-  int64_t accu_dist = 0;
-  int si = eob - 1;
-  const int ci = scan[si];
-  const tran_low_t qc = qcoeff[ci];
-  const tran_low_t abs_qc = abs(qc);
-  const int sign = qc < 0;
-  const int max_nz_num = 2;
-  int nz_num = 1;
-  int nz_ci[3] = { ci, 0, 0 };
-  if (abs_qc >= 2) {
-    update_coeff_general(&accu_rate, &accu_dist, si, eob, tx_size, tx_class,
-                         bwl, height, rdmult, shift, txb_ctx->dc_sign_ctx,
-                         dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff,
-                         levels);
-    --si;
-  } else {
-    assert(abs_qc == 1);
-    const int coeff_ctx = get_lower_levels_ctx_general(
-        1, si, bwl, height, levels, ci, tx_size, tx_class);
-    accu_rate += get_coeff_cost_general(1, ci, abs_qc, sign, coeff_ctx,
-                                        txb_ctx->dc_sign_ctx, txb_costs, bwl,
-                                        tx_class, levels);
-    const tran_low_t tqc = tcoeff[ci];
-    const tran_low_t dqc = dqcoeff[ci];
-    const int64_t dist = get_coeff_dist(tqc, dqc, shift);
-    const int64_t dist0 = get_coeff_dist(tqc, 0, shift);
-    accu_dist += dist - dist0;
-    --si;
-  }
-
-#define UPDATE_COEFF_EOB_CASE(tx_class_literal)                            \
-  case tx_class_literal:                                                   \
-    for (; si >= 0 && nz_num <= max_nz_num; --si) {                        \
-      update_coeff_eob(&accu_rate, &accu_dist, &eob, &nz_num, nz_ci, si,   \
-                       tx_size, tx_class_literal, bwl, height,             \
-                       txb_ctx->dc_sign_ctx, rdmult, shift, dequant, scan, \
-                       txb_eob_costs, txb_costs, tcoeff, qcoeff, dqcoeff,  \
-                       levels, sharpness);                                 \
-    }                                                                      \
-    break;
-  switch (tx_class) {
-    UPDATE_COEFF_EOB_CASE(TX_CLASS_2D);
-    UPDATE_COEFF_EOB_CASE(TX_CLASS_HORIZ);
-    UPDATE_COEFF_EOB_CASE(TX_CLASS_VERT);
-#undef UPDATE_COEFF_EOB_CASE
-    default: assert(false);
-  }
-
-  if (si == -1 && nz_num <= max_nz_num) {
-    update_skip(&accu_rate, accu_dist, &eob, nz_num, nz_ci, rdmult, skip_cost,
-                non_skip_cost, qcoeff, dqcoeff, sharpness);
-  }
-
-#define UPDATE_COEFF_SIMPLE_CASE(tx_class_literal)                             \
-  case tx_class_literal:                                                       \
-    for (; si >= 1; --si) {                                                    \
-      update_coeff_simple(&accu_rate, si, eob, tx_size, tx_class_literal, bwl, \
-                          rdmult, shift, dequant, scan, txb_costs, tcoeff,     \
-                          qcoeff, dqcoeff, levels);                            \
-    }                                                                          \
-    break;
-  switch (tx_class) {
-    UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_2D);
-    UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_HORIZ);
-    UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_VERT);
-#undef UPDATE_COEFF_SIMPLE_CASE
-    default: assert(false);
-  }
-
-  // DC position
-  if (si == 0) {
-    // no need to update accu_dist because it's not used after this point
-    int64_t dummy_dist = 0;
-    update_coeff_general(&accu_rate, &dummy_dist, si, eob, tx_size, tx_class,
-                         bwl, height, rdmult, shift, txb_ctx->dc_sign_ctx,
-                         dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff,
-                         levels);
-  }
-
-  const int tx_type_cost = get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type);
-  if (eob == 0)
-    accu_rate += skip_cost;
-  else
-    accu_rate += non_skip_cost + tx_type_cost;
-
-  p->eobs[block] = eob;
-  p->txb_entropy_ctx[block] =
-      av1_get_txb_entropy_context(qcoeff, scan_order, p->eobs[block]);
-
-  *rate_cost = accu_rate;
-  return eob;
-}
-
-// This function is deprecated, but we keep it here because hash trellis
-// is not integrated with av1_optimize_txb_new yet
-int av1_optimize_txb(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
-                     int blk_row, int blk_col, int block, TX_SIZE tx_size,
-                     TXB_CTX *txb_ctx, int fast_mode, int *rate_cost) {
-  const AV1_COMMON *cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const PLANE_TYPE plane_type = get_plane_type(plane);
-  const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
-  const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
-                                          tx_size, cm->reduced_tx_set_used);
-  const MB_MODE_INFO *mbmi = xd->mi[0];
-  const struct macroblock_plane *p = &x->plane[plane];
-  struct macroblockd_plane *pd = &xd->plane[plane];
-  const int eob = p->eobs[block];
-  tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
-  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-  const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block);
-  const int16_t *dequant = p->dequant_QTX;
-  const int seg_eob = av1_get_max_eob(tx_size);
-  const int bwl = get_txb_bwl(tx_size);
-  const int width = get_txb_wide(tx_size);
-  const int height = get_txb_high(tx_size);
-  const int is_inter = is_inter_block(mbmi);
-  const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
-  const LV_MAP_COEFF_COST *txb_costs = &x->coeff_costs[txs_ctx][plane_type];
-  const int eob_multi_size = txsize_log2_minus4[tx_size];
-  const LV_MAP_EOB_COST txb_eob_costs =
-      x->eob_costs[eob_multi_size][plane_type];
-
-  const int shift = av1_get_tx_scale(tx_size);
-  const int64_t rdmult =
-      ((x->rdmult * plane_rd_mult[is_inter][plane_type] << (2 * (xd->bd - 8))) +
-       2) >>
-      2;
-  uint8_t levels_buf[TX_PAD_2D];
-  uint8_t *const levels = set_levels(levels_buf, width);
-  const TX_SIZE qm_tx_size = av1_get_adjusted_tx_size(tx_size);
-  const qm_val_t *iqmatrix =
-      IS_2D_TRANSFORM(tx_type)
-          ? pd->seg_iqmatrix[mbmi->segment_id][qm_tx_size]
-          : cm->giqmatrix[NUM_QM_LEVELS - 1][0][qm_tx_size];
-  assert(width == (1 << bwl));
-  const int tx_type_cost = get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type);
-  TxbInfo txb_info = {
-    qcoeff,   levels,       dqcoeff,    tcoeff,  dequant, shift,
-    tx_size,  txs_ctx,      tx_type,    bwl,     width,   height,
-    eob,      seg_eob,      scan_order, txb_ctx, rdmult,  &cm->coeff_ctx_table,
-    iqmatrix, tx_type_cost,
-  };
-
-  // Hash based trellis (hbt) speed feature: avoid expensive optimize_txb calls
-  // by storing the coefficient deltas in a hash table.
-  // Currently disabled in speedfeatures.c
-  if (eob <= HBT_EOB && eob > 0 && cpi->sf.use_hash_based_trellis) {
-    return hbt_create_hashes(&txb_info, txb_costs, &txb_eob_costs, p, block,
-                             fast_mode, rate_cost);
-  }
-
-  av1_txb_init_levels(qcoeff, width, height, levels);
-
-  const int update =
-      optimize_txb(&txb_info, txb_costs, &txb_eob_costs, rate_cost);
-
-  if (update) {
-    p->eobs[block] = txb_info.eob;
-    p->txb_entropy_ctx[block] =
-        av1_get_txb_entropy_context(qcoeff, scan_order, txb_info.eob);
-  }
-  return txb_info.eob;
-}
-
-int av1_get_txb_entropy_context(const tran_low_t *qcoeff,
-                                const SCAN_ORDER *scan_order, int eob) {
-  const int16_t *const scan = scan_order->scan;
-  int cul_level = 0;
-  int c;
-
-  if (eob == 0) return 0;
-  for (c = 0; c < eob; ++c) {
-    cul_level += abs(qcoeff[scan[c]]);
-    if (cul_level > COEFF_CONTEXT_MASK) break;
-  }
-
-  cul_level = AOMMIN(COEFF_CONTEXT_MASK, cul_level);
-  set_dc_sign(&cul_level, qcoeff[0]);
-
-  return cul_level;
-}
-
-void av1_update_txb_context_b(int plane, int block, int blk_row, int blk_col,
-                              BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
-                              void *arg) {
-  struct tokenize_b_args *const args = arg;
-  const AV1_COMP *cpi = args->cpi;
-  const AV1_COMMON *cm = &cpi->common;
-  ThreadData *const td = args->td;
-  MACROBLOCK *const x = &td->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  struct macroblock_plane *p = &x->plane[plane];
-  struct macroblockd_plane *pd = &xd->plane[plane];
-  const uint16_t eob = p->eobs[block];
-  const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
-  const PLANE_TYPE plane_type = pd->plane_type;
-  const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
-                                          tx_size, cm->reduced_tx_set_used);
-  const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
-  const int cul_level = av1_get_txb_entropy_context(qcoeff, scan_order, eob);
-  av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, cul_level, blk_col,
-                   blk_row);
-}
-
-static void update_tx_type_count(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                 int blk_row, int blk_col, int plane,
-                                 TX_SIZE tx_size, FRAME_COUNTS *counts,
-                                 uint8_t allow_update_cdf) {
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  int is_inter = is_inter_block(mbmi);
-  FRAME_CONTEXT *fc = xd->tile_ctx;
-#if !CONFIG_ENTROPY_STATS
-  (void)counts;
-#endif  // !CONFIG_ENTROPY_STATS
-
-  // Only y plane's tx_type is updated
-  if (plane > 0) return;
-  TX_TYPE tx_type = av1_get_tx_type(PLANE_TYPE_Y, xd, blk_row, blk_col, tx_size,
-                                    cm->reduced_tx_set_used);
-  if (get_ext_tx_types(tx_size, is_inter, cm->reduced_tx_set_used) > 1 &&
-      cm->base_qindex > 0 && !mbmi->skip &&
-      !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
-    const int eset = get_ext_tx_set(tx_size, is_inter, cm->reduced_tx_set_used);
-    if (eset > 0) {
-      const TxSetType tx_set_type =
-          av1_get_ext_tx_set_type(tx_size, is_inter, cm->reduced_tx_set_used);
-      if (is_inter) {
-        if (allow_update_cdf) {
-          update_cdf(fc->inter_ext_tx_cdf[eset][txsize_sqr_map[tx_size]],
-                     av1_ext_tx_ind[tx_set_type][tx_type],
-                     av1_num_ext_tx_set[tx_set_type]);
-        }
-#if CONFIG_ENTROPY_STATS
-        ++counts->inter_ext_tx[eset][txsize_sqr_map[tx_size]]
-                              [av1_ext_tx_ind[tx_set_type][tx_type]];
-#endif  // CONFIG_ENTROPY_STATS
-      } else {
-        PREDICTION_MODE intra_dir;
-        if (mbmi->filter_intra_mode_info.use_filter_intra)
-          intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info
-                                             .filter_intra_mode];
-        else
-          intra_dir = mbmi->mode;
-#if CONFIG_ENTROPY_STATS
-        ++counts->intra_ext_tx[eset][txsize_sqr_map[tx_size]][intra_dir]
-                              [av1_ext_tx_ind[tx_set_type][tx_type]];
-#endif  // CONFIG_ENTROPY_STATS
-        if (allow_update_cdf) {
-          update_cdf(
-              fc->intra_ext_tx_cdf[eset][txsize_sqr_map[tx_size]][intra_dir],
-              av1_ext_tx_ind[tx_set_type][tx_type],
-              av1_num_ext_tx_set[tx_set_type]);
-        }
-      }
-    }
-  }
-}
-
-void av1_update_and_record_txb_context(int plane, int block, int blk_row,
-                                       int blk_col, BLOCK_SIZE plane_bsize,
-                                       TX_SIZE tx_size, void *arg) {
-  struct tokenize_b_args *const args = arg;
-  const AV1_COMP *cpi = args->cpi;
-  const AV1_COMMON *cm = &cpi->common;
-  ThreadData *const td = args->td;
-  MACROBLOCK *const x = &td->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  struct macroblock_plane *p = &x->plane[plane];
-  struct macroblockd_plane *pd = &xd->plane[plane];
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  const int eob = p->eobs[block];
-  TXB_CTX txb_ctx;
-  get_txb_ctx(plane_bsize, tx_size, plane, pd->above_context + blk_col,
-              pd->left_context + blk_row, &txb_ctx);
-  const int bwl = get_txb_bwl(tx_size);
-  const int width = get_txb_wide(tx_size);
-  const int height = get_txb_high(tx_size);
-  const uint8_t allow_update_cdf = args->allow_update_cdf;
-  const TX_SIZE txsize_ctx = get_txsize_entropy_ctx(tx_size);
-  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-#if CONFIG_ENTROPY_STATS
-  int cdf_idx = cm->coef_cdf_category;
-#endif  // CONFIG_ENTROPY_STATS
-
-#if CONFIG_ENTROPY_STATS
-  ++td->counts->txb_skip[cdf_idx][txsize_ctx][txb_ctx.txb_skip_ctx][eob == 0];
-#endif  // CONFIG_ENTROPY_STATS
-  if (allow_update_cdf) {
-    update_cdf(ec_ctx->txb_skip_cdf[txsize_ctx][txb_ctx.txb_skip_ctx], eob == 0,
-               2);
-  }
-
-  x->mbmi_ext->txb_skip_ctx[plane][block] = txb_ctx.txb_skip_ctx;
-  x->mbmi_ext->eobs[plane][block] = eob;
-
-  if (eob == 0) {
-    av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, 0, blk_col, blk_row);
-    return;
-  }
-
-  tran_low_t *tcoeff = BLOCK_OFFSET(x->mbmi_ext->tcoeff[plane], block);
-  const int segment_id = mbmi->segment_id;
-  const int seg_eob = av1_get_tx_eob(&cpi->common.seg, segment_id, tx_size);
-  const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
-  memcpy(tcoeff, qcoeff, sizeof(*tcoeff) * seg_eob);
-
-  uint8_t levels_buf[TX_PAD_2D];
-  uint8_t *const levels = set_levels(levels_buf, width);
-  av1_txb_init_levels(tcoeff, width, height, levels);
-  update_tx_type_count(cm, xd, blk_row, blk_col, plane, tx_size, td->counts,
-                       allow_update_cdf);
-
-  const PLANE_TYPE plane_type = pd->plane_type;
-  const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
-                                          tx_size, cm->reduced_tx_set_used);
-  const TX_CLASS tx_class = tx_type_to_class[tx_type];
-  const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
-  const int16_t *const scan = scan_order->scan;
-#if CONFIG_ENTROPY_STATS
-  av1_update_eob_context(cdf_idx, eob, tx_size, tx_class, plane_type, ec_ctx,
-                         td->counts, allow_update_cdf);
-#else
-  av1_update_eob_context(eob, tx_size, tx_class, plane_type, ec_ctx,
-                         allow_update_cdf);
-#endif
-
-  DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
-  av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts);
-
-  for (int c = eob - 1; c >= 0; --c) {
-    const int pos = scan[c];
-    const int coeff_ctx = coeff_contexts[pos];
-    const tran_low_t v = qcoeff[pos];
-    const tran_low_t level = abs(v);
-
-    if (allow_update_cdf) {
-      if (c == eob - 1) {
-        assert(coeff_ctx < 4);
-        update_cdf(
-            ec_ctx->coeff_base_eob_cdf[txsize_ctx][plane_type][coeff_ctx],
-            AOMMIN(level, 3) - 1, 3);
-      } else {
-        update_cdf(ec_ctx->coeff_base_cdf[txsize_ctx][plane_type][coeff_ctx],
-                   AOMMIN(level, 3), 4);
-      }
-    }
-    {
-      if (c == eob - 1) {
-        assert(coeff_ctx < 4);
-#if CONFIG_ENTROPY_STATS
-        ++td->counts->coeff_base_eob_multi[cdf_idx][txsize_ctx][plane_type]
-                                          [coeff_ctx][AOMMIN(level, 3) - 1];
-      } else {
-        ++td->counts->coeff_base_multi[cdf_idx][txsize_ctx][plane_type]
-                                      [coeff_ctx][AOMMIN(level, 3)];
-#endif
-      }
-    }
-    if (level > NUM_BASE_LEVELS) {
-      const int base_range = level - 1 - NUM_BASE_LEVELS;
-      const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class);
-      for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
-        const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1);
-        if (allow_update_cdf) {
-          update_cdf(ec_ctx->coeff_br_cdf[AOMMIN(txsize_ctx, TX_32X32)]
-                                         [plane_type][br_ctx],
-                     k, BR_CDF_SIZE);
-        }
-        for (int lps = 0; lps < BR_CDF_SIZE - 1; lps++) {
-#if CONFIG_ENTROPY_STATS
-          ++td->counts->coeff_lps[AOMMIN(txsize_ctx, TX_32X32)][plane_type][lps]
-                                 [br_ctx][lps == k];
-#endif  // CONFIG_ENTROPY_STATS
-          if (lps == k) break;
-        }
-#if CONFIG_ENTROPY_STATS
-        ++td->counts->coeff_lps_multi[cdf_idx][AOMMIN(txsize_ctx, TX_32X32)]
-                                     [plane_type][br_ctx][k];
-#endif
-        if (k < BR_CDF_SIZE - 1) break;
-      }
-    }
-  }
-
-  // Update the context needed to code the DC sign (if applicable)
-  if (tcoeff[0] != 0) {
-    const int dc_sign = (tcoeff[0] < 0) ? 1 : 0;
-    const int dc_sign_ctx = txb_ctx.dc_sign_ctx;
-#if CONFIG_ENTROPY_STATS
-    ++td->counts->dc_sign[plane_type][dc_sign_ctx][dc_sign];
-#endif  // CONFIG_ENTROPY_STATS
-    if (allow_update_cdf)
-      update_cdf(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], dc_sign, 2);
-    x->mbmi_ext->dc_sign_ctx[plane][block] = dc_sign_ctx;
-  }
-
-  const int cul_level = av1_get_txb_entropy_context(tcoeff, scan_order, eob);
-  av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, cul_level, blk_col,
-                   blk_row);
-}
-
-void av1_update_txb_context(const AV1_COMP *cpi, ThreadData *td,
-                            RUN_TYPE dry_run, BLOCK_SIZE bsize, int *rate,
-                            int mi_row, int mi_col, uint8_t allow_update_cdf) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCK *const x = &td->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  struct tokenize_b_args arg = { cpi, td, NULL, 0, allow_update_cdf };
-  (void)rate;
-  (void)mi_row;
-  (void)mi_col;
-  if (mbmi->skip) {
-    av1_reset_skip_context(xd, mi_row, mi_col, bsize, num_planes);
-    return;
-  }
-
-  if (!dry_run) {
-    av1_foreach_transformed_block(xd, bsize, mi_row, mi_col,
-                                  av1_update_and_record_txb_context, &arg,
-                                  num_planes);
-  } else if (dry_run == DRY_RUN_NORMAL) {
-    av1_foreach_transformed_block(xd, bsize, mi_row, mi_col,
-                                  av1_update_txb_context_b, &arg, num_planes);
-  } else {
-    printf("DRY_RUN_COSTCOEFFS is not supported yet\n");
-    assert(0);
-  }
-}
diff --git a/third_party/aom/av1/encoder/encodetxb.h b/third_party/aom/av1/encoder/encodetxb.h
deleted file mode 100644
index 40ae343b0..000000000
--- a/third_party/aom/av1/encoder/encodetxb.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_ENCODETXB_H_
-#define AOM_AV1_ENCODER_ENCODETXB_H_
-
-#include "config/aom_config.h"
-
-#include "av1/common/blockd.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/txb_common.h"
-#include "av1/encoder/block.h"
-#include "av1/encoder/encoder.h"
-#include "aom_dsp/bitwriter.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct TxbInfo {
-  tran_low_t *qcoeff;
-  uint8_t *levels;  // absolute values and clamped to 255.
-  tran_low_t *dqcoeff;
-  const tran_low_t *tcoeff;
-  const int16_t *dequant;
-  int shift;
-  TX_SIZE tx_size;
-  TX_SIZE txs_ctx;
-  TX_TYPE tx_type;
-  int bwl;
-  int width;
-  int height;
-  int eob;
-  int seg_eob;
-  const SCAN_ORDER *scan_order;
-  TXB_CTX *txb_ctx;
-  int64_t rdmult;
-  const LV_MAP_CTX_TABLE *coeff_ctx_table;
-  const qm_val_t *iqmatrix;
-  int tx_type_cost;
-} TxbInfo;
-
-void av1_alloc_txb_buf(AV1_COMP *cpi);
-void av1_free_txb_buf(AV1_COMP *cpi);
-int av1_cost_coeffs_txb(const AV1_COMMON *const cm, const MACROBLOCK *x,
-                        const int plane, const int block, const TX_SIZE tx_size,
-                        const TX_TYPE tx_type, const TXB_CTX *const txb_ctx);
-void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
-                          aom_writer *w, int blk_row, int blk_col, int plane,
-                          TX_SIZE tx_size, const tran_low_t *tcoeff,
-                          uint16_t eob, TXB_CTX *txb_ctx);
-void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row,
-                         int mi_col, aom_writer *w, BLOCK_SIZE bsize);
-int av1_get_txb_entropy_context(const tran_low_t *qcoeff,
-                                const SCAN_ORDER *scan_order, int eob);
-void av1_update_txb_context(const AV1_COMP *cpi, ThreadData *td,
-                            RUN_TYPE dry_run, BLOCK_SIZE bsize, int *rate,
-                            int mi_row, int mi_col, uint8_t allow_update_cdf);
-
-void av1_update_txb_context_b(int plane, int block, int blk_row, int blk_col,
-                              BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
-                              void *arg);
-
-void av1_update_and_record_txb_context(int plane, int block, int blk_row,
-                                       int blk_col, BLOCK_SIZE plane_bsize,
-                                       TX_SIZE tx_size, void *arg);
-
-void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x,
-                          int mi_row, int mi_col);
-
-void hbt_destroy();
-int av1_optimize_txb_new(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
-                         int block, TX_SIZE tx_size, TX_TYPE tx_type,
-                         const TXB_CTX *const txb_ctx, int *rate_cost,
-                         int sharpness);
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // AOM_AV1_ENCODER_ENCODETXB_H_
diff --git a/third_party/aom/av1/encoder/ethread.c b/third_party/aom/av1/encoder/ethread.c
deleted file mode 100644
index e8ac30bb5..000000000
--- a/third_party/aom/av1/encoder/ethread.c
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/encoder/encodeframe.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/ethread.h"
-#include "aom_dsp/aom_dsp_common.h"
-
-static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
-  for (int i = 0; i < REFERENCE_MODES; i++)
-    td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i];
-
-  for (int i = 0; i < REF_FRAMES; i++)
-    td->rd_counts.global_motion_used[i] +=
-        td_t->rd_counts.global_motion_used[i];
-
-  td->rd_counts.compound_ref_used_flag |=
-      td_t->rd_counts.compound_ref_used_flag;
-  td->rd_counts.skip_mode_used_flag |= td_t->rd_counts.skip_mode_used_flag;
-}
-
-static int enc_worker_hook(void *arg1, void *unused) {
-  EncWorkerData *const thread_data = (EncWorkerData *)arg1;
-  AV1_COMP *const cpi = thread_data->cpi;
-  const AV1_COMMON *const cm = &cpi->common;
-  const int tile_cols = cm->tile_cols;
-  const int tile_rows = cm->tile_rows;
-  int t;
-
-  (void)unused;
-
-  for (t = thread_data->start; t < tile_rows * tile_cols;
-       t += cpi->num_workers) {
-    int tile_row = t / tile_cols;
-    int tile_col = t % tile_cols;
-
-    av1_encode_tile(cpi, thread_data->td, tile_row, tile_col);
-  }
-
-  return 1;
-}
-
-static void create_enc_workers(AV1_COMP *cpi, int num_workers) {
-  AV1_COMMON *const cm = &cpi->common;
-  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
-
-  CHECK_MEM_ERROR(cm, cpi->workers,
-                  aom_malloc(num_workers * sizeof(*cpi->workers)));
-
-  CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
-                  aom_calloc(num_workers, sizeof(*cpi->tile_thr_data)));
-
-  for (int i = 0; i < num_workers; i++) {
-    AVxWorker *const worker = &cpi->workers[i];
-    EncWorkerData *const thread_data = &cpi->tile_thr_data[i];
-
-    ++cpi->num_workers;
-    winterface->init(worker);
-
-    thread_data->cpi = cpi;
-
-    if (i < num_workers - 1) {
-      // Allocate thread data.
-      CHECK_MEM_ERROR(cm, thread_data->td,
-                      aom_memalign(32, sizeof(*thread_data->td)));
-      av1_zero(*thread_data->td);
-
-      // Set up pc_tree.
-      thread_data->td->pc_tree = NULL;
-      av1_setup_pc_tree(cm, thread_data->td);
-
-      CHECK_MEM_ERROR(cm, thread_data->td->above_pred_buf,
-                      (uint8_t *)aom_memalign(
-                          16, MAX_MB_PLANE * MAX_SB_SQUARE *
-                                  sizeof(*thread_data->td->above_pred_buf)));
-      CHECK_MEM_ERROR(cm, thread_data->td->left_pred_buf,
-                      (uint8_t *)aom_memalign(
-                          16, MAX_MB_PLANE * MAX_SB_SQUARE *
-                                  sizeof(*thread_data->td->left_pred_buf)));
-
-      CHECK_MEM_ERROR(
-          cm, thread_data->td->wsrc_buf,
-          (int32_t *)aom_memalign(
-              16, MAX_SB_SQUARE * sizeof(*thread_data->td->wsrc_buf)));
-
-      for (int x = 0; x < 2; x++)
-        for (int y = 0; y < 2; y++)
-          CHECK_MEM_ERROR(
-              cm, thread_data->td->hash_value_buffer[x][y],
-              (uint32_t *)aom_malloc(
-                  AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
-                  sizeof(*thread_data->td->hash_value_buffer[0][0])));
-
-      CHECK_MEM_ERROR(
-          cm, thread_data->td->mask_buf,
-          (int32_t *)aom_memalign(
-              16, MAX_SB_SQUARE * sizeof(*thread_data->td->mask_buf)));
-      // Allocate frame counters in thread data.
-      CHECK_MEM_ERROR(cm, thread_data->td->counts,
-                      aom_calloc(1, sizeof(*thread_data->td->counts)));
-
-      // Allocate buffers used by palette coding mode.
-      CHECK_MEM_ERROR(
-          cm, thread_data->td->palette_buffer,
-          aom_memalign(16, sizeof(*thread_data->td->palette_buffer)));
-
-      CHECK_MEM_ERROR(
-          cm, thread_data->td->tmp_conv_dst,
-          aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE *
-                               sizeof(*thread_data->td->tmp_conv_dst)));
-      for (int j = 0; j < 2; ++j) {
-        CHECK_MEM_ERROR(
-            cm, thread_data->td->tmp_obmc_bufs[j],
-            aom_memalign(16, 2 * MAX_MB_PLANE * MAX_SB_SQUARE *
-                                 sizeof(*thread_data->td->tmp_obmc_bufs[j])));
-      }
-
-      // Create threads
-      if (!winterface->reset(worker))
-        aom_internal_error(&cm->error, AOM_CODEC_ERROR,
-                           "Tile encoder thread creation failed");
-    } else {
-      // Main thread acts as a worker and uses the thread data in cpi.
-      thread_data->td = &cpi->td;
-    }
-    winterface->sync(worker);
-  }
-}
-
-static void launch_enc_workers(AV1_COMP *cpi, int num_workers) {
-  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
-  // Encode a frame
-  for (int i = 0; i < num_workers; i++) {
-    AVxWorker *const worker = &cpi->workers[i];
-    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
-
-    // Set the starting tile for each thread.
-    thread_data->start = i;
-
-    if (i == cpi->num_workers - 1)
-      winterface->execute(worker);
-    else
-      winterface->launch(worker);
-  }
-}
-
-static void sync_enc_workers(AV1_COMP *cpi, int num_workers) {
-  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
-
-  // Encoding ends.
-  for (int i = 0; i < num_workers; i++) {
-    AVxWorker *const worker = &cpi->workers[i];
-    winterface->sync(worker);
-  }
-}
-
-static void accumulate_counters_enc_workers(AV1_COMP *cpi, int num_workers) {
-  for (int i = 0; i < num_workers; i++) {
-    AVxWorker *const worker = &cpi->workers[i];
-    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
-    cpi->intrabc_used |= thread_data->td->intrabc_used_this_tile;
-    // Accumulate counters.
-    if (i < cpi->num_workers - 1) {
-      av1_accumulate_frame_counts(&cpi->counts, thread_data->td->counts);
-      accumulate_rd_opt(&cpi->td, thread_data->td);
-      cpi->td.mb.txb_split_count += thread_data->td->mb.txb_split_count;
-    }
-  }
-}
-
-static void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
-                                int num_workers) {
-  for (int i = 0; i < num_workers; i++) {
-    AVxWorker *const worker = &cpi->workers[i];
-    EncWorkerData *const thread_data = &cpi->tile_thr_data[i];
-
-    worker->hook = hook;
-    worker->data1 = thread_data;
-    worker->data2 = NULL;
-
-    // Before encoding a frame, copy the thread data from cpi.
-    if (thread_data->td != &cpi->td) {
-      thread_data->td->mb = cpi->td.mb;
-      thread_data->td->rd_counts = cpi->td.rd_counts;
-      thread_data->td->mb.above_pred_buf = thread_data->td->above_pred_buf;
-      thread_data->td->mb.left_pred_buf = thread_data->td->left_pred_buf;
-      thread_data->td->mb.wsrc_buf = thread_data->td->wsrc_buf;
-      for (int x = 0; x < 2; x++) {
-        for (int y = 0; y < 2; y++) {
-          memcpy(thread_data->td->hash_value_buffer[x][y],
-                 cpi->td.mb.hash_value_buffer[x][y],
-                 AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
-                     sizeof(*thread_data->td->hash_value_buffer[0][0]));
-          thread_data->td->mb.hash_value_buffer[x][y] =
-              thread_data->td->hash_value_buffer[x][y];
-        }
-      }
-      thread_data->td->mb.mask_buf = thread_data->td->mask_buf;
-    }
-    if (thread_data->td->counts != &cpi->counts) {
-      memcpy(thread_data->td->counts, &cpi->counts, sizeof(cpi->counts));
-    }
-
-    if (i < num_workers - 1) {
-      thread_data->td->mb.palette_buffer = thread_data->td->palette_buffer;
-      thread_data->td->mb.tmp_conv_dst = thread_data->td->tmp_conv_dst;
-      for (int j = 0; j < 2; ++j) {
-        thread_data->td->mb.tmp_obmc_bufs[j] =
-            thread_data->td->tmp_obmc_bufs[j];
-      }
-
-      thread_data->td->mb.e_mbd.tmp_conv_dst = thread_data->td->mb.tmp_conv_dst;
-      for (int j = 0; j < 2; ++j) {
-        thread_data->td->mb.e_mbd.tmp_obmc_bufs[j] =
-            thread_data->td->mb.tmp_obmc_bufs[j];
-      }
-    }
-  }
-}
-
-void av1_encode_tiles_mt(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int tile_cols = cm->tile_cols;
-  const int tile_rows = cm->tile_rows;
-  int num_workers = AOMMIN(cpi->oxcf.max_threads, tile_cols * tile_rows);
-
-  if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows)
-    av1_alloc_tile_data(cpi);
-
-  av1_init_tile_data(cpi);
-  // Only run once to create threads and allocate thread data.
-  if (cpi->num_workers == 0) {
-    create_enc_workers(cpi, num_workers);
-  } else {
-    num_workers = AOMMIN(num_workers, cpi->num_workers);
-  }
-  prepare_enc_workers(cpi, enc_worker_hook, num_workers);
-  launch_enc_workers(cpi, num_workers);
-  sync_enc_workers(cpi, num_workers);
-  accumulate_counters_enc_workers(cpi, num_workers);
-}
-
-// Accumulate frame counts. FRAME_COUNTS consist solely of 'unsigned int'
-// members, so we treat it as an array, and sum over the whole length.
-void av1_accumulate_frame_counts(FRAME_COUNTS *acc_counts,
-                                 const FRAME_COUNTS *counts) {
-  unsigned int *const acc = (unsigned int *)acc_counts;
-  const unsigned int *const cnt = (const unsigned int *)counts;
-
-  const unsigned int n_counts = sizeof(FRAME_COUNTS) / sizeof(unsigned int);
-
-  for (unsigned int i = 0; i < n_counts; i++) acc[i] += cnt[i];
-}
diff --git a/third_party/aom/av1/encoder/ethread.h b/third_party/aom/av1/encoder/ethread.h
deleted file mode 100644
index 5de4b4803..000000000
--- a/third_party/aom/av1/encoder/ethread.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_ETHREAD_H_
-#define AOM_AV1_ENCODER_ETHREAD_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct AV1_COMP;
-struct ThreadData;
-
-typedef struct EncWorkerData {
-  struct AV1_COMP *cpi;
-  struct ThreadData *td;
-  int start;
-} EncWorkerData;
-
-void av1_encode_tiles_mt(struct AV1_COMP *cpi);
-
-void av1_accumulate_frame_counts(struct FRAME_COUNTS *acc_counts,
-                                 const struct FRAME_COUNTS *counts);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_ETHREAD_H_
diff --git a/third_party/aom/av1/encoder/extend.c b/third_party/aom/av1/encoder/extend.c
deleted file mode 100644
index e9621a574..000000000
--- a/third_party/aom/av1/encoder/extend.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-
-#include "av1/common/common.h"
-#include "av1/encoder/extend.h"
-
-static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
-                                  uint8_t *dst, int dst_pitch, int w, int h,
-                                  int extend_top, int extend_left,
-                                  int extend_bottom, int extend_right) {
-  int i, linesize;
-
-  // copy the left and right most columns out
-  const uint8_t *src_ptr1 = src;
-  const uint8_t *src_ptr2 = src + w - 1;
-  uint8_t *dst_ptr1 = dst - extend_left;
-  uint8_t *dst_ptr2 = dst + w;
-
-  for (i = 0; i < h; i++) {
-    memset(dst_ptr1, src_ptr1[0], extend_left);
-    memcpy(dst_ptr1 + extend_left, src_ptr1, w);
-    memset(dst_ptr2, src_ptr2[0], extend_right);
-    src_ptr1 += src_pitch;
-    src_ptr2 += src_pitch;
-    dst_ptr1 += dst_pitch;
-    dst_ptr2 += dst_pitch;
-  }
-
-  // Now copy the top and bottom lines into each line of the respective
-  // borders
-  src_ptr1 = dst - extend_left;
-  src_ptr2 = dst + dst_pitch * (h - 1) - extend_left;
-  dst_ptr1 = dst + dst_pitch * (-extend_top) - extend_left;
-  dst_ptr2 = dst + dst_pitch * (h)-extend_left;
-  linesize = extend_left + extend_right + w;
-
-  for (i = 0; i < extend_top; i++) {
-    memcpy(dst_ptr1, src_ptr1, linesize);
-    dst_ptr1 += dst_pitch;
-  }
-
-  for (i = 0; i < extend_bottom; i++) {
-    memcpy(dst_ptr2, src_ptr2, linesize);
-    dst_ptr2 += dst_pitch;
-  }
-}
-
-static void highbd_copy_and_extend_plane(const uint8_t *src8, int src_pitch,
-                                         uint8_t *dst8, int dst_pitch, int w,
-                                         int h, int extend_top, int extend_left,
-                                         int extend_bottom, int extend_right) {
-  int i, linesize;
-  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
-
-  // copy the left and right most columns out
-  const uint16_t *src_ptr1 = src;
-  const uint16_t *src_ptr2 = src + w - 1;
-  uint16_t *dst_ptr1 = dst - extend_left;
-  uint16_t *dst_ptr2 = dst + w;
-
-  for (i = 0; i < h; i++) {
-    aom_memset16(dst_ptr1, src_ptr1[0], extend_left);
-    memcpy(dst_ptr1 + extend_left, src_ptr1, w * sizeof(src_ptr1[0]));
-    aom_memset16(dst_ptr2, src_ptr2[0], extend_right);
-    src_ptr1 += src_pitch;
-    src_ptr2 += src_pitch;
-    dst_ptr1 += dst_pitch;
-    dst_ptr2 += dst_pitch;
-  }
-
-  // Now copy the top and bottom lines into each line of the respective
-  // borders
-  src_ptr1 = dst - extend_left;
-  src_ptr2 = dst + dst_pitch * (h - 1) - extend_left;
-  dst_ptr1 = dst + dst_pitch * (-extend_top) - extend_left;
-  dst_ptr2 = dst + dst_pitch * (h)-extend_left;
-  linesize = extend_left + extend_right + w;
-
-  for (i = 0; i < extend_top; i++) {
-    memcpy(dst_ptr1, src_ptr1, linesize * sizeof(src_ptr1[0]));
-    dst_ptr1 += dst_pitch;
-  }
-
-  for (i = 0; i < extend_bottom; i++) {
-    memcpy(dst_ptr2, src_ptr2, linesize * sizeof(src_ptr2[0]));
-    dst_ptr2 += dst_pitch;
-  }
-}
-
-void av1_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
-                               YV12_BUFFER_CONFIG *dst) {
-  // Extend src frame in buffer
-  // Altref filtering assumes 16 pixel extension
-  const int et_y = 16;
-  const int el_y = 16;
-  // Motion estimation may use src block variance with the block size up
-  // to 64x64, so the right and bottom need to be extended to 64 multiple
-  // or up to 16, whichever is greater.
-  const int er_y =
-      AOMMAX(src->y_width + 16, ALIGN_POWER_OF_TWO(src->y_width, 6)) -
-      src->y_crop_width;
-  const int eb_y =
-      AOMMAX(src->y_height + 16, ALIGN_POWER_OF_TWO(src->y_height, 6)) -
-      src->y_crop_height;
-  const int uv_width_subsampling = (src->uv_width != src->y_width);
-  const int uv_height_subsampling = (src->uv_height != src->y_height);
-  const int et_uv = et_y >> uv_height_subsampling;
-  const int el_uv = el_y >> uv_width_subsampling;
-  const int eb_uv = eb_y >> uv_height_subsampling;
-  const int er_uv = er_y >> uv_width_subsampling;
-
-  if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
-    highbd_copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer,
-                                 dst->y_stride, src->y_crop_width,
-                                 src->y_crop_height, et_y, el_y, eb_y, er_y);
-
-    highbd_copy_and_extend_plane(
-        src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride,
-        src->uv_crop_width, src->uv_crop_height, et_uv, el_uv, eb_uv, er_uv);
-
-    highbd_copy_and_extend_plane(
-        src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride,
-        src->uv_crop_width, src->uv_crop_height, et_uv, el_uv, eb_uv, er_uv);
-    return;
-  }
-
-  copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer,
-                        dst->y_stride, src->y_crop_width, src->y_crop_height,
-                        et_y, el_y, eb_y, er_y);
-
-  copy_and_extend_plane(src->u_buffer, src->uv_stride, dst->u_buffer,
-                        dst->uv_stride, src->uv_crop_width, src->uv_crop_height,
-                        et_uv, el_uv, eb_uv, er_uv);
-
-  copy_and_extend_plane(src->v_buffer, src->uv_stride, dst->v_buffer,
-                        dst->uv_stride, src->uv_crop_width, src->uv_crop_height,
-                        et_uv, el_uv, eb_uv, er_uv);
-}
-
-void av1_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
-                                         YV12_BUFFER_CONFIG *dst, int srcy,
-                                         int srcx, int srch, int srcw) {
-  // If the side is not touching the bounder then don't extend.
-  const int et_y = srcy ? 0 : dst->border;
-  const int el_y = srcx ? 0 : dst->border;
-  const int eb_y = srcy + srch != src->y_height
-                       ? 0
-                       : dst->border + dst->y_height - src->y_height;
-  const int er_y = srcx + srcw != src->y_width
-                       ? 0
-                       : dst->border + dst->y_width - src->y_width;
-  const int src_y_offset = srcy * src->y_stride + srcx;
-  const int dst_y_offset = srcy * dst->y_stride + srcx;
-
-  const int et_uv = ROUND_POWER_OF_TWO(et_y, 1);
-  const int el_uv = ROUND_POWER_OF_TWO(el_y, 1);
-  const int eb_uv = ROUND_POWER_OF_TWO(eb_y, 1);
-  const int er_uv = ROUND_POWER_OF_TWO(er_y, 1);
-  const int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
-  const int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
-  const int srch_uv = ROUND_POWER_OF_TWO(srch, 1);
-  const int srcw_uv = ROUND_POWER_OF_TWO(srcw, 1);
-
-  copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride,
-                        dst->y_buffer + dst_y_offset, dst->y_stride, srcw, srch,
-                        et_y, el_y, eb_y, er_y);
-
-  copy_and_extend_plane(src->u_buffer + src_uv_offset, src->uv_stride,
-                        dst->u_buffer + dst_uv_offset, dst->uv_stride, srcw_uv,
-                        srch_uv, et_uv, el_uv, eb_uv, er_uv);
-
-  copy_and_extend_plane(src->v_buffer + src_uv_offset, src->uv_stride,
-                        dst->v_buffer + dst_uv_offset, dst->uv_stride, srcw_uv,
-                        srch_uv, et_uv, el_uv, eb_uv, er_uv);
-}
diff --git a/third_party/aom/av1/encoder/extend.h b/third_party/aom/av1/encoder/extend.h
deleted file mode 100644
index e0432cc97..000000000
--- a/third_party/aom/av1/encoder/extend.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_EXTEND_H_
-#define AOM_AV1_ENCODER_EXTEND_H_
-
-#include "aom_scale/yv12config.h"
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
-                               YV12_BUFFER_CONFIG *dst);
-
-void av1_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
-                                         YV12_BUFFER_CONFIG *dst, int srcy,
-                                         int srcx, int srch, int srcw);
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_EXTEND_H_
diff --git a/third_party/aom/av1/encoder/firstpass.c b/third_party/aom/av1/encoder/firstpass.c
deleted file mode 100644
index 69dd20c52..000000000
--- a/third_party/aom/av1/encoder/firstpass.c
+++ /dev/null
@@ -1,3480 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-#include <math.h>
-#include <stdio.h>
-
-#include "config/aom_dsp_rtcd.h"
-#include "config/aom_scale_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/system_state.h"
-#include "aom_scale/aom_scale.h"
-#include "aom_scale/yv12config.h"
-
-#include "aom_dsp/variance.h"
-#include "av1/common/entropymv.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/reconinter.h"  // av1_setup_dst_planes()
-#include "av1/common/txb_common.h"
-#include "av1/encoder/aq_variance.h"
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/block.h"
-#include "av1/encoder/dwt.h"
-#include "av1/encoder/encodeframe.h"
-#include "av1/encoder/encodemb.h"
-#include "av1/encoder/encodemv.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/extend.h"
-#include "av1/encoder/firstpass.h"
-#include "av1/encoder/mcomp.h"
-#include "av1/encoder/rd.h"
-#include "av1/encoder/reconinter_enc.h"
-
-#define OUTPUT_FPF 0
-#define ARF_STATS_OUTPUT 0
-
-#define GROUP_ADAPTIVE_MAXQ 1
-
-#define BOOST_BREAKOUT 12.5
-#define BOOST_FACTOR 12.5
-#define FACTOR_PT_LOW 0.70
-#define FACTOR_PT_HIGH 0.90
-#define FIRST_PASS_Q 10.0
-#define GF_MAX_BOOST 90.0
-#define INTRA_MODE_PENALTY 1024
-#define KF_MIN_FRAME_BOOST 80.0
-#define KF_MAX_FRAME_BOOST 128.0
-#define MIN_ARF_GF_BOOST 240
-#define MIN_DECAY_FACTOR 0.01
-#define MIN_KF_BOOST 300
-#define NEW_MV_MODE_PENALTY 32
-#define DARK_THRESH 64
-#define DEFAULT_GRP_WEIGHT 1.0
-#define RC_FACTOR_MIN 0.75
-#define RC_FACTOR_MAX 1.75
-#define MIN_FWD_KF_INTERVAL 8
-
-#define NCOUNT_INTRA_THRESH 8192
-#define NCOUNT_INTRA_FACTOR 3
-#define NCOUNT_FRAME_II_THRESH 5.0
-
-#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x)-0.000001 : (x) + 0.000001)
-
-#if ARF_STATS_OUTPUT
-unsigned int arf_count = 0;
-#endif
-
-// Resets the first pass file to the given position using a relative seek from
-// the current position.
-static void reset_fpf_position(TWO_PASS *p, const FIRSTPASS_STATS *position) {
-  p->stats_in = position;
-}
-
-// Read frame stats at an offset from the current position.
-static const FIRSTPASS_STATS *read_frame_stats(const TWO_PASS *p, int offset) {
-  if ((offset >= 0 && p->stats_in + offset >= p->stats_in_end) ||
-      (offset < 0 && p->stats_in + offset < p->stats_in_start)) {
-    return NULL;
-  }
-
-  return &p->stats_in[offset];
-}
-
-static int input_stats(TWO_PASS *p, FIRSTPASS_STATS *fps) {
-  if (p->stats_in >= p->stats_in_end) return EOF;
-
-  *fps = *p->stats_in;
-  ++p->stats_in;
-  return 1;
-}
-
-static void output_stats(FIRSTPASS_STATS *stats,
-                         struct aom_codec_pkt_list *pktlist) {
-  struct aom_codec_cx_pkt pkt;
-  pkt.kind = AOM_CODEC_STATS_PKT;
-  pkt.data.twopass_stats.buf = stats;
-  pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS);
-  aom_codec_pkt_list_add(pktlist, &pkt);
-
-// TEMP debug code
-#if OUTPUT_FPF
-  {
-    FILE *fpfile;
-    fpfile = fopen("firstpass.stt", "a");
-
-    fprintf(fpfile,
-            "%12.0lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf %12.4lf"
-            "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf"
-            "%12.4lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf %12.4lf\n",
-            stats->frame, stats->weight, stats->intra_error, stats->coded_error,
-            stats->sr_coded_error, stats->pcnt_inter, stats->pcnt_motion,
-            stats->pcnt_second_ref, stats->pcnt_neutral, stats->intra_skip_pct,
-            stats->inactive_zone_rows, stats->inactive_zone_cols, stats->MVr,
-            stats->mvr_abs, stats->MVc, stats->mvc_abs, stats->MVrv,
-            stats->MVcv, stats->mv_in_out_count, stats->new_mv_count,
-            stats->count, stats->duration);
-    fclose(fpfile);
-  }
-#endif
-}
-
-#if CONFIG_FP_MB_STATS
-static void output_fpmb_stats(uint8_t *this_frame_mb_stats, int stats_size,
-                              struct aom_codec_pkt_list *pktlist) {
-  struct aom_codec_cx_pkt pkt;
-  pkt.kind = AOM_CODEC_FPMB_STATS_PKT;
-  pkt.data.firstpass_mb_stats.buf = this_frame_mb_stats;
-  pkt.data.firstpass_mb_stats.sz = stats_size * sizeof(*this_frame_mb_stats);
-  aom_codec_pkt_list_add(pktlist, &pkt);
-}
-#endif
-
-static void zero_stats(FIRSTPASS_STATS *section) {
-  section->frame = 0.0;
-  section->weight = 0.0;
-  section->intra_error = 0.0;
-  section->frame_avg_wavelet_energy = 0.0;
-  section->coded_error = 0.0;
-  section->sr_coded_error = 0.0;
-  section->pcnt_inter = 0.0;
-  section->pcnt_motion = 0.0;
-  section->pcnt_second_ref = 0.0;
-  section->pcnt_neutral = 0.0;
-  section->intra_skip_pct = 0.0;
-  section->inactive_zone_rows = 0.0;
-  section->inactive_zone_cols = 0.0;
-  section->MVr = 0.0;
-  section->mvr_abs = 0.0;
-  section->MVc = 0.0;
-  section->mvc_abs = 0.0;
-  section->MVrv = 0.0;
-  section->MVcv = 0.0;
-  section->mv_in_out_count = 0.0;
-  section->new_mv_count = 0.0;
-  section->count = 0.0;
-  section->duration = 1.0;
-}
-
-static void accumulate_stats(FIRSTPASS_STATS *section,
-                             const FIRSTPASS_STATS *frame) {
-  section->frame += frame->frame;
-  section->weight += frame->weight;
-  section->intra_error += frame->intra_error;
-  section->frame_avg_wavelet_energy += frame->frame_avg_wavelet_energy;
-  section->coded_error += frame->coded_error;
-  section->sr_coded_error += frame->sr_coded_error;
-  section->pcnt_inter += frame->pcnt_inter;
-  section->pcnt_motion += frame->pcnt_motion;
-  section->pcnt_second_ref += frame->pcnt_second_ref;
-  section->pcnt_neutral += frame->pcnt_neutral;
-  section->intra_skip_pct += frame->intra_skip_pct;
-  section->inactive_zone_rows += frame->inactive_zone_rows;
-  section->inactive_zone_cols += frame->inactive_zone_cols;
-  section->MVr += frame->MVr;
-  section->mvr_abs += frame->mvr_abs;
-  section->MVc += frame->MVc;
-  section->mvc_abs += frame->mvc_abs;
-  section->MVrv += frame->MVrv;
-  section->MVcv += frame->MVcv;
-  section->mv_in_out_count += frame->mv_in_out_count;
-  section->new_mv_count += frame->new_mv_count;
-  section->count += frame->count;
-  section->duration += frame->duration;
-}
-
-static void subtract_stats(FIRSTPASS_STATS *section,
-                           const FIRSTPASS_STATS *frame) {
-  section->frame -= frame->frame;
-  section->weight -= frame->weight;
-  section->intra_error -= frame->intra_error;
-  section->frame_avg_wavelet_energy -= frame->frame_avg_wavelet_energy;
-  section->coded_error -= frame->coded_error;
-  section->sr_coded_error -= frame->sr_coded_error;
-  section->pcnt_inter -= frame->pcnt_inter;
-  section->pcnt_motion -= frame->pcnt_motion;
-  section->pcnt_second_ref -= frame->pcnt_second_ref;
-  section->pcnt_neutral -= frame->pcnt_neutral;
-  section->intra_skip_pct -= frame->intra_skip_pct;
-  section->inactive_zone_rows -= frame->inactive_zone_rows;
-  section->inactive_zone_cols -= frame->inactive_zone_cols;
-  section->MVr -= frame->MVr;
-  section->mvr_abs -= frame->mvr_abs;
-  section->MVc -= frame->MVc;
-  section->mvc_abs -= frame->mvc_abs;
-  section->MVrv -= frame->MVrv;
-  section->MVcv -= frame->MVcv;
-  section->mv_in_out_count -= frame->mv_in_out_count;
-  section->new_mv_count -= frame->new_mv_count;
-  section->count -= frame->count;
-  section->duration -= frame->duration;
-}
-
-// Calculate the linear size relative to a baseline of 1080P
-#define BASE_SIZE 2073600.0  // 1920x1080
-static double get_linear_size_factor(const AV1_COMP *cpi) {
-  const double this_area = cpi->initial_width * cpi->initial_height;
-  return pow(this_area / BASE_SIZE, 0.5);
-}
-
-// Calculate an active area of the image that discounts formatting
-// bars and partially discounts other 0 energy areas.
-#define MIN_ACTIVE_AREA 0.5
-#define MAX_ACTIVE_AREA 1.0
-static double calculate_active_area(const AV1_COMP *cpi,
-                                    const FIRSTPASS_STATS *this_frame) {
-  double active_pct;
-
-  active_pct =
-      1.0 -
-      ((this_frame->intra_skip_pct / 2) +
-       ((this_frame->inactive_zone_rows * 2) / (double)cpi->common.mb_rows));
-  return fclamp(active_pct, MIN_ACTIVE_AREA, MAX_ACTIVE_AREA);
-}
-
-// Calculate a modified Error used in distributing bits between easier and
-// harder frames.
-#define ACT_AREA_CORRECTION 0.5
-static double calculate_modified_err(const AV1_COMP *cpi,
-                                     const TWO_PASS *twopass,
-                                     const AV1EncoderConfig *oxcf,
-                                     const FIRSTPASS_STATS *this_frame) {
-  const FIRSTPASS_STATS *const stats = &twopass->total_stats;
-  const double av_weight = stats->weight / stats->count;
-  const double av_err = (stats->coded_error * av_weight) / stats->count;
-  double modified_error =
-      av_err * pow(this_frame->coded_error * this_frame->weight /
-                       DOUBLE_DIVIDE_CHECK(av_err),
-                   oxcf->two_pass_vbrbias / 100.0);
-
-  // Correction for active area. Frames with a reduced active area
-  // (eg due to formatting bars) have a higher error per mb for the
-  // remaining active MBs. The correction here assumes that coding
-  // 0.5N blocks of complexity 2X is a little easier than coding N
-  // blocks of complexity X.
-  modified_error *=
-      pow(calculate_active_area(cpi, this_frame), ACT_AREA_CORRECTION);
-
-  return fclamp(modified_error, twopass->modified_error_min,
-                twopass->modified_error_max);
-}
-
-// This function returns the maximum target rate per frame.
-static int frame_max_bits(const RATE_CONTROL *rc,
-                          const AV1EncoderConfig *oxcf) {
-  int64_t max_bits = ((int64_t)rc->avg_frame_bandwidth *
-                      (int64_t)oxcf->two_pass_vbrmax_section) /
-                     100;
-  if (max_bits < 0)
-    max_bits = 0;
-  else if (max_bits > rc->max_frame_bandwidth)
-    max_bits = rc->max_frame_bandwidth;
-
-  return (int)max_bits;
-}
-
-void av1_init_first_pass(AV1_COMP *cpi) {
-  zero_stats(&cpi->twopass.total_stats);
-}
-
-void av1_end_first_pass(AV1_COMP *cpi) {
-  output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
-}
-
-static aom_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) {
-  switch (bsize) {
-    case BLOCK_8X8: return aom_mse8x8;
-    case BLOCK_16X8: return aom_mse16x8;
-    case BLOCK_8X16: return aom_mse8x16;
-    default: return aom_mse16x16;
-  }
-}
-
-static unsigned int get_prediction_error(BLOCK_SIZE bsize,
-                                         const struct buf_2d *src,
-                                         const struct buf_2d *ref) {
-  unsigned int sse;
-  const aom_variance_fn_t fn = get_block_variance_fn(bsize);
-  fn(src->buf, src->stride, ref->buf, ref->stride, &sse);
-  return sse;
-}
-
-static aom_variance_fn_t highbd_get_block_variance_fn(BLOCK_SIZE bsize,
-                                                      int bd) {
-  switch (bd) {
-    default:
-      switch (bsize) {
-        case BLOCK_8X8: return aom_highbd_8_mse8x8;
-        case BLOCK_16X8: return aom_highbd_8_mse16x8;
-        case BLOCK_8X16: return aom_highbd_8_mse8x16;
-        default: return aom_highbd_8_mse16x16;
-      }
-      break;
-    case 10:
-      switch (bsize) {
-        case BLOCK_8X8: return aom_highbd_10_mse8x8;
-        case BLOCK_16X8: return aom_highbd_10_mse16x8;
-        case BLOCK_8X16: return aom_highbd_10_mse8x16;
-        default: return aom_highbd_10_mse16x16;
-      }
-      break;
-    case 12:
-      switch (bsize) {
-        case BLOCK_8X8: return aom_highbd_12_mse8x8;
-        case BLOCK_16X8: return aom_highbd_12_mse16x8;
-        case BLOCK_8X16: return aom_highbd_12_mse8x16;
-        default: return aom_highbd_12_mse16x16;
-      }
-      break;
-  }
-}
-
-static unsigned int highbd_get_prediction_error(BLOCK_SIZE bsize,
-                                                const struct buf_2d *src,
-                                                const struct buf_2d *ref,
-                                                int bd) {
-  unsigned int sse;
-  const aom_variance_fn_t fn = highbd_get_block_variance_fn(bsize, bd);
-  fn(src->buf, src->stride, ref->buf, ref->stride, &sse);
-  return sse;
-}
-
-// Refine the motion search range according to the frame dimension
-// for first pass test.
-static int get_search_range(const AV1_COMP *cpi) {
-  int sr = 0;
-  const int dim = AOMMIN(cpi->initial_width, cpi->initial_height);
-
-  while ((dim << sr) < MAX_FULL_PEL_VAL) ++sr;
-  return sr;
-}
-
-static void first_pass_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
-                                     const MV *ref_mv, MV *best_mv,
-                                     int *best_motion_err) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MV tmp_mv = kZeroMv;
-  MV ref_mv_full = { ref_mv->row >> 3, ref_mv->col >> 3 };
-  int num00, tmp_err, n;
-  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
-  aom_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize];
-  const int new_mv_mode_penalty = NEW_MV_MODE_PENALTY;
-
-  int step_param = 3;
-  int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
-  const int sr = get_search_range(cpi);
-  step_param += sr;
-  further_steps -= sr;
-
-  // Override the default variance function to use MSE.
-  v_fn_ptr.vf = get_block_variance_fn(bsize);
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    v_fn_ptr.vf = highbd_get_block_variance_fn(bsize, xd->bd);
-  }
-
-  // Center the initial step/diamond search on best mv.
-  tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv,
-                                    step_param, x->sadperbit16, &num00,
-                                    &v_fn_ptr, ref_mv);
-  if (tmp_err < INT_MAX)
-    tmp_err = av1_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1);
-  if (tmp_err < INT_MAX - new_mv_mode_penalty) tmp_err += new_mv_mode_penalty;
-
-  if (tmp_err < *best_motion_err) {
-    *best_motion_err = tmp_err;
-    *best_mv = tmp_mv;
-  }
-
-  // Carry out further step/diamond searches as necessary.
-  n = num00;
-  num00 = 0;
-
-  while (n < further_steps) {
-    ++n;
-
-    if (num00) {
-      --num00;
-    } else {
-      tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv,
-                                        step_param + n, x->sadperbit16, &num00,
-                                        &v_fn_ptr, ref_mv);
-      if (tmp_err < INT_MAX)
-        tmp_err = av1_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1);
-      if (tmp_err < INT_MAX - new_mv_mode_penalty)
-        tmp_err += new_mv_mode_penalty;
-
-      if (tmp_err < *best_motion_err) {
-        *best_motion_err = tmp_err;
-        *best_mv = tmp_mv;
-      }
-    }
-  }
-}
-
-static BLOCK_SIZE get_bsize(const AV1_COMMON *cm, int mb_row, int mb_col) {
-  if (mi_size_wide[BLOCK_16X16] * mb_col + mi_size_wide[BLOCK_8X8] <
-      cm->mi_cols) {
-    return mi_size_wide[BLOCK_16X16] * mb_row + mi_size_wide[BLOCK_8X8] <
-                   cm->mi_rows
-               ? BLOCK_16X16
-               : BLOCK_16X8;
-  } else {
-    return mi_size_wide[BLOCK_16X16] * mb_row + mi_size_wide[BLOCK_8X8] <
-                   cm->mi_rows
-               ? BLOCK_8X16
-               : BLOCK_8X8;
-  }
-}
-
-static int find_fp_qindex(aom_bit_depth_t bit_depth) {
-  int i;
-
-  for (i = 0; i < QINDEX_RANGE; ++i)
-    if (av1_convert_qindex_to_q(i, bit_depth) >= FIRST_PASS_Q) break;
-
-  if (i == QINDEX_RANGE) i--;
-
-  return i;
-}
-
-static void set_first_pass_params(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  if (!cpi->refresh_alt_ref_frame &&
-      (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY))) {
-    cm->frame_type = KEY_FRAME;
-  } else {
-    cm->frame_type = INTER_FRAME;
-  }
-  // Do not use periodic key frames.
-  cpi->rc.frames_to_key = INT_MAX;
-}
-
-static double raw_motion_error_stdev(int *raw_motion_err_list,
-                                     int raw_motion_err_counts) {
-  int64_t sum_raw_err = 0;
-  double raw_err_avg = 0;
-  double raw_err_stdev = 0;
-  if (raw_motion_err_counts == 0) return 0;
-
-  int i;
-  for (i = 0; i < raw_motion_err_counts; i++) {
-    sum_raw_err += raw_motion_err_list[i];
-  }
-  raw_err_avg = (double)sum_raw_err / raw_motion_err_counts;
-  for (i = 0; i < raw_motion_err_counts; i++) {
-    raw_err_stdev += (raw_motion_err_list[i] - raw_err_avg) *
-                     (raw_motion_err_list[i] - raw_err_avg);
-  }
-  // Calculate the standard deviation for the motion error of all the inter
-  // blocks of the 0,0 motion using the last source
-  // frame as the reference.
-  raw_err_stdev = sqrt(raw_err_stdev / raw_motion_err_counts);
-  return raw_err_stdev;
-}
-
-#define UL_INTRA_THRESH 50
-#define INVALID_ROW -1
-void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
-  int mb_row, mb_col;
-  MACROBLOCK *const x = &cpi->td.mb;
-  AV1_COMMON *const cm = &cpi->common;
-  const SequenceHeader *const seq_params = &cm->seq_params;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCKD *const xd = &x->e_mbd;
-  TileInfo tile;
-  struct macroblock_plane *const p = x->plane;
-  struct macroblockd_plane *const pd = xd->plane;
-  const PICK_MODE_CONTEXT *ctx =
-      &cpi->td.pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2]->none;
-  int i;
-
-  int recon_yoffset, recon_uvoffset;
-  int64_t intra_error = 0;
-  int64_t frame_avg_wavelet_energy = 0;
-  int64_t coded_error = 0;
-  int64_t sr_coded_error = 0;
-
-  int sum_mvr = 0, sum_mvc = 0;
-  int sum_mvr_abs = 0, sum_mvc_abs = 0;
-  int64_t sum_mvrs = 0, sum_mvcs = 0;
-  int mvcount = 0;
-  int intercount = 0;
-  int second_ref_count = 0;
-  const int intrapenalty = INTRA_MODE_PENALTY;
-  double neutral_count;
-  int intra_skip_count = 0;
-  int image_data_start_row = INVALID_ROW;
-  int new_mv_count = 0;
-  int sum_in_vectors = 0;
-  MV lastmv = kZeroMv;
-  TWO_PASS *twopass = &cpi->twopass;
-  int recon_y_stride, recon_uv_stride, uv_mb_height;
-
-  YV12_BUFFER_CONFIG *const lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
-  YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
-  YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm);
-  const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
-  double intra_factor;
-  double brightness_factor;
-  BufferPool *const pool = cm->buffer_pool;
-  const int qindex = find_fp_qindex(seq_params->bit_depth);
-  const int mb_scale = mi_size_wide[BLOCK_16X16];
-
-  int *raw_motion_err_list;
-  int raw_motion_err_counts = 0;
-  CHECK_MEM_ERROR(
-      cm, raw_motion_err_list,
-      aom_calloc(cm->mb_rows * cm->mb_cols, sizeof(*raw_motion_err_list)));
-  // First pass code requires valid last and new frame buffers.
-  assert(new_yv12 != NULL);
-  assert(frame_is_intra_only(cm) || (lst_yv12 != NULL));
-
-#if CONFIG_FP_MB_STATS
-  if (cpi->use_fp_mb_stats) {
-    av1_zero_array(cpi->twopass.frame_mb_stats_buf, cpi->initial_mbs);
-  }
-#endif
-
-  aom_clear_system_state();
-
-  xd->mi = cm->mi_grid_visible;
-  xd->mi[0] = cm->mi;
-  x->e_mbd.mi[0]->sb_type = BLOCK_16X16;
-
-  intra_factor = 0.0;
-  brightness_factor = 0.0;
-  neutral_count = 0.0;
-
-  set_first_pass_params(cpi);
-  av1_set_quantizer(cm, qindex);
-
-  av1_setup_block_planes(&x->e_mbd, seq_params->subsampling_x,
-                         seq_params->subsampling_y, num_planes);
-
-  av1_setup_src_planes(x, cpi->source, 0, 0, num_planes);
-  av1_setup_dst_planes(xd->plane, seq_params->sb_size, new_yv12, 0, 0, 0,
-                       num_planes);
-
-  if (!frame_is_intra_only(cm)) {
-    av1_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL, num_planes);
-  }
-
-  xd->mi = cm->mi_grid_visible;
-  xd->mi[0] = cm->mi;
-
-  // Don't store luma on the fist pass since chroma is not computed
-  xd->cfl.store_y = 0;
-  av1_frame_init_quantizer(cpi);
-
-  for (i = 0; i < num_planes; ++i) {
-    p[i].coeff = ctx->coeff[i];
-    p[i].qcoeff = ctx->qcoeff[i];
-    pd[i].dqcoeff = ctx->dqcoeff[i];
-    p[i].eobs = ctx->eobs[i];
-    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
-  }
-
-  av1_init_mv_probs(cm);
-  av1_init_lv_map(cm);
-  av1_initialize_rd_consts(cpi);
-
-  // Tiling is ignored in the first pass.
-  av1_tile_init(&tile, cm, 0, 0);
-
-  recon_y_stride = new_yv12->y_stride;
-  recon_uv_stride = new_yv12->uv_stride;
-  uv_mb_height = 16 >> (new_yv12->y_height > new_yv12->uv_height);
-
-  for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
-    MV best_ref_mv = kZeroMv;
-
-    // Reset above block coeffs.
-    xd->up_available = (mb_row != 0);
-    recon_yoffset = (mb_row * recon_y_stride * 16);
-    recon_uvoffset = (mb_row * recon_uv_stride * uv_mb_height);
-
-    // Set up limit values for motion vectors to prevent them extending
-    // outside the UMV borders.
-    x->mv_limits.row_min = -((mb_row * 16) + BORDER_MV_PIXELS_B16);
-    x->mv_limits.row_max =
-        ((cm->mb_rows - 1 - mb_row) * 16) + BORDER_MV_PIXELS_B16;
-
-    for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
-      int this_error;
-      const int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
-      const BLOCK_SIZE bsize = get_bsize(cm, mb_row, mb_col);
-      double log_intra;
-      int level_sample;
-
-#if CONFIG_FP_MB_STATS
-      const int mb_index = mb_row * cm->mb_cols + mb_col;
-#endif
-
-      aom_clear_system_state();
-
-      const int idx_str = xd->mi_stride * mb_row * mb_scale + mb_col * mb_scale;
-      xd->mi = cm->mi_grid_visible + idx_str;
-      xd->mi[0] = cm->mi + idx_str;
-      xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
-      xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset;
-      xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset;
-      xd->left_available = (mb_col != 0);
-      xd->mi[0]->sb_type = bsize;
-      xd->mi[0]->ref_frame[0] = INTRA_FRAME;
-      set_mi_row_col(xd, &tile, mb_row * mb_scale, mi_size_high[bsize],
-                     mb_col * mb_scale, mi_size_wide[bsize], cm->mi_rows,
-                     cm->mi_cols);
-
-      set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize], num_planes);
-
-      // Do intra 16x16 prediction.
-      xd->mi[0]->segment_id = 0;
-      xd->lossless[xd->mi[0]->segment_id] = (qindex == 0);
-      xd->mi[0]->mode = DC_PRED;
-      xd->mi[0]->tx_size =
-          use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
-      av1_encode_intra_block_plane(cpi, x, bsize, 0, 0, mb_row * 2, mb_col * 2);
-      this_error = aom_get_mb_ss(x->plane[0].src_diff);
-
-      // Keep a record of blocks that have almost no intra error residual
-      // (i.e. are in effect completely flat and untextured in the intra
-      // domain). In natural videos this is uncommon, but it is much more
-      // common in animations, graphics and screen content, so may be used
-      // as a signal to detect these types of content.
-      if (this_error < UL_INTRA_THRESH) {
-        ++intra_skip_count;
-      } else if ((mb_col > 0) && (image_data_start_row == INVALID_ROW)) {
-        image_data_start_row = mb_row;
-      }
-
-      if (seq_params->use_highbitdepth) {
-        switch (seq_params->bit_depth) {
-          case AOM_BITS_8: break;
-          case AOM_BITS_10: this_error >>= 4; break;
-          case AOM_BITS_12: this_error >>= 8; break;
-          default:
-            assert(0 &&
-                   "seq_params->bit_depth should be AOM_BITS_8, "
-                   "AOM_BITS_10 or AOM_BITS_12");
-            return;
-        }
-      }
-
-      aom_clear_system_state();
-      log_intra = log(this_error + 1.0);
-      if (log_intra < 10.0)
-        intra_factor += 1.0 + ((10.0 - log_intra) * 0.05);
-      else
-        intra_factor += 1.0;
-
-      if (seq_params->use_highbitdepth)
-        level_sample = CONVERT_TO_SHORTPTR(x->plane[0].src.buf)[0];
-      else
-        level_sample = x->plane[0].src.buf[0];
-      if ((level_sample < DARK_THRESH) && (log_intra < 9.0))
-        brightness_factor += 1.0 + (0.01 * (DARK_THRESH - level_sample));
-      else
-        brightness_factor += 1.0;
-
-      // Intrapenalty below deals with situations where the intra and inter
-      // error scores are very low (e.g. a plain black frame).
-      // We do not have special cases in first pass for 0,0 and nearest etc so
-      // all inter modes carry an overhead cost estimate for the mv.
-      // When the error score is very low this causes us to pick all or lots of
-      // INTRA modes and throw lots of key frames.
-      // This penalty adds a cost matching that of a 0,0 mv to the intra case.
-      this_error += intrapenalty;
-
-      // Accumulate the intra error.
-      intra_error += (int64_t)this_error;
-
-      int stride = x->plane[0].src.stride;
-      uint8_t *buf = x->plane[0].src.buf;
-      for (int r8 = 0; r8 < 2; ++r8)
-        for (int c8 = 0; c8 < 2; ++c8) {
-          int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
-          frame_avg_wavelet_energy += av1_haar_ac_sad_8x8_uint8_input(
-              buf + c8 * 8 + r8 * 8 * stride, stride, hbd);
-        }
-
-#if CONFIG_FP_MB_STATS
-      if (cpi->use_fp_mb_stats) {
-        // initialization
-        cpi->twopass.frame_mb_stats_buf[mb_index] = 0;
-      }
-#endif
-
-      // Set up limit values for motion vectors to prevent them extending
-      // outside the UMV borders.
-      x->mv_limits.col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16);
-      x->mv_limits.col_max =
-          ((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16;
-
-      if (!frame_is_intra_only(cm)) {  // Do a motion search
-        int tmp_err, motion_error, raw_motion_error;
-        // Assume 0,0 motion with no mv overhead.
-        MV mv = kZeroMv, tmp_mv = kZeroMv;
-        struct buf_2d unscaled_last_source_buf_2d;
-
-        xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
-        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-          motion_error = highbd_get_prediction_error(
-              bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
-        } else {
-          motion_error = get_prediction_error(bsize, &x->plane[0].src,
-                                              &xd->plane[0].pre[0]);
-        }
-
-        // Compute the motion error of the 0,0 motion using the last source
-        // frame as the reference. Skip the further motion search on
-        // reconstructed frame if this error is small.
-        unscaled_last_source_buf_2d.buf =
-            cpi->unscaled_last_source->y_buffer + recon_yoffset;
-        unscaled_last_source_buf_2d.stride =
-            cpi->unscaled_last_source->y_stride;
-        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-          raw_motion_error = highbd_get_prediction_error(
-              bsize, &x->plane[0].src, &unscaled_last_source_buf_2d, xd->bd);
-        } else {
-          raw_motion_error = get_prediction_error(bsize, &x->plane[0].src,
-                                                  &unscaled_last_source_buf_2d);
-        }
-
-        // TODO(pengchong): Replace the hard-coded threshold
-        if (raw_motion_error > 25) {
-          // Test last reference frame using the previous best mv as the
-          // starting point (best reference) for the search.
-          first_pass_motion_search(cpi, x, &best_ref_mv, &mv, &motion_error);
-
-          // If the current best reference mv is not centered on 0,0 then do a
-          // 0,0 based search as well.
-          if (!is_zero_mv(&best_ref_mv)) {
-            tmp_err = INT_MAX;
-            first_pass_motion_search(cpi, x, &kZeroMv, &tmp_mv, &tmp_err);
-
-            if (tmp_err < motion_error) {
-              motion_error = tmp_err;
-              mv = tmp_mv;
-            }
-          }
-
-          // Search in an older reference frame.
-          if ((cm->current_video_frame > 1) && gld_yv12 != NULL) {
-            // Assume 0,0 motion with no mv overhead.
-            int gf_motion_error;
-
-            xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
-            if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-              gf_motion_error = highbd_get_prediction_error(
-                  bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
-            } else {
-              gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
-                                                     &xd->plane[0].pre[0]);
-            }
-
-            first_pass_motion_search(cpi, x, &kZeroMv, &tmp_mv,
-                                     &gf_motion_error);
-
-            if (gf_motion_error < motion_error && gf_motion_error < this_error)
-              ++second_ref_count;
-
-            // Reset to last frame as reference buffer.
-            xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
-            xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset;
-            xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset;
-
-            // In accumulating a score for the older reference frame take the
-            // best of the motion predicted score and the intra coded error
-            // (just as will be done for) accumulation of "coded_error" for
-            // the last frame.
-            if (gf_motion_error < this_error)
-              sr_coded_error += gf_motion_error;
-            else
-              sr_coded_error += this_error;
-          } else {
-            sr_coded_error += motion_error;
-          }
-        } else {
-          sr_coded_error += motion_error;
-        }
-
-        // Start by assuming that intra mode is best.
-        best_ref_mv.row = 0;
-        best_ref_mv.col = 0;
-
-#if CONFIG_FP_MB_STATS
-        if (cpi->use_fp_mb_stats) {
-          // intra predication statistics
-          cpi->twopass.frame_mb_stats_buf[mb_index] = 0;
-          cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_DCINTRA_MASK;
-          cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK;
-          if (this_error > FPMB_ERROR_LARGE_TH) {
-            cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_LARGE_MASK;
-          } else if (this_error < FPMB_ERROR_SMALL_TH) {
-            cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_SMALL_MASK;
-          }
-        }
-#endif
-
-        if (motion_error <= this_error) {
-          aom_clear_system_state();
-
-          // Keep a count of cases where the inter and intra were very close
-          // and very low. This helps with scene cut detection for example in
-          // cropped clips with black bars at the sides or top and bottom.
-          if (((this_error - intrapenalty) * 9 <= motion_error * 10) &&
-              (this_error < (2 * intrapenalty))) {
-            neutral_count += 1.0;
-            // Also track cases where the intra is not much worse than the inter
-            // and use this in limiting the GF/arf group length.
-          } else if ((this_error > NCOUNT_INTRA_THRESH) &&
-                     (this_error < (NCOUNT_INTRA_FACTOR * motion_error))) {
-            neutral_count +=
-                (double)motion_error / DOUBLE_DIVIDE_CHECK((double)this_error);
-          }
-
-          mv.row *= 8;
-          mv.col *= 8;
-          this_error = motion_error;
-          xd->mi[0]->mode = NEWMV;
-          xd->mi[0]->mv[0].as_mv = mv;
-          xd->mi[0]->tx_size = TX_4X4;
-          xd->mi[0]->ref_frame[0] = LAST_FRAME;
-          xd->mi[0]->ref_frame[1] = NONE_FRAME;
-          av1_build_inter_predictors_sby(cm, xd, mb_row * mb_scale,
-                                         mb_col * mb_scale, NULL, bsize);
-          av1_encode_sby_pass1(cm, x, bsize);
-          sum_mvr += mv.row;
-          sum_mvr_abs += abs(mv.row);
-          sum_mvc += mv.col;
-          sum_mvc_abs += abs(mv.col);
-          sum_mvrs += mv.row * mv.row;
-          sum_mvcs += mv.col * mv.col;
-          ++intercount;
-
-          best_ref_mv = mv;
-
-#if CONFIG_FP_MB_STATS
-          if (cpi->use_fp_mb_stats) {
-            // inter predication statistics
-            cpi->twopass.frame_mb_stats_buf[mb_index] = 0;
-            cpi->twopass.frame_mb_stats_buf[mb_index] &= ~FPMB_DCINTRA_MASK;
-            cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK;
-            if (this_error > FPMB_ERROR_LARGE_TH) {
-              cpi->twopass.frame_mb_stats_buf[mb_index] |=
-                  FPMB_ERROR_LARGE_MASK;
-            } else if (this_error < FPMB_ERROR_SMALL_TH) {
-              cpi->twopass.frame_mb_stats_buf[mb_index] |=
-                  FPMB_ERROR_SMALL_MASK;
-            }
-          }
-#endif
-
-          if (!is_zero_mv(&mv)) {
-            ++mvcount;
-
-#if CONFIG_FP_MB_STATS
-            if (cpi->use_fp_mb_stats) {
-              cpi->twopass.frame_mb_stats_buf[mb_index] &=
-                  ~FPMB_MOTION_ZERO_MASK;
-              // check estimated motion direction
-              if (mv.col > 0 && mv.col >= abs(mv.row)) {
-                // right direction
-                cpi->twopass.frame_mb_stats_buf[mb_index] |=
-                    FPMB_MOTION_RIGHT_MASK;
-              } else if (mv.row < 0 && abs(mv.row) >= abs(mv.col)) {
-                // up direction
-                cpi->twopass.frame_mb_stats_buf[mb_index] |=
-                    FPMB_MOTION_UP_MASK;
-              } else if (mv.col < 0 && abs(mv.col) >= abs(mv.row)) {
-                // left direction
-                cpi->twopass.frame_mb_stats_buf[mb_index] |=
-                    FPMB_MOTION_LEFT_MASK;
-              } else {
-                // down direction
-                cpi->twopass.frame_mb_stats_buf[mb_index] |=
-                    FPMB_MOTION_DOWN_MASK;
-              }
-            }
-#endif
-
-            // Non-zero vector, was it different from the last non zero vector?
-            if (!is_equal_mv(&mv, &lastmv)) ++new_mv_count;
-            lastmv = mv;
-
-            // Does the row vector point inwards or outwards?
-            if (mb_row < cm->mb_rows / 2) {
-              if (mv.row > 0)
-                --sum_in_vectors;
-              else if (mv.row < 0)
-                ++sum_in_vectors;
-            } else if (mb_row > cm->mb_rows / 2) {
-              if (mv.row > 0)
-                ++sum_in_vectors;
-              else if (mv.row < 0)
-                --sum_in_vectors;
-            }
-
-            // Does the col vector point inwards or outwards?
-            if (mb_col < cm->mb_cols / 2) {
-              if (mv.col > 0)
-                --sum_in_vectors;
-              else if (mv.col < 0)
-                ++sum_in_vectors;
-            } else if (mb_col > cm->mb_cols / 2) {
-              if (mv.col > 0)
-                ++sum_in_vectors;
-              else if (mv.col < 0)
-                --sum_in_vectors;
-            }
-          }
-        }
-        raw_motion_err_list[raw_motion_err_counts++] = raw_motion_error;
-      } else {
-        sr_coded_error += (int64_t)this_error;
-      }
-      coded_error += (int64_t)this_error;
-
-      // Adjust to the next column of MBs.
-      x->plane[0].src.buf += 16;
-      x->plane[1].src.buf += uv_mb_height;
-      x->plane[2].src.buf += uv_mb_height;
-
-      recon_yoffset += 16;
-      recon_uvoffset += uv_mb_height;
-    }
-    // Adjust to the next row of MBs.
-    x->plane[0].src.buf += 16 * x->plane[0].src.stride - 16 * cm->mb_cols;
-    x->plane[1].src.buf +=
-        uv_mb_height * x->plane[1].src.stride - uv_mb_height * cm->mb_cols;
-    x->plane[2].src.buf +=
-        uv_mb_height * x->plane[1].src.stride - uv_mb_height * cm->mb_cols;
-
-    aom_clear_system_state();
-  }
-  const double raw_err_stdev =
-      raw_motion_error_stdev(raw_motion_err_list, raw_motion_err_counts);
-  aom_free(raw_motion_err_list);
-
-  // Clamp the image start to rows/2. This number of rows is discarded top
-  // and bottom as dead data so rows / 2 means the frame is blank.
-  if ((image_data_start_row > cm->mb_rows / 2) ||
-      (image_data_start_row == INVALID_ROW)) {
-    image_data_start_row = cm->mb_rows / 2;
-  }
-  // Exclude any image dead zone
-  if (image_data_start_row > 0) {
-    intra_skip_count =
-        AOMMAX(0, intra_skip_count - (image_data_start_row * cm->mb_cols * 2));
-  }
-
-  {
-    FIRSTPASS_STATS fps;
-    // The minimum error here insures some bit allocation to frames even
-    // in static regions. The allocation per MB declines for larger formats
-    // where the typical "real" energy per MB also falls.
-    // Initial estimate here uses sqrt(mbs) to define the min_err, where the
-    // number of mbs is proportional to the image area.
-    const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
-                            ? cpi->initial_mbs
-                            : cpi->common.MBs;
-    const double min_err = 200 * sqrt(num_mbs);
-
-    intra_factor = intra_factor / (double)num_mbs;
-    brightness_factor = brightness_factor / (double)num_mbs;
-    fps.weight = intra_factor * brightness_factor;
-
-    fps.frame = cm->current_video_frame;
-    fps.coded_error = (double)(coded_error >> 8) + min_err;
-    fps.sr_coded_error = (double)(sr_coded_error >> 8) + min_err;
-    fps.intra_error = (double)(intra_error >> 8) + min_err;
-    fps.frame_avg_wavelet_energy = (double)frame_avg_wavelet_energy;
-    fps.count = 1.0;
-    fps.pcnt_inter = (double)intercount / num_mbs;
-    fps.pcnt_second_ref = (double)second_ref_count / num_mbs;
-    fps.pcnt_neutral = (double)neutral_count / num_mbs;
-    fps.intra_skip_pct = (double)intra_skip_count / num_mbs;
-    fps.inactive_zone_rows = (double)image_data_start_row;
-    fps.inactive_zone_cols = (double)0;  // TODO(paulwilkins): fix
-    fps.raw_error_stdev = raw_err_stdev;
-
-    if (mvcount > 0) {
-      fps.MVr = (double)sum_mvr / mvcount;
-      fps.mvr_abs = (double)sum_mvr_abs / mvcount;
-      fps.MVc = (double)sum_mvc / mvcount;
-      fps.mvc_abs = (double)sum_mvc_abs / mvcount;
-      fps.MVrv =
-          ((double)sum_mvrs - ((double)sum_mvr * sum_mvr / mvcount)) / mvcount;
-      fps.MVcv =
-          ((double)sum_mvcs - ((double)sum_mvc * sum_mvc / mvcount)) / mvcount;
-      fps.mv_in_out_count = (double)sum_in_vectors / (mvcount * 2);
-      fps.new_mv_count = new_mv_count;
-      fps.pcnt_motion = (double)mvcount / num_mbs;
-    } else {
-      fps.MVr = 0.0;
-      fps.mvr_abs = 0.0;
-      fps.MVc = 0.0;
-      fps.mvc_abs = 0.0;
-      fps.MVrv = 0.0;
-      fps.MVcv = 0.0;
-      fps.mv_in_out_count = 0.0;
-      fps.new_mv_count = 0.0;
-      fps.pcnt_motion = 0.0;
-    }
-
-    // TODO(paulwilkins):  Handle the case when duration is set to 0, or
-    // something less than the full time between subsequent values of
-    // cpi->source_time_stamp.
-    fps.duration = (double)(source->ts_end - source->ts_start);
-
-    // Don't want to do output stats with a stack variable!
-    twopass->this_frame_stats = fps;
-    output_stats(&twopass->this_frame_stats, cpi->output_pkt_list);
-    accumulate_stats(&twopass->total_stats, &fps);
-
-#if CONFIG_FP_MB_STATS
-    if (cpi->use_fp_mb_stats) {
-      output_fpmb_stats(twopass->frame_mb_stats_buf, cpi->initial_mbs,
-                        cpi->output_pkt_list);
-    }
-#endif
-  }
-
-  // Copy the previous Last Frame back into gf and and arf buffers if
-  // the prediction is good enough... but also don't allow it to lag too far.
-  if ((twopass->sr_update_lag > 3) ||
-      ((cm->current_video_frame > 0) &&
-       (twopass->this_frame_stats.pcnt_inter > 0.20) &&
-       ((twopass->this_frame_stats.intra_error /
-         DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) {
-    if (gld_yv12 != NULL) {
-      ref_cnt_fb(pool->frame_bufs,
-                 &cm->ref_frame_map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]],
-                 cm->ref_frame_map[cpi->ref_fb_idx[LAST_FRAME - 1]]);
-    }
-    twopass->sr_update_lag = 1;
-  } else {
-    ++twopass->sr_update_lag;
-  }
-
-  aom_extend_frame_borders(new_yv12, num_planes);
-
-  // The frame we just compressed now becomes the last frame.
-  ref_cnt_fb(pool->frame_bufs,
-             &cm->ref_frame_map[cpi->ref_fb_idx[LAST_FRAME - 1]],
-             cm->new_fb_idx);
-
-  // Special case for the first frame. Copy into the GF buffer as a second
-  // reference.
-  if (cm->current_video_frame == 0 &&
-      cpi->ref_fb_idx[GOLDEN_FRAME - 1] != INVALID_IDX) {
-    ref_cnt_fb(pool->frame_bufs,
-               &cm->ref_frame_map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]],
-               cm->ref_frame_map[cpi->ref_fb_idx[LAST_FRAME - 1]]);
-  }
-
-  // Use this to see what the first pass reconstruction looks like.
-  if (0) {
-    char filename[512];
-    FILE *recon_file;
-    snprintf(filename, sizeof(filename), "enc%04d.yuv",
-             (int)cm->current_video_frame);
-
-    if (cm->current_video_frame == 0)
-      recon_file = fopen(filename, "wb");
-    else
-      recon_file = fopen(filename, "ab");
-
-    (void)fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file);
-    fclose(recon_file);
-  }
-
-  ++cm->current_video_frame;
-}
-
-static double calc_correction_factor(double err_per_mb, double err_divisor,
-                                     double pt_low, double pt_high, int q,
-                                     aom_bit_depth_t bit_depth) {
-  const double error_term = err_per_mb / err_divisor;
-
-  // Adjustment based on actual quantizer to power term.
-  const double power_term =
-      AOMMIN(av1_convert_qindex_to_q(q, bit_depth) * 0.01 + pt_low, pt_high);
-
-  // Calculate correction factor.
-  if (power_term < 1.0) assert(error_term >= 0.0);
-
-  return fclamp(pow(error_term, power_term), 0.05, 5.0);
-}
-
-#define ERR_DIVISOR 100.0
-static int get_twopass_worst_quality(const AV1_COMP *cpi,
-                                     const double section_err,
-                                     double inactive_zone,
-                                     int section_target_bandwidth,
-                                     double group_weight_factor) {
-  const RATE_CONTROL *const rc = &cpi->rc;
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-
-  inactive_zone = fclamp(inactive_zone, 0.0, 1.0);
-
-  if (section_target_bandwidth <= 0) {
-    return rc->worst_quality;  // Highest value allowed
-  } else {
-    const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
-                            ? cpi->initial_mbs
-                            : cpi->common.MBs;
-    const int active_mbs = AOMMAX(1, num_mbs - (int)(num_mbs * inactive_zone));
-    const double av_err_per_mb = section_err / active_mbs;
-    const double speed_term = 1.0;
-    double ediv_size_correction;
-    const int target_norm_bits_per_mb =
-        (int)((uint64_t)section_target_bandwidth << BPER_MB_NORMBITS) /
-        active_mbs;
-    int q;
-
-    // Larger image formats are expected to be a little harder to code
-    // relatively given the same prediction error score. This in part at
-    // least relates to the increased size and hence coding overheads of
-    // motion vectors. Some account of this is made through adjustment of
-    // the error divisor.
-    ediv_size_correction =
-        AOMMAX(0.2, AOMMIN(5.0, get_linear_size_factor(cpi)));
-    if (ediv_size_correction < 1.0)
-      ediv_size_correction = -(1.0 / ediv_size_correction);
-    ediv_size_correction *= 4.0;
-
-    // Try and pick a max Q that will be high enough to encode the
-    // content at the given rate.
-    for (q = rc->best_quality; q < rc->worst_quality; ++q) {
-      const double factor = calc_correction_factor(
-          av_err_per_mb, ERR_DIVISOR - ediv_size_correction, FACTOR_PT_LOW,
-          FACTOR_PT_HIGH, q, cpi->common.seq_params.bit_depth);
-      const int bits_per_mb = av1_rc_bits_per_mb(
-          INTER_FRAME, q, factor * speed_term * group_weight_factor,
-          cpi->common.seq_params.bit_depth);
-      if (bits_per_mb <= target_norm_bits_per_mb) break;
-    }
-
-    // Restriction on active max q for constrained quality mode.
-    if (cpi->oxcf.rc_mode == AOM_CQ) q = AOMMAX(q, oxcf->cq_level);
-    return q;
-  }
-}
-
-static void setup_rf_level_maxq(AV1_COMP *cpi) {
-  int i;
-  RATE_CONTROL *const rc = &cpi->rc;
-  for (i = INTER_NORMAL; i < RATE_FACTOR_LEVELS; ++i) {
-    int qdelta = av1_frame_type_qdelta(cpi, i, rc->worst_quality);
-    rc->rf_level_maxq[i] = AOMMAX(rc->worst_quality + qdelta, rc->best_quality);
-  }
-}
-
-void av1_init_second_pass(AV1_COMP *cpi) {
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-  TWO_PASS *const twopass = &cpi->twopass;
-  double frame_rate;
-  FIRSTPASS_STATS *stats;
-
-  zero_stats(&twopass->total_stats);
-  zero_stats(&twopass->total_left_stats);
-
-  if (!twopass->stats_in_end) return;
-
-  stats = &twopass->total_stats;
-
-  *stats = *twopass->stats_in_end;
-  twopass->total_left_stats = *stats;
-
-  frame_rate = 10000000.0 * stats->count / stats->duration;
-  // Each frame can have a different duration, as the frame rate in the source
-  // isn't guaranteed to be constant. The frame rate prior to the first frame
-  // encoded in the second pass is a guess. However, the sum duration is not.
-  // It is calculated based on the actual durations of all frames from the
-  // first pass.
-  av1_new_framerate(cpi, frame_rate);
-  twopass->bits_left =
-      (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0);
-
-  // This variable monitors how far behind the second ref update is lagging.
-  twopass->sr_update_lag = 1;
-
-  // Scan the first pass file and calculate a modified total error based upon
-  // the bias/power function used to allocate bits.
-  {
-    const double avg_error =
-        stats->coded_error / DOUBLE_DIVIDE_CHECK(stats->count);
-    const FIRSTPASS_STATS *s = twopass->stats_in;
-    double modified_error_total = 0.0;
-    twopass->modified_error_min =
-        (avg_error * oxcf->two_pass_vbrmin_section) / 100;
-    twopass->modified_error_max =
-        (avg_error * oxcf->two_pass_vbrmax_section) / 100;
-    while (s < twopass->stats_in_end) {
-      modified_error_total += calculate_modified_err(cpi, twopass, oxcf, s);
-      ++s;
-    }
-    twopass->modified_error_left = modified_error_total;
-  }
-
-  // Reset the vbr bits off target counters
-  cpi->rc.vbr_bits_off_target = 0;
-  cpi->rc.vbr_bits_off_target_fast = 0;
-
-  cpi->rc.rate_error_estimate = 0;
-
-  // Static sequence monitor variables.
-  twopass->kf_zeromotion_pct = 100;
-  twopass->last_kfgroup_zeromotion_pct = 100;
-
-  if (oxcf->resize_mode != RESIZE_NONE) {
-    setup_rf_level_maxq(cpi);
-  }
-}
-
-#define SR_DIFF_PART 0.0015
-#define MOTION_AMP_PART 0.003
-#define INTRA_PART 0.005
-#define DEFAULT_DECAY_LIMIT 0.75
-#define LOW_SR_DIFF_TRHESH 0.1
-#define SR_DIFF_MAX 128.0
-
-static double get_sr_decay_rate(const AV1_COMP *cpi,
-                                const FIRSTPASS_STATS *frame) {
-  const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs
-                                                             : cpi->common.MBs;
-  double sr_diff = (frame->sr_coded_error - frame->coded_error) / num_mbs;
-  double sr_decay = 1.0;
-  double modified_pct_inter;
-  double modified_pcnt_intra;
-  const double motion_amplitude_factor =
-      frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) / 2);
-
-  modified_pct_inter = frame->pcnt_inter;
-  if ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) <
-      (double)NCOUNT_FRAME_II_THRESH) {
-    modified_pct_inter = frame->pcnt_inter - frame->pcnt_neutral;
-  }
-  modified_pcnt_intra = 100 * (1.0 - modified_pct_inter);
-
-  if ((sr_diff > LOW_SR_DIFF_TRHESH)) {
-    sr_diff = AOMMIN(sr_diff, SR_DIFF_MAX);
-    sr_decay = 1.0 - (SR_DIFF_PART * sr_diff) -
-               (MOTION_AMP_PART * motion_amplitude_factor) -
-               (INTRA_PART * modified_pcnt_intra);
-  }
-  return AOMMAX(sr_decay, AOMMIN(DEFAULT_DECAY_LIMIT, modified_pct_inter));
-}
-
-// This function gives an estimate of how badly we believe the prediction
-// quality is decaying from frame to frame.
-static double get_zero_motion_factor(const AV1_COMP *cpi,
-                                     const FIRSTPASS_STATS *frame) {
-  const double zero_motion_pct = frame->pcnt_inter - frame->pcnt_motion;
-  double sr_decay = get_sr_decay_rate(cpi, frame);
-  return AOMMIN(sr_decay, zero_motion_pct);
-}
-
-#define ZM_POWER_FACTOR 0.75
-
-static double get_prediction_decay_rate(const AV1_COMP *cpi,
-                                        const FIRSTPASS_STATS *next_frame) {
-  const double sr_decay_rate = get_sr_decay_rate(cpi, next_frame);
-  const double zero_motion_factor =
-      (0.95 * pow((next_frame->pcnt_inter - next_frame->pcnt_motion),
-                  ZM_POWER_FACTOR));
-
-  return AOMMAX(zero_motion_factor,
-                (sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor)));
-}
-
-// Function to test for a condition where a complex transition is followed
-// by a static section. For example in slide shows where there is a fade
-// between slides. This is to help with more optimal kf and gf positioning.
-static int detect_transition_to_still(AV1_COMP *cpi, int frame_interval,
-                                      int still_interval,
-                                      double loop_decay_rate,
-                                      double last_decay_rate) {
-  TWO_PASS *const twopass = &cpi->twopass;
-  RATE_CONTROL *const rc = &cpi->rc;
-
-  // Break clause to detect very still sections after motion
-  // For example a static image after a fade or other transition
-  // instead of a clean scene cut.
-  if (frame_interval > rc->min_gf_interval && loop_decay_rate >= 0.999 &&
-      last_decay_rate < 0.9) {
-    int j;
-
-    // Look ahead a few frames to see if static condition persists...
-    for (j = 0; j < still_interval; ++j) {
-      const FIRSTPASS_STATS *stats = &twopass->stats_in[j];
-      if (stats >= twopass->stats_in_end) break;
-
-      if (stats->pcnt_inter - stats->pcnt_motion < 0.999) break;
-    }
-
-    // Only if it does do we signal a transition to still.
-    return j == still_interval;
-  }
-
-  return 0;
-}
-
-// This function detects a flash through the high relative pcnt_second_ref
-// score in the frame following a flash frame. The offset passed in should
-// reflect this.
-static int detect_flash(const TWO_PASS *twopass, int offset) {
-  const FIRSTPASS_STATS *const next_frame = read_frame_stats(twopass, offset);
-
-  // What we are looking for here is a situation where there is a
-  // brief break in prediction (such as a flash) but subsequent frames
-  // are reasonably well predicted by an earlier (pre flash) frame.
-  // The recovery after a flash is indicated by a high pcnt_second_ref
-  // compared to pcnt_inter.
-  return next_frame != NULL &&
-         next_frame->pcnt_second_ref > next_frame->pcnt_inter &&
-         next_frame->pcnt_second_ref >= 0.5;
-}
-
-// Update the motion related elements to the GF arf boost calculation.
-static void accumulate_frame_motion_stats(const FIRSTPASS_STATS *stats,
-                                          double *mv_in_out,
-                                          double *mv_in_out_accumulator,
-                                          double *abs_mv_in_out_accumulator,
-                                          double *mv_ratio_accumulator) {
-  const double pct = stats->pcnt_motion;
-
-  // Accumulate Motion In/Out of frame stats.
-  *mv_in_out = stats->mv_in_out_count * pct;
-  *mv_in_out_accumulator += *mv_in_out;
-  *abs_mv_in_out_accumulator += fabs(*mv_in_out);
-
-  // Accumulate a measure of how uniform (or conversely how random) the motion
-  // field is (a ratio of abs(mv) / mv).
-  if (pct > 0.05) {
-    const double mvr_ratio =
-        fabs(stats->mvr_abs) / DOUBLE_DIVIDE_CHECK(fabs(stats->MVr));
-    const double mvc_ratio =
-        fabs(stats->mvc_abs) / DOUBLE_DIVIDE_CHECK(fabs(stats->MVc));
-
-    *mv_ratio_accumulator +=
-        pct * (mvr_ratio < stats->mvr_abs ? mvr_ratio : stats->mvr_abs);
-    *mv_ratio_accumulator +=
-        pct * (mvc_ratio < stats->mvc_abs ? mvc_ratio : stats->mvc_abs);
-  }
-}
-
-// Per-macroblock error used as the numerator of the boost ratio below.
-#define BASELINE_ERR_PER_MB 1000.0
-// Computes the boost contribution of a single frame.
-//   cpi:                   encoder instance (average inter qindex, MB counts).
-//   this_frame:            first-pass stats of the frame being scored.
-//   this_frame_mv_in_out:  in/out-of-frame motion measure for this frame.
-//   max_boost:             upper bound, scaled by the same Q correction.
-// Returns the Q-corrected, motion-adjusted boost, capped at
-// max_boost * q_correction.
-static double calc_frame_boost(AV1_COMP *cpi, const FIRSTPASS_STATS *this_frame,
-                               double this_frame_mv_in_out, double max_boost) {
-  const double avg_q = av1_convert_qindex_to_q(
-      cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.seq_params.bit_depth);
-  // Correction grows linearly with the average Q and saturates at 1.5.
-  const double q_correction = AOMMIN((0.5 + (avg_q * 0.015)), 1.5);
-
-  // With resizing enabled the initial macroblock count is used instead of the
-  // current one.
-  int mb_count = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs
-                                                        : cpi->common.MBs;
-
-  // Scale by the active area of the image, keeping at least one macroblock.
-  mb_count = (int)AOMMAX(1, mb_count * calculate_active_area(cpi, this_frame));
-
-  // The base boost is the ratio of the baseline error to the coded error.
-  double boost = (BASELINE_ERR_PER_MB * mb_count) /
-                 DOUBLE_DIVIDE_CHECK(this_frame->coded_error);
-  boost = boost * BOOST_FACTOR * q_correction;
-
-  // Positive in/out motion (new data entering the frame, e.g. zoom out) raises
-  // the boost by up to 2x the measure; otherwise it is lowered by half the
-  // measure. The measure is expected to lie in [-1.0, +1.0].
-  const double motion_scale = (this_frame_mv_in_out > 0.0)
-                                  ? (this_frame_mv_in_out * 2.0)
-                                  : (this_frame_mv_in_out / 2.0);
-  boost += boost * motion_scale;
-
-  return AOMMIN(boost, max_boost * q_correction);
-}
-
-// Sums the decayed per-frame boost over up to `count` frames, starting at
-// stats index `first` and moving by `step` (+1 or -1) per frame. The scan stops
-// early when read_frame_stats() returns NULL.
-static double arf_boost_scan(AV1_COMP *cpi, int first, int step, int count) {
-  TWO_PASS *const twopass = &cpi->twopass;
-  double score = 0.0;
-  double decay = 1.0;
-  double frame_mv_in_out = 0.0;
-  // These totals are maintained by accumulate_frame_motion_stats() but are not
-  // consulted by the boost computation itself.
-  double mv_ratio_total = 0.0;
-  double mv_in_out_total = 0.0;
-  double abs_mv_in_out_total = 0.0;
-
-  for (int n = 0; n < count; ++n) {
-    const int idx = first + n * step;
-    const FIRSTPASS_STATS *const stats = read_frame_stats(twopass, idx);
-    if (stats == NULL) break;
-
-    // Update the motion related inputs to the boost calculation.
-    accumulate_frame_motion_stats(stats, &frame_mv_in_out, &mv_in_out_total,
-                                  &abs_mv_in_out_total, &mv_ratio_total);
-
-    // A flash frame and the recovery frame after it both score poorly, so
-    // neither is allowed to contribute to the prediction decay.
-    const int flash =
-        detect_flash(twopass, idx) || detect_flash(twopass, idx + 1);
-    if (!flash) {
-      decay *= get_prediction_decay_rate(cpi, stats);
-      if (decay < MIN_DECAY_FACTOR) decay = MIN_DECAY_FACTOR;
-    }
-
-    score +=
-        decay * calc_frame_boost(cpi, stats, frame_mv_in_out, GF_MAX_BOOST);
-  }
-
-  return score;
-}
-
-// Computes the boost for an ARF / next GF placed at stats offset `offset`.
-//   f_frames: number of frames to scan forward from `offset`.
-//   b_frames: number of frames to scan backward from `offset - 1`.
-//   f_boost:  out; truncated forward boost score.
-//   b_boost:  out; truncated backward boost score.
-// Returns *f_boost + *b_boost, raised to at least 20 per scanned frame and to
-// at least MIN_ARF_GF_BOOST.
-static int calc_arf_boost(AV1_COMP *cpi, int offset, int f_frames, int b_frames,
-                          int *f_boost, int *b_boost) {
-  // Search forward from the proposed arf/next gf position.
-  *f_boost = (int)arf_boost_scan(cpi, offset, 1, f_frames);
-
-  // Search backward towards the last gf position, with fresh accumulators.
-  *b_boost = (int)arf_boost_scan(cpi, offset - 1, -1, b_frames);
-
-  int arf_boost = (*f_boost + *b_boost);
-  const int per_frame_floor = ((b_frames + f_frames) * 20);
-  if (arf_boost < per_frame_floor) arf_boost = per_frame_floor;
-
-  return AOMMAX(arf_boost, MIN_ARF_GF_BOOST);
-}
-
-// Returns the truncated ratio of summed intra error to summed coded error over
-// a section of first-pass stats; the ratio is used in setting the max loop
-// filter. The section covers at most `section_length` entries starting at
-// `begin` and never reaches `end`.
-static int calculate_section_intra_ratio(const FIRSTPASS_STATS *begin,
-                                         const FIRSTPASS_STATS *end,
-                                         int section_length) {
-  double intra_sum = 0.0;
-  double coded_sum = 0.0;
-  const FIRSTPASS_STATS *cur;
-  int remaining = section_length;
-
-  for (cur = begin; cur < end && remaining > 0; ++cur, --remaining) {
-    intra_sum += cur->intra_error;
-    coded_sum += cur->coded_error;
-  }
-
-  return (int)(intra_sum / DOUBLE_DIVIDE_CHECK(coded_sum));
-}
-
-// Returns the number of bits to allocate to the current GF/ARF group: the share
-// of the remaining key-frame-group bits proportional to
-// gf_group_err / kf_group_error_left, limited by kf_group_bits and by
-// frame_max_bits() * baseline_gf_interval.
-static int64_t calculate_total_gf_group_bits(AV1_COMP *cpi,
-                                             double gf_group_err) {
-  const RATE_CONTROL *const rc = &cpi->rc;
-  const TWO_PASS *const twopass = &cpi->twopass;
-  const int max_bits = frame_max_bits(rc, &cpi->oxcf);
-  int64_t group_bits = 0;
-
-  // Proportional share of the KF group budget; stays 0 when either the budget
-  // or the remaining error is not positive.
-  if ((twopass->kf_group_bits > 0) && (twopass->kf_group_error_left > 0)) {
-    group_bits = (int64_t)(twopass->kf_group_bits *
-                           (gf_group_err / twopass->kf_group_error_left));
-  }
-
-  // Clamp odd edge cases: no negative result, and no more than kf_group_bits.
-  if (group_bits < 0) {
-    group_bits = 0;
-  } else if (group_bits > twopass->kf_group_bits) {
-    group_bits = twopass->kf_group_bits;
-  }
-
-  // Clip based on the user supplied data rate variability limit.
-  const int64_t rate_cap = (int64_t)max_bits * rc->baseline_gf_interval;
-  if (group_bits > rate_cap) group_bits = rate_cap;
-
-  return group_bits;
-}
-
-// Returns the number of extra bits to assign to the boosted frame(s) of a
-// group. The group is treated as frame_count * 100 + boost allocation chunks,
-// of which `boost` chunks go to the boosted frame(s). Returns 0 for a zero
-// boost, a non-positive bit budget or a non-positive frame count, and never
-// returns a negative value.
-static int calculate_boost_bits(int frame_count, int boost,
-                                int64_t total_group_bits) {
-  // Invalid inputs can arise e.g. through rounding errors.
-  if (boost == 0 || total_group_bits <= 0 || frame_count <= 0) return 0;
-
-  int chunks = (frame_count * 100) + boost;
-
-  // Scale large boosts down (together with the chunk count) to prevent
-  // overflow in the multiplication below.
-  if (boost >= 1024) {
-    const int scale = boost >> 10;
-    boost /= scale;
-    chunks /= scale;
-  }
-
-  const int extra_bits = (int)(((int64_t)boost * total_group_bits) / chunks);
-  return AOMMAX(extra_bits, 0);
-}
-
-#if USE_SYMM_MULTI_LAYER
-// #define CHCEK_GF_PARAMETER
-#ifdef CHCEK_GF_PARAMETER
-// Debug helper: appends a text dump of the GF group layout to "GF_PARAMS.txt"
-// in the current working directory.
-//   gf_group:    group whose per-frame parameters are dumped.
-//   gf_interval: written as the "{N}" header of the dump.
-//   frame_nums:  index of the last entry to dump (entries 0..frame_nums).
-// Does nothing if the file cannot be opened.
-void check_frame_params(GF_GROUP *const gf_group, int gf_interval,
-                        int frame_nums) {
-  static const char *update_type_strings[] = {
-    "KF_UPDATE",          "LF_UPDATE",      "GF_UPDATE",
-    "ARF_UPDATE",         "OVERLAY_UPDATE", "BRF_UPDATE",
-    "LAST_BIPRED_UPDATE", "BIPRED_UPDATE",  "INTNL_OVERLAY_UPDATE",
-    "INTNL_ARF_UPDATE"
-  };
-  FILE *fid = fopen("GF_PARAMS.txt", "a");
-
-  // fopen() returns NULL on failure (e.g. read-only directory); passing a NULL
-  // stream to fprintf()/fclose() is undefined behavior, so skip the dump.
-  if (fid == NULL) return;
-
-  fprintf(fid, "\n{%d}\n", gf_interval);
-  for (int i = 0; i <= frame_nums; ++i) {
-    fprintf(fid, "%s %d %d %d %d\n",
-            update_type_strings[gf_group->update_type[i]],
-            gf_group->arf_src_offset[i], gf_group->arf_pos_in_gf[i],
-            gf_group->arf_update_idx[i], gf_group->pyramid_level[i]);
-  }
-
-  fprintf(fid, "number of nodes in each level: \n");
-  for (int i = 0; i < MAX_PYRAMID_LVL; ++i) {
-    fprintf(fid, "lvl %d: %d ", i, gf_group->pyramid_lvl_nodes[i]);
-  }
-  fprintf(fid, "\n");
-  fclose(fid);
-}
-#endif  // CHCEK_GF_PARAMETER
-// Maps a frame update type to its rate-factor level: ARF_UPDATE maps to
-// GF_ARF_STD, BRF_UPDATE and INTNL_ARF_UPDATE map to GF_ARF_LOW, and every
-// other update type maps to INTER_NORMAL.
-static int update_type_2_rf_level(FRAME_UPDATE_TYPE update_type) {
-  switch (update_type) {
-    case ARF_UPDATE: return GF_ARF_STD;
-    case BRF_UPDATE:
-    case INTNL_ARF_UPDATE: return GF_ARF_LOW;
-    // LF_UPDATE, OVERLAY_UPDATE, LAST_BIPRED_UPDATE, BIPRED_UPDATE,
-    // INTNL_OVERLAY_UPDATE and anything else.
-    default: return INTER_NORMAL;
-  }
-}
-
-// Recursively fills the per-frame GF group parameters for the frames strictly
-// between display positions `l` and `r`.
-//   gf_group:  group whose update_type / arf_src_offset / arf_pos_in_gf /
-//              arf_update_idx / pyramid_level arrays are written.
-//   l, r:      exclusive bounds of the interval being filled.
-//   frame_ind: in/out cursor into the gf_group arrays; advanced by one for
-//              every entry written.
-//   arf_ind:   arf_update_idx value given to leaf frames of this interval.
-//   level:     pyramid level assigned to the internal ARF of this interval.
-// Intervals narrower than 4 become plain LF_UPDATE leaves; wider ones are
-// split at the midpoint into internal ARF, left half, internal overlay and
-// right half, in that order.
-static void set_multi_layer_params(GF_GROUP *const gf_group, int l, int r,
-                                   int *frame_ind, int arf_ind, int level) {
-  if (r - l < 4) {
-    while (++l < r) {
-      // leaf nodes, not a look-ahead frame
-      gf_group->update_type[*frame_ind] = LF_UPDATE;
-      gf_group->arf_src_offset[*frame_ind] = 0;
-      gf_group->arf_pos_in_gf[*frame_ind] = 0;
-      gf_group->arf_update_idx[*frame_ind] = arf_ind;
-      gf_group->pyramid_level[*frame_ind] = 0;
-      ++gf_group->pyramid_lvl_nodes[0];
-      ++(*frame_ind);
-    }
-  } else {
-    int m = (l + r) / 2;
-    // Remember where the internal ARF is stored so that its overlay can refer
-    // back to it.
-    int arf_pos_in_gf = *frame_ind;
-
-    gf_group->update_type[*frame_ind] = INTNL_ARF_UPDATE;
-    gf_group->arf_src_offset[*frame_ind] = m - l - 1;
-    gf_group->arf_pos_in_gf[*frame_ind] = 0;
-    gf_group->arf_update_idx[*frame_ind] = 1;  // mark all internal ARF 1
-    gf_group->pyramid_level[*frame_ind] = level;
-    ++gf_group->pyramid_lvl_nodes[level];
-    ++(*frame_ind);
-
-    // set parameters for frames displayed before this frame
-    set_multi_layer_params(gf_group, l, m, frame_ind, 1, level - 1);
-
-    // for overlay frames, we need to record the position of its corresponding
-    // arf frames for bit allocation
-    // (the overlay entry is not counted in pyramid_lvl_nodes)
-    gf_group->update_type[*frame_ind] = INTNL_OVERLAY_UPDATE;
-    gf_group->arf_src_offset[*frame_ind] = 0;
-    gf_group->arf_pos_in_gf[*frame_ind] = arf_pos_in_gf;
-    gf_group->arf_update_idx[*frame_ind] = 1;
-    gf_group->pyramid_level[*frame_ind] = 0;
-    ++(*frame_ind);
-
-    // set parameters for frames displayed after this frame
-    set_multi_layer_params(gf_group, m, r, frame_ind, arf_ind, level - 1);
-  }
-}
-
-// Returns the pyramid height for a GF interval of `pyramid_width` frames:
-// 2 for widths 4..6, 3 for widths 7..12 and 4 for widths 13..16. Widths outside
-// 4..16 trip the assertion.
-static INLINE unsigned char get_pyramid_height(int pyramid_width) {
-  assert(pyramid_width <= 16 && pyramid_width >= 4 &&
-         "invalid gf interval for pyramid structure");
-
-  if (pyramid_width > 12) return 4;
-  if (pyramid_width > 6) return 3;
-  return 2;
-}
-
-// Lays out a symmetric multi-layer GF group of `gf_interval` frames in
-// `gf_group`: entry 0 is the leading key/overlay frame, entry 1 is the top
-// level ARF, and the remaining entries are produced by
-// set_multi_layer_params(). Also sets pyramid_height and resets the per-level
-// node counts. Returns the number of entries written.
-static int construct_multi_layer_gf_structure(GF_GROUP *const gf_group,
-                                              const int gf_interval) {
-  gf_group->pyramid_height = get_pyramid_height(gf_interval);
-  assert(gf_group->pyramid_height <= MAX_PYRAMID_LVL);
-
-  av1_zero_array(gf_group->pyramid_lvl_nodes, MAX_PYRAMID_LVL);
-
-  // Entry 0: every GF group starts with a key or overlay frame.
-  gf_group->update_type[0] = OVERLAY_UPDATE;
-  gf_group->arf_src_offset[0] = 0;
-  gf_group->arf_pos_in_gf[0] = 0;
-  gf_group->arf_update_idx[0] = 0;
-  gf_group->pyramid_level[0] = 0;
-
-  // Entry 1: ALT0, sourced from the last frame of the interval and placed at
-  // the top of the pyramid.
-  gf_group->update_type[1] = ARF_UPDATE;
-  gf_group->arf_src_offset[1] = gf_interval - 1;
-  gf_group->arf_pos_in_gf[1] = 0;
-  gf_group->arf_update_idx[1] = 0;
-  gf_group->pyramid_level[1] = gf_group->pyramid_height;
-
-  // Entries 2 and up: the frames between the two ends of the interval.
-  int next_entry = 2;
-  set_multi_layer_params(gf_group, 0, gf_interval, &next_entry, 0,
-                         gf_group->pyramid_height - 1);
-  return next_entry;
-}
-
-// Builds the symmetric multi-layer GF group structure for the current
-// rc->baseline_gf_interval (asserted to be within 4..MAX_PYRAMID_SIZE), then
-// finalizes the update type of entry 0, counts the internal ARFs into
-// cpi->num_extra_arfs, derives rf_level for every entry and configures the
-// entry that follows the group.
-void define_customized_gf_group_structure(AV1_COMP *cpi) {
-  RATE_CONTROL *const rc = &cpi->rc;
-  TWO_PASS *const twopass = &cpi->twopass;
-  GF_GROUP *const gf_group = &twopass->gf_group;
-  const int key_frame = cpi->common.frame_type == KEY_FRAME;
-
-  assert(rc->baseline_gf_interval >= 4 &&
-         rc->baseline_gf_interval <= MAX_PYRAMID_SIZE);
-
-  // Number of entries written by the layout pass.
-  const int gf_update_frames =
-      construct_multi_layer_gf_structure(gf_group, rc->baseline_gf_interval);
-  int frame_index;
-
-  cpi->num_extra_arfs = 0;
-
-  for (frame_index = 0; frame_index < gf_update_frames; ++frame_index) {
-    // Set unused variables to default values
-    gf_group->bidir_pred_enabled[frame_index] = 0;
-    gf_group->brf_src_offset[frame_index] = 0;
-
-    // Special handle for the first frame for assigning update_type
-    if (frame_index == 0) {
-      // For key frames the frame target rate is already set and it
-      // is also the golden frame.
-      // Note: the `continue` also skips the rf_level assignment below.
-      if (key_frame) {
-        gf_group->update_type[frame_index] = KF_UPDATE;
-        continue;
-      }
-
-      if (rc->source_alt_ref_active) {
-        gf_group->update_type[frame_index] = OVERLAY_UPDATE;
-      } else {
-        gf_group->update_type[frame_index] = GF_UPDATE;
-      }
-    } else {
-      // Every internal ARF produced by the layout pass is an extra ARF.
-      if (gf_group->update_type[frame_index] == INTNL_ARF_UPDATE)
-        ++cpi->num_extra_arfs;
-    }
-
-    // Assign rf level based on update type
-    gf_group->rf_level[frame_index] =
-        update_type_2_rf_level(gf_group->update_type[frame_index]);
-  }
-
-  // NOTE: We need to configure the frame at the end of the sequence + 1 that
-  //       will be the start frame for the next group. Otherwise prior to the
-  //       call to av1_rc_get_second_pass_params() the data will be undefined.
-  // (frame_index == gf_update_frames at this point.)
-  if (rc->source_alt_ref_pending) {
-    gf_group->update_type[frame_index] = OVERLAY_UPDATE;
-    gf_group->rf_level[frame_index] = INTER_NORMAL;
-  } else {
-    gf_group->update_type[frame_index] = GF_UPDATE;
-    gf_group->rf_level[frame_index] = GF_ARF_STD;
-  }
-
-  gf_group->bidir_pred_enabled[frame_index] = 0;
-  gf_group->brf_src_offset[frame_index] = 0;
-  gf_group->arf_update_idx[frame_index] = 0;
-  // This value is only used for INTNL_OVERLAY_UPDATE
-  gf_group->arf_pos_in_gf[frame_index] = 0;
-
-  // This parameter is useless?
-  gf_group->arf_ref_idx[frame_index] = 0;
-#ifdef CHCEK_GF_PARAMETER
-  check_frame_params(gf_group, rc->baseline_gf_interval, gf_update_frames);
-#endif
-}
-
-// It is an example of how to define a GF structure manually. The function will
-// result in exactly the same GF group structure as
-// define_customized_gf_group_structure() when rc->baseline_gf_interval == 4
-#if USE_MANUAL_GF4_STRUCT
-// GF interval covered by the manual table below.
-#define GF_INTERVAL_4 4
-// One row per GF group entry (gf_group->index 0..5), in coding order. Each row
-// lists, in order: update_type, arf_src_offset, arf_pos_in_gf, arf_update_idx.
-// NOTE(review): GF_FRAME_PARAMS is defined outside this view; it must be at
-// least 4 for these rows -- confirm.
-static const unsigned char gf4_multi_layer_params[][GF_FRAME_PARAMS] = {
-  {
-      // gf_group->index == 0 (Frame 0)
-      // It can also be KEY frame. Will assign the proper value
-      // in define_gf_group_structure_4
-      OVERLAY_UPDATE,  // update_type (default value)
-      0,               // arf_src_offset
-      0,               // arf_pos_in_gf
-      0                // arf_update_idx
-  },
-  {
-      // gf_group->index == 1 (Frame 4)
-      ARF_UPDATE,         // update_type
-      GF_INTERVAL_4 - 1,  // arf_src_offset
-      0,                  // arf_pos_in_gf
-      0                   // arf_update_idx
-  },
-  {
-      // gf_group->index == 2 (Frame 2)
-      INTNL_ARF_UPDATE,          // update_type
-      (GF_INTERVAL_4 >> 1) - 1,  // arf_src_offset
-      0,                         // arf_pos_in_gf
-      0                          // arf_update_idx
-  },
-  {
-      // gf_group->index == 3 (Frame 1)
-      LAST_BIPRED_UPDATE,  // update_type
-      0,                   // arf_src_offset
-      0,                   // arf_pos_in_gf
-      0                    // arf_update_idx
-  },
-
-  {
-      // gf_group->index == 4 (Frame 2 - OVERLAY)
-      // arf_pos_in_gf points back at row 2, the matching internal ARF.
-      INTNL_OVERLAY_UPDATE,  // update_type
-      0,                     // arf_src_offset
-      2,                     // arf_pos_in_gf
-      0                      // arf_update_idx
-  },
-  {
-      // gf_group->index == 5 (Frame 3)
-      LF_UPDATE,  // update_type
-      0,          // arf_src_offset
-      0,          // arf_pos_in_gf
-      1           // arf_update_idx
-  }
-};
-
-// Fills the GF group structure for a GF interval of exactly GF_INTERVAL_4 from
-// the manual table gf4_multi_layer_params, then configures the entry that
-// follows the group (the start frame of the next group).
-// Returns the number of table-driven entries (baseline_gf_interval + 2).
-//
-// Each table row is read by fixed column, so the first entry (whose update
-// type is decided here rather than taken from the table) reads the same
-// columns as every other entry, and the last column is stored in
-// arf_update_idx[]. brf_src_offset[] is zeroed for every entry, as in
-// define_customized_gf_group_structure().
-static int define_gf_group_structure_4(AV1_COMP *cpi) {
-  // Column layout of a gf4_multi_layer_params row.
-  enum {
-    GF4_COL_UPDATE_TYPE = 0,
-    GF4_COL_ARF_SRC_OFFSET = 1,
-    GF4_COL_ARF_POS_IN_GF = 2,
-    GF4_COL_ARF_UPDATE_IDX = 3
-  };
-  RATE_CONTROL *const rc = &cpi->rc;
-  TWO_PASS *const twopass = &cpi->twopass;
-  GF_GROUP *const gf_group = &twopass->gf_group;
-  const int key_frame = cpi->common.frame_type == KEY_FRAME;
-
-  assert(rc->baseline_gf_interval == GF_INTERVAL_4);
-
-  const int gf_update_frames = rc->baseline_gf_interval + 2;
-  int frame_index;
-
-  for (frame_index = 0; frame_index < gf_update_frames; ++frame_index) {
-    const unsigned char *const row = gf4_multi_layer_params[frame_index];
-
-    // Bi-predictive parameters are unused by this structure.
-    gf_group->bidir_pred_enabled[frame_index] = 0;
-    gf_group->brf_src_offset[frame_index] = 0;
-
-    if (frame_index == 0) {
-      // For key frames the frame target rate is already set and it
-      // is also the golden frame.
-      if (key_frame) continue;
-
-      // The table value is only a placeholder for the first entry; the actual
-      // type depends on whether an alt-ref is currently active.
-      if (rc->source_alt_ref_active) {
-        gf_group->update_type[frame_index] = OVERLAY_UPDATE;
-      } else {
-        gf_group->update_type[frame_index] = GF_UPDATE;
-      }
-    } else {
-      gf_group->update_type[frame_index] = row[GF4_COL_UPDATE_TYPE];
-    }
-
-    // setup other parameters
-    gf_group->rf_level[frame_index] =
-        update_type_2_rf_level(gf_group->update_type[frame_index]);
-
-    gf_group->arf_src_offset[frame_index] = row[GF4_COL_ARF_SRC_OFFSET];
-    gf_group->arf_pos_in_gf[frame_index] = row[GF4_COL_ARF_POS_IN_GF];
-    gf_group->arf_update_idx[frame_index] = row[GF4_COL_ARF_UPDATE_IDX];
-  }
-
-  // NOTE: We need to configure the frame at the end of the sequence + 1 that
-  //       will be the start frame for the next group. Otherwise prior to the
-  //       call to av1_rc_get_second_pass_params() the data will be undefined.
-  gf_group->arf_update_idx[frame_index] = 0;
-  gf_group->arf_ref_idx[frame_index] = 0;
-
-  if (rc->source_alt_ref_pending) {
-    gf_group->update_type[frame_index] = OVERLAY_UPDATE;
-    gf_group->rf_level[frame_index] = INTER_NORMAL;
-
-  } else {
-    gf_group->update_type[frame_index] = GF_UPDATE;
-    gf_group->rf_level[frame_index] = GF_ARF_STD;
-  }
-
-  gf_group->bidir_pred_enabled[frame_index] = 0;
-  gf_group->brf_src_offset[frame_index] = 0;
-
-  // This value is only used for INTNL_OVERLAY_UPDATE
-  gf_group->arf_pos_in_gf[frame_index] = 0;
-
-  return gf_update_frames;
-}
-#endif  // USE_MANUAL_GF4_STRUCT
-#endif  // USE_SYMM_MULTI_LAYER
-
-// Fills twopass->gf_group with the update type, rf level and ARF/BWDREF
-// bookkeeping for every frame of the upcoming GF group, plus the entry that
-// starts the next group.
-//
-// With USE_SYMM_MULTI_LAYER, a pending alt-ref, extra ARFs allowed and a
-// baseline GF interval within 4..MAX_PYRAMID_SIZE, the work is delegated to the
-// multi-layer builders and cpi->new_bwdref_update_rule is set to 1. Otherwise
-// the rule is set to 0 and the structure is built here: entry 0 (GF/overlay),
-// an optional ARF and first extra ARF, then one entry per remaining frame with
-// further extra ARFs inserted after their overlays.
-static void define_gf_group_structure(AV1_COMP *cpi) {
-  RATE_CONTROL *const rc = &cpi->rc;
-
-#if USE_SYMM_MULTI_LAYER
-  const int valid_customized_gf_length =
-      rc->baseline_gf_interval >= 4 &&
-      rc->baseline_gf_interval <= MAX_PYRAMID_SIZE;
-  // used the new structure only if extra_arf is allowed
-  if (valid_customized_gf_length && rc->source_alt_ref_pending &&
-      cpi->extra_arf_allowed > 0) {
-#if USE_MANUAL_GF4_STRUCT
-    if (rc->baseline_gf_interval == 4)
-      define_gf_group_structure_4(cpi);
-    else
-#endif
-      define_customized_gf_group_structure(cpi);
-    cpi->new_bwdref_update_rule = 1;
-    return;
-  } else {
-    cpi->new_bwdref_update_rule = 0;
-  }
-#endif
-
-  TWO_PASS *const twopass = &cpi->twopass;
-  GF_GROUP *const gf_group = &twopass->gf_group;
-  int i;
-  int frame_index = 0;
-  const int key_frame = cpi->common.frame_type == KEY_FRAME;
-
-  // The use of bi-predictive frames are only enabled when following 3
-  // conditions are met:
-  // (1) ALTREF is enabled;
-  // (2) The bi-predictive group interval is at least 2; and
-  // (3) The bi-predictive group interval is strictly smaller than the
-  //     golden group interval.
-  // NOTE(review): as written, the check below only requires a non-zero
-  // interval that is <= baseline_gf_interval - source_alt_ref_pending, and it
-  // additionally requires cpi->extra_arf_allowed -- confirm which is intended.
-  const int is_bipred_enabled =
-      cpi->extra_arf_allowed && rc->source_alt_ref_pending &&
-      rc->bipred_group_interval &&
-      rc->bipred_group_interval <=
-          (rc->baseline_gf_interval - rc->source_alt_ref_pending);
-  int bipred_group_end = 0;
-  int bipred_frame_index = 0;
-
-  // Source offset used for every extra (internal) ARF of this group.
-  const unsigned char ext_arf_interval =
-      (unsigned char)(rc->baseline_gf_interval / (cpi->num_extra_arfs + 1) - 1);
-  int which_arf = cpi->num_extra_arfs;
-  // Only filled in (and only read) when an alt-ref is pending.
-  int subgroup_interval[MAX_EXT_ARFS + 1];
-  int is_sg_bipred_enabled = is_bipred_enabled;
-  int accumulative_subgroup_interval = 0;
-
-  // For key frames the frame target rate is already set and it
-  // is also the golden frame.
-  // === [frame_index == 0] ===
-  if (!key_frame) {
-    if (rc->source_alt_ref_active) {
-      gf_group->update_type[frame_index] = OVERLAY_UPDATE;
-      gf_group->rf_level[frame_index] = INTER_NORMAL;
-    } else {
-      gf_group->update_type[frame_index] = GF_UPDATE;
-      gf_group->rf_level[frame_index] = GF_ARF_STD;
-    }
-    gf_group->arf_update_idx[frame_index] = 0;
-    gf_group->arf_ref_idx[frame_index] = 0;
-  }
-
-  gf_group->bidir_pred_enabled[frame_index] = 0;
-  gf_group->brf_src_offset[frame_index] = 0;
-
-  frame_index++;
-
-  bipred_frame_index++;
-
-  // === [frame_index == 1] ===
-  if (rc->source_alt_ref_pending) {
-    gf_group->update_type[frame_index] = ARF_UPDATE;
-    gf_group->rf_level[frame_index] = GF_ARF_STD;
-    gf_group->arf_src_offset[frame_index] =
-        (unsigned char)(rc->baseline_gf_interval - 1);
-
-    gf_group->arf_update_idx[frame_index] = 0;
-    gf_group->arf_ref_idx[frame_index] = 0;
-
-    gf_group->bidir_pred_enabled[frame_index] = 0;
-    gf_group->brf_src_offset[frame_index] = 0;
-    // NOTE: "bipred_frame_index" stays unchanged for ARF_UPDATE frames.
-
-    // Work out the ARFs' positions in this gf group
-    // NOTE(weitinglin): ALT_REFs' are indexed inversely, but coded in display
-    // order (except for the original ARF). In the example of three ALT_REF's,
-    // We index ALTREF's as: KEY ----- ALT2 ----- ALT1 ----- ALT0
-    // but code them in the following order:
-    // KEY-ALT0-ALT2 ----- OVERLAY2-ALT1 ----- OVERLAY1 ----- OVERLAY0
-    //
-    // arf_pos_for_ovrly[]: Position for OVERLAY
-    // arf_pos_in_gf[]:     Position for ALTREF
-    cpi->arf_pos_for_ovrly[0] = frame_index + cpi->num_extra_arfs +
-                                gf_group->arf_src_offset[frame_index] + 1;
-    for (i = 0; i < cpi->num_extra_arfs; ++i) {
-      cpi->arf_pos_for_ovrly[i + 1] =
-          frame_index + (cpi->num_extra_arfs - i) * (ext_arf_interval + 2);
-      subgroup_interval[i] = cpi->arf_pos_for_ovrly[i] -
-                             cpi->arf_pos_for_ovrly[i + 1] - (i == 0 ? 1 : 2);
-    }
-    subgroup_interval[cpi->num_extra_arfs] =
-        cpi->arf_pos_for_ovrly[cpi->num_extra_arfs] - frame_index -
-        (cpi->num_extra_arfs == 0 ? 1 : 2);
-
-    ++frame_index;
-
-    // Insert an extra ARF
-    // === [frame_index == 2] ===
-    if (cpi->num_extra_arfs) {
-      gf_group->update_type[frame_index] = INTNL_ARF_UPDATE;
-      gf_group->rf_level[frame_index] = GF_ARF_LOW;
-      gf_group->arf_src_offset[frame_index] = ext_arf_interval;
-
-      gf_group->arf_update_idx[frame_index] = which_arf;
-      gf_group->arf_ref_idx[frame_index] = 0;
-      ++frame_index;
-    }
-    accumulative_subgroup_interval += subgroup_interval[cpi->num_extra_arfs];
-  }
-
-  // One iteration per remaining frame of the group; extra ARF entries inserted
-  // inside the loop advance frame_index without consuming an iteration.
-  for (i = 0; i < rc->baseline_gf_interval - rc->source_alt_ref_pending; ++i) {
-    gf_group->arf_update_idx[frame_index] = which_arf;
-    gf_group->arf_ref_idx[frame_index] = which_arf;
-
-    // If we are going to have ARFs, check whether we can have BWDREF in this
-    // subgroup, and further, whether we can have ARF subgroup which contains
-    // the BWDREF subgroup but contained within the GF group:
-    //
-    // GF group --> ARF subgroup --> BWDREF subgroup
-    if (rc->source_alt_ref_pending) {
-      is_sg_bipred_enabled =
-          is_bipred_enabled &&
-          (subgroup_interval[which_arf] > rc->bipred_group_interval);
-    }
-
-    // NOTE: BIDIR_PRED is only enabled when the length of the bi-predictive
-    //       frame group interval is strictly smaller than that of the GOLDEN
-    //       FRAME group interval.
-    // TODO(zoeliu): Currently BIDIR_PRED is only enabled when alt-ref is on.
-    if (is_sg_bipred_enabled && !bipred_group_end) {
-      const int cur_brf_src_offset = rc->bipred_group_interval - 1;
-
-      if (bipred_frame_index == 1) {
-        // --- BRF_UPDATE ---
-        gf_group->update_type[frame_index] = BRF_UPDATE;
-        gf_group->rf_level[frame_index] = GF_ARF_LOW;
-        gf_group->brf_src_offset[frame_index] = cur_brf_src_offset;
-      } else if (bipred_frame_index == rc->bipred_group_interval) {
-        // --- LAST_BIPRED_UPDATE ---
-        gf_group->update_type[frame_index] = LAST_BIPRED_UPDATE;
-        gf_group->rf_level[frame_index] = INTER_NORMAL;
-        gf_group->brf_src_offset[frame_index] = 0;
-
-        // Reset the bi-predictive frame index.
-        bipred_frame_index = 0;
-      } else {
-        // --- BIPRED_UPDATE ---
-        gf_group->update_type[frame_index] = BIPRED_UPDATE;
-        gf_group->rf_level[frame_index] = INTER_NORMAL;
-        gf_group->brf_src_offset[frame_index] = 0;
-      }
-      gf_group->bidir_pred_enabled[frame_index] = 1;
-
-      bipred_frame_index++;
-      // Check whether the next bi-predictive frame group would entirely be
-      // included within the current golden frame group.
-      // In addition, we need to avoid coding a BRF right before an ARF.
-      if (bipred_frame_index == 1 &&
-          (i + 2 + cur_brf_src_offset) >= accumulative_subgroup_interval) {
-        bipred_group_end = 1;
-      }
-    } else {
-      gf_group->update_type[frame_index] = LF_UPDATE;
-      gf_group->rf_level[frame_index] = INTER_NORMAL;
-      gf_group->bidir_pred_enabled[frame_index] = 0;
-      gf_group->brf_src_offset[frame_index] = 0;
-    }
-
-    ++frame_index;
-
-    // Check if we need to update the ARF.
-    if (is_sg_bipred_enabled && cpi->num_extra_arfs && which_arf > 0 &&
-        frame_index > cpi->arf_pos_for_ovrly[which_arf]) {
-      --which_arf;
-      accumulative_subgroup_interval += subgroup_interval[which_arf] + 1;
-
-      // Meet the new subgroup; Reset the bipred_group_end flag.
-      bipred_group_end = 0;
-      // Insert another extra ARF after the overlay frame
-      if (which_arf) {
-        gf_group->update_type[frame_index] = INTNL_ARF_UPDATE;
-        gf_group->rf_level[frame_index] = GF_ARF_LOW;
-        gf_group->arf_src_offset[frame_index] = ext_arf_interval;
-
-        gf_group->arf_update_idx[frame_index] = which_arf;
-        gf_group->arf_ref_idx[frame_index] = 0;
-        ++frame_index;
-      }
-    }
-  }
-
-  // NOTE: We need to configure the frame at the end of the sequence + 1 that
-  //       will be the start frame for the next group. Otherwise prior to the
-  //       call to av1_rc_get_second_pass_params() the data will be undefined.
-  gf_group->arf_update_idx[frame_index] = 0;
-  gf_group->arf_ref_idx[frame_index] = 0;
-
-  if (rc->source_alt_ref_pending) {
-    gf_group->update_type[frame_index] = OVERLAY_UPDATE;
-    gf_group->rf_level[frame_index] = INTER_NORMAL;
-
-    // The main ARF always sits at entry 1 of the group.
-    cpi->arf_pos_in_gf[0] = 1;
-    if (cpi->num_extra_arfs) {
-      // Overwrite the update_type for extra-ARF's corresponding internal
-      // OVERLAY's: Change from LF_UPDATE to INTNL_OVERLAY_UPDATE.
-      for (i = cpi->num_extra_arfs; i > 0; --i) {
-        // The first extra ARF is at entry 2; each later one directly follows
-        // the overlay of the previous extra ARF.
-        cpi->arf_pos_in_gf[i] =
-            (i == cpi->num_extra_arfs ? 2 : cpi->arf_pos_for_ovrly[i + 1] + 1);
-
-        gf_group->update_type[cpi->arf_pos_for_ovrly[i]] = INTNL_OVERLAY_UPDATE;
-        gf_group->rf_level[cpi->arf_pos_for_ovrly[i]] = INTER_NORMAL;
-      }
-    }
-  } else {
-    gf_group->update_type[frame_index] = GF_UPDATE;
-    gf_group->rf_level[frame_index] = GF_ARF_STD;
-  }
-
-  gf_group->bidir_pred_enabled[frame_index] = 0;
-  gf_group->brf_src_offset[frame_index] = 0;
-}
-
-#if USE_SYMM_MULTI_LAYER
-// Fraction of each leaf frame's target size that allocate_gf_group_bits()
-// takes away from leaf frames under the new bwdref update rule; the same
-// factor, times the leaf count, forms the budget shared out to the ARFs.
-#define LEAF_REDUCTION_FACTOR 0.75f
-// Split ratios used to build the per-level budget shares below.
-#define LVL_3_BOOST_FACTOR 0.8f
-#define LVL_2_BOOST_FACTOR 0.3f
-
-// Share of the leaf budget given to each pyramid level. In
-// allocate_gf_group_bits() the row is indexed by pyramid_height - 2 and the
-// column by the level's distance from the top (pyramid_height - 1 - level).
-// The first and last rows sum to 1; the middle row deliberately hands out only
-// LVL_3_BOOST_FACTOR of the budget.
-static float_t lvl_budget_factor[MAX_PYRAMID_LVL - 1][MAX_PYRAMID_LVL - 1] = {
-  { 1, 0, 0 },
-  { LVL_3_BOOST_FACTOR, 0, 0 },  // Leaking budget works better
-  { LVL_3_BOOST_FACTOR, (1 - LVL_3_BOOST_FACTOR) * LVL_2_BOOST_FACTOR,
-    (1 - LVL_3_BOOST_FACTOR) * (1 - LVL_2_BOOST_FACTOR) }
-};
-#endif  // USE_SYMM_MULTI_LAYER
-static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
-                                   double group_error, int gf_arf_bits) {
-  RATE_CONTROL *const rc = &cpi->rc;
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-  TWO_PASS *const twopass = &cpi->twopass;
-  GF_GROUP *const gf_group = &twopass->gf_group;
-  FIRSTPASS_STATS frame_stats;
-  int i;
-  int frame_index = 0;
-  int target_frame_size;
-  int key_frame;
-  const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf);
-  int64_t total_group_bits = gf_group_bits;
-  double modified_err = 0.0;
-  double err_fraction;
-  int ext_arf_boost[MAX_EXT_ARFS];
-
-  define_gf_group_structure(cpi);
-
-  av1_zero_array(ext_arf_boost, MAX_EXT_ARFS);
-
-  key_frame = cpi->common.frame_type == KEY_FRAME;
-
-  // For key frames the frame target rate is already set and it
-  // is also the golden frame.
-  // === [frame_index == 0] ===
-  if (!key_frame) {
-    if (rc->source_alt_ref_active)
-      gf_group->bit_allocation[frame_index] = 0;
-    else
-      gf_group->bit_allocation[frame_index] = gf_arf_bits;
-
-    // Step over the golden frame / overlay frame
-    if (EOF == input_stats(twopass, &frame_stats)) return;
-  }
-
-  // Deduct the boost bits for arf (or gf if it is not a key frame)
-  // from the group total.
-  if (rc->source_alt_ref_pending || !key_frame) total_group_bits -= gf_arf_bits;
-
-  frame_index++;
-
-  // Store the bits to spend on the ARF if there is one.
-  // === [frame_index == 1] ===
-  if (rc->source_alt_ref_pending) {
-    gf_group->bit_allocation[frame_index] = gf_arf_bits;
-
-    ++frame_index;
-
-    // Skip all the extra-ARF's right after ARF at the starting segment of
-    // the current GF group.
-    if (cpi->num_extra_arfs) {
-      while (gf_group->update_type[frame_index] == INTNL_ARF_UPDATE)
-        ++frame_index;
-    }
-  }
-
-  // Allocate bits to the other frames in the group.
-  for (i = 0; i < rc->baseline_gf_interval - rc->source_alt_ref_pending; ++i) {
-    if (EOF == input_stats(twopass, &frame_stats)) break;
-
-    modified_err = calculate_modified_err(cpi, twopass, oxcf, &frame_stats);
-
-    if (group_error > 0)
-      err_fraction = modified_err / DOUBLE_DIVIDE_CHECK(group_error);
-    else
-      err_fraction = 0.0;
-
-    target_frame_size = (int)((double)total_group_bits * err_fraction);
-
-    target_frame_size =
-        clamp(target_frame_size, 0, AOMMIN(max_bits, (int)total_group_bits));
-
-    if (gf_group->update_type[frame_index] == BRF_UPDATE) {
-      // Boost up the allocated bits on BWDREF_FRAME
-      gf_group->bit_allocation[frame_index] =
-          target_frame_size + (target_frame_size >> 2);
-    } else if (gf_group->update_type[frame_index] == LAST_BIPRED_UPDATE) {
-      // Press down the allocated bits on LAST_BIPRED_UPDATE frames
-      gf_group->bit_allocation[frame_index] =
-          target_frame_size - (target_frame_size >> 1);
-    } else if (gf_group->update_type[frame_index] == BIPRED_UPDATE) {
-      // TODO(zoeliu): To investigate whether the allocated bits on
-      // BIPRED_UPDATE frames need to be further adjusted.
-      gf_group->bit_allocation[frame_index] = target_frame_size;
-#if USE_SYMM_MULTI_LAYER
-    } else if (cpi->new_bwdref_update_rule &&
-               gf_group->update_type[frame_index] == INTNL_OVERLAY_UPDATE) {
-      assert(gf_group->pyramid_height <= MAX_PYRAMID_LVL &&
-             gf_group->pyramid_height >= 0 &&
-             "non-valid height for a pyramid structure");
-
-      int arf_pos = gf_group->arf_pos_in_gf[frame_index];
-      gf_group->bit_allocation[frame_index] = 0;
-
-      gf_group->bit_allocation[arf_pos] = target_frame_size;
-#if MULTI_LVL_BOOST_VBR_CQ
-      const int pyr_h = gf_group->pyramid_height - 2;
-      const int this_lvl = gf_group->pyramid_level[arf_pos];
-      const int dist2top = gf_group->pyramid_height - 1 - this_lvl;
-
-      const float_t budget =
-          LEAF_REDUCTION_FACTOR * gf_group->pyramid_lvl_nodes[0];
-      const float_t lvl_boost = budget * lvl_budget_factor[pyr_h][dist2top] /
-                                gf_group->pyramid_lvl_nodes[this_lvl];
-
-      gf_group->bit_allocation[arf_pos] += (int)(target_frame_size * lvl_boost);
-#endif  // MULTI_LVL_BOOST_VBR_CQ
-#endif  // USE_SYMM_MULTI_LAYER
-    } else {
-      assert(gf_group->update_type[frame_index] == LF_UPDATE ||
-             gf_group->update_type[frame_index] == INTNL_OVERLAY_UPDATE);
-      gf_group->bit_allocation[frame_index] = target_frame_size;
-#if MULTI_LVL_BOOST_VBR_CQ
-      if (cpi->new_bwdref_update_rule) {
-        gf_group->bit_allocation[frame_index] -=
-            (int)(target_frame_size * LEAF_REDUCTION_FACTOR);
-      }
-#endif  // MULTI_LVL_BOOST_VBR_CQ
-    }
-
-    ++frame_index;
-
-    // Skip all the extra-ARF's.
-    if (cpi->num_extra_arfs) {
-      while (gf_group->update_type[frame_index] == INTNL_ARF_UPDATE)
-        ++frame_index;
-    }
-  }
-
-#if USE_SYMM_MULTI_LAYER
-  if (cpi->new_bwdref_update_rule == 0 && rc->source_alt_ref_pending) {
-#else
-  if (rc->source_alt_ref_pending) {
-#endif
-    if (cpi->num_extra_arfs) {
-      // NOTE: For bit allocation, move the allocated bits associated with
-      //       INTNL_OVERLAY_UPDATE to the corresponding INTNL_ARF_UPDATE.
-      //       i > 0 for extra-ARF's and i == 0 for ARF:
-      //         arf_pos_for_ovrly[i]: Position for INTNL_OVERLAY_UPDATE
-      //         arf_pos_in_gf[i]: Position for INTNL_ARF_UPDATE
-      for (i = cpi->num_extra_arfs; i > 0; --i) {
-        assert(gf_group->update_type[cpi->arf_pos_for_ovrly[i]] ==
-               INTNL_OVERLAY_UPDATE);
-
-        // Encoder's choice:
-        //   Set show_existing_frame == 1 for all extra-ARF's, and hence
-        //   allocate zero bit for both all internal OVERLAY frames.
-        gf_group->bit_allocation[cpi->arf_pos_in_gf[i]] =
-            gf_group->bit_allocation[cpi->arf_pos_for_ovrly[i]];
-        gf_group->bit_allocation[cpi->arf_pos_for_ovrly[i]] = 0;
-      }
-    }
-  }
-}
-
-// Analyse and define a gf/arf group.
-static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
-  AV1_COMMON *const cm = &cpi->common;
-  RATE_CONTROL *const rc = &cpi->rc;
-  AV1EncoderConfig *const oxcf = &cpi->oxcf;
-  TWO_PASS *const twopass = &cpi->twopass;
-  FIRSTPASS_STATS next_frame;
-  const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
-  int i;
-
-  double boost_score = 0.0;
-#if !CONFIG_FIX_GF_LENGTH
-  double old_boost_score = 0.0;
-  double mv_ratio_accumulator_thresh;
-  int active_max_gf_interval;
-  int active_min_gf_interval;
-#endif
-  double gf_group_err = 0.0;
-#if GROUP_ADAPTIVE_MAXQ
-  double gf_group_raw_error = 0.0;
-#endif
-  double gf_group_skip_pct = 0.0;
-  double gf_group_inactive_zone_rows = 0.0;
-  double gf_first_frame_err = 0.0;
-  double mod_frame_err = 0.0;
-
-  double mv_ratio_accumulator = 0.0;
-  double decay_accumulator = 1.0;
-  double zero_motion_accumulator = 1.0;
-
-  double loop_decay_rate = 1.00;
-  double last_loop_decay_rate = 1.00;
-
-  double this_frame_mv_in_out = 0.0;
-  double mv_in_out_accumulator = 0.0;
-  double abs_mv_in_out_accumulator = 0.0;
-
-  unsigned int allow_alt_ref = is_altref_enabled(cpi);
-
-  int f_boost = 0;
-  int b_boost = 0;
-  int flash_detected;
-  int64_t gf_group_bits;
-  double gf_group_error_left;
-  int gf_arf_bits;
-  const int is_key_frame = frame_is_intra_only(cm);
-  const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
-
-  cpi->extra_arf_allowed = 1;
-
-  // Reset the GF group data structures unless this is a key
-  // frame in which case it will already have been done.
-  if (is_key_frame == 0) {
-    av1_zero(twopass->gf_group);
-  }
-
-  aom_clear_system_state();
-  av1_zero(next_frame);
-
-  // Load stats for the current frame.
-  mod_frame_err = calculate_modified_err(cpi, twopass, oxcf, this_frame);
-
-  // Note the error of the frame at the start of the group. This will be
-  // the GF frame error if we code a normal gf.
-  gf_first_frame_err = mod_frame_err;
-
-  // If this is a key frame or the overlay from a previous arf then
-  // the error score / cost of this frame has already been accounted for.
-  if (arf_active_or_kf) {
-    gf_group_err -= gf_first_frame_err;
-#if GROUP_ADAPTIVE_MAXQ
-    gf_group_raw_error -= this_frame->coded_error;
-#endif
-    gf_group_skip_pct -= this_frame->intra_skip_pct;
-    gf_group_inactive_zone_rows -= this_frame->inactive_zone_rows;
-  }
-#if !CONFIG_FIX_GF_LENGTH
-  // Motion breakout threshold for loop below depends on image size.
-  mv_ratio_accumulator_thresh =
-      (cpi->initial_height + cpi->initial_width) / 4.0;
-  // Set a maximum and minimum interval for the GF group.
-  // If the image appears almost completely static we can extend beyond this.
-  {
-    int int_max_q = (int)(av1_convert_qindex_to_q(
-        twopass->active_worst_quality, cpi->common.seq_params.bit_depth));
-    int int_lbq = (int)(av1_convert_qindex_to_q(
-        rc->last_boosted_qindex, cpi->common.seq_params.bit_depth));
-
-    active_min_gf_interval = rc->min_gf_interval + AOMMIN(2, int_max_q / 200);
-    if (active_min_gf_interval > rc->max_gf_interval)
-      active_min_gf_interval = rc->max_gf_interval;
-
-    // The value chosen depends on the active Q range. At low Q we have
-    // bits to spare and are better with a smaller interval and smaller boost.
-    // At high Q when there are few bits to spare we are better with a longer
-    // interval to spread the cost of the GF.
-    active_max_gf_interval = 12 + AOMMIN(4, (int_lbq / 6));
-
-    // We have: active_min_gf_interval <= rc->max_gf_interval
-    if (active_max_gf_interval < active_min_gf_interval)
-      active_max_gf_interval = active_min_gf_interval;
-    else if (active_max_gf_interval > rc->max_gf_interval)
-      active_max_gf_interval = rc->max_gf_interval;
-  }
-#endif  // !CONFIG_FIX_GF_LENGTH
-  double avg_sr_coded_error = 0;
-  double avg_raw_err_stdev = 0;
-  int non_zero_stdev_count = 0;
-
-  i = 0;
-  while (i < rc->static_scene_max_gf_interval && i < rc->frames_to_key) {
-    ++i;
-
-    // Accumulate error score of frames in this gf group.
-    mod_frame_err = calculate_modified_err(cpi, twopass, oxcf, this_frame);
-    gf_group_err += mod_frame_err;
-#if GROUP_ADAPTIVE_MAXQ
-    gf_group_raw_error += this_frame->coded_error;
-#endif
-    gf_group_skip_pct += this_frame->intra_skip_pct;
-    gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
-
-    if (EOF == input_stats(twopass, &next_frame)) break;
-
-    // Test for the case where there is a brief flash but the prediction
-    // quality back to an earlier frame is then restored.
-    flash_detected = detect_flash(twopass, 0);
-
-    // Update the motion related elements to the boost calculation.
-    accumulate_frame_motion_stats(
-        &next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator,
-        &abs_mv_in_out_accumulator, &mv_ratio_accumulator);
-    // sum up the metric values of current gf group
-    avg_sr_coded_error += next_frame.sr_coded_error;
-    if (fabs(next_frame.raw_error_stdev) > 0.000001) {
-      non_zero_stdev_count++;
-      avg_raw_err_stdev += next_frame.raw_error_stdev;
-    }
-
-    // Accumulate the effect of prediction quality decay.
-    if (!flash_detected) {
-      last_loop_decay_rate = loop_decay_rate;
-      loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
-
-      decay_accumulator = decay_accumulator * loop_decay_rate;
-
-      // Monitor for static sections.
-      zero_motion_accumulator = AOMMIN(
-          zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
-
-      // Break clause to detect very still sections after motion. For example,
-      // a static image after a fade or other transition.
-      if (detect_transition_to_still(cpi, i, 5, loop_decay_rate,
-                                     last_loop_decay_rate)) {
-        allow_alt_ref = 0;
-        break;
-      }
-    }
-
-    // Calculate a boost number for this frame.
-    boost_score +=
-        decay_accumulator *
-        calc_frame_boost(cpi, &next_frame, this_frame_mv_in_out, GF_MAX_BOOST);
-#if CONFIG_FIX_GF_LENGTH
-    if (i == (FIXED_GF_LENGTH + 1)) break;
-#else
-    // Skip breaking condition for CONFIG_FIX_GF_LENGTH
-    // Break out conditions.
-    if (
-        // Break at active_max_gf_interval unless almost totally static.
-        (i >= (active_max_gf_interval + arf_active_or_kf) &&
-         zero_motion_accumulator < 0.995) ||
-        (
-            // Don't break out with a very short interval.
-            (i >= active_min_gf_interval + arf_active_or_kf) &&
-            (!flash_detected) &&
-            ((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
-             (abs_mv_in_out_accumulator > 3.0) ||
-             (mv_in_out_accumulator < -2.0) ||
-             ((boost_score - old_boost_score) < BOOST_BREAKOUT)))) {
-      // If GF group interval is < 12, we force it to be 8. Otherwise,
-      // if it is >= 12, we keep it as is.
-      // NOTE: 'i' is 1 more than the GF group interval candidate that is being
-      //       checked.
-      if (i == (8 + 1) || i >= (12 + 1)) {
-        boost_score = old_boost_score;
-        break;
-      }
-    }
-    old_boost_score = boost_score;
-#endif  // CONFIG_FIX_GF_LENGTH
-    *this_frame = next_frame;
-  }
-  twopass->gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0);
-
-  // Was the group length constrained by the requirement for a new KF?
-  rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
-
-  const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs
-                                                             : cpi->common.MBs;
-  assert(num_mbs > 0);
-  if (i) avg_sr_coded_error /= i;
-
-  if (non_zero_stdev_count) avg_raw_err_stdev /= non_zero_stdev_count;
-
-  // Disable extra altrefs and backward refs for "still" gf group:
-  //   zero_motion_accumulator: minimum percentage of (0,0) motion;
-  //   avg_sr_coded_error:      average of the SSE per pixel of each frame;
-  //   avg_raw_err_stdev:       average of the standard deviation of (0,0)
-  //                            motion error per block of each frame.
-  const int disable_bwd_extarf =
-      (zero_motion_accumulator > MIN_ZERO_MOTION &&
-       avg_sr_coded_error / num_mbs < MAX_SR_CODED_ERROR &&
-       avg_raw_err_stdev < MAX_RAW_ERR_VAR);
-
-  if (disable_bwd_extarf) cpi->extra_arf_allowed = 0;
-
-#define REDUCE_GF_LENGTH_THRESH 4
-#define REDUCE_GF_LENGTH_TO_KEY_THRESH 9
-#define REDUCE_GF_LENGTH_BY 1
-  int alt_offset = 0;
-#if REDUCE_LAST_GF_LENGTH
-  // TODO(weitinglin): The length reduction stretagy is tweaking using AOM_Q
-  // mode, and hurting the performance of VBR mode. We need to investigate how
-  // to adjust GF length for other modes.
-
-  int allow_gf_length_reduction =
-      cpi->oxcf.rc_mode == AOM_Q || cpi->extra_arf_allowed == 0;
-
-  // We are going to have an alt ref, but we don't have do adjustment for
-  // lossless mode
-  if (allow_alt_ref && allow_gf_length_reduction &&
-      (i < cpi->oxcf.lag_in_frames) && (i >= rc->min_gf_interval) &&
-      !is_lossless_requested(&cpi->oxcf)) {
-    // adjust length of this gf group if one of the following condition met
-    // 1: only one overlay frame left and this gf is too long
-    // 2: next gf group is too short to have arf compared to the current gf
-
-    // maximum length of next gf group
-    const int next_gf_len = rc->frames_to_key - i;
-    const int single_overlay_left =
-        next_gf_len == 0 && i > REDUCE_GF_LENGTH_THRESH;
-    // the next gf is probably going to have a ARF but it will be shorter than
-    // this gf
-    const int unbalanced_gf =
-        i > REDUCE_GF_LENGTH_TO_KEY_THRESH &&
-        next_gf_len + 1 < REDUCE_GF_LENGTH_TO_KEY_THRESH &&
-        next_gf_len + 1 >= rc->min_gf_interval;
-
-    if (single_overlay_left || unbalanced_gf) {
-      // Note: Tried roll_back = DIVIDE_AND_ROUND(i, 8), but is does not work
-      // better in the current setting
-      const int roll_back = REDUCE_GF_LENGTH_BY;
-      alt_offset = -roll_back;
-      i -= roll_back;
-    }
-  }
-#endif
-
-  // Should we use the alternate reference frame.
-  if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) &&
-      (i >= rc->min_gf_interval)) {
-    // Calculate the boost for alt ref.
-    rc->gfu_boost =
-        calc_arf_boost(cpi, alt_offset, (i - 1), (i - 1), &f_boost, &b_boost);
-    rc->source_alt_ref_pending = 1;
-
-    // do not replace ARFs with overlay frames, and keep it as GOLDEN_REF
-    cpi->preserve_arf_as_gld = 1;
-  } else {
-    rc->gfu_boost = AOMMAX((int)boost_score, MIN_ARF_GF_BOOST);
-    rc->source_alt_ref_pending = 0;
-    cpi->preserve_arf_as_gld = 0;
-  }
-
-  // Set the interval until the next gf.
-  // If forward keyframes are enabled, ensure the final gf group obeys the
-  // MIN_FWD_KF_INTERVAL.
-  if (cpi->oxcf.fwd_kf_enabled &&
-      ((twopass->stats_in - i + rc->frames_to_key) < twopass->stats_in_end)) {
-    if (i == rc->frames_to_key) {
-      rc->baseline_gf_interval = i;
-      // if the last gf group will be smaller than MIN_FWD_KF_INTERVAL
-    } else if ((rc->frames_to_key - i <
-                AOMMAX(MIN_FWD_KF_INTERVAL, rc->min_gf_interval)) &&
-               (rc->frames_to_key != i)) {
-      // if possible, merge the last two gf groups
-      if (rc->frames_to_key <= MAX_PYRAMID_SIZE) {
-        rc->baseline_gf_interval = rc->frames_to_key;
-        // if merging the last two gf groups creates a group that is too long,
-        // split them and force the last gf group to be the MIN_FWD_KF_INTERVAL
-      } else {
-        rc->baseline_gf_interval = rc->frames_to_key - MIN_FWD_KF_INTERVAL;
-      }
-    } else {
-      rc->baseline_gf_interval =
-          i - (is_key_frame || rc->source_alt_ref_pending);
-    }
-  } else {
-    rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending);
-  }
-
-#if REDUCE_LAST_ALT_BOOST
-#define LAST_ALR_BOOST_FACTOR 0.2f
-  rc->arf_boost_factor = 1.0;
-  if (rc->source_alt_ref_pending && !is_lossless_requested(&cpi->oxcf)) {
-    // Reduce the boost of altref in the last gf group
-    if (rc->frames_to_key - i == REDUCE_GF_LENGTH_BY ||
-        rc->frames_to_key - i == 0) {
-      rc->arf_boost_factor = LAST_ALR_BOOST_FACTOR;
-    }
-  }
-#endif
-
-  if (!cpi->extra_arf_allowed) {
-    cpi->num_extra_arfs = 0;
-  } else {
-#if USE_SYMM_MULTI_LAYER
-    if (rc->baseline_gf_interval == 4 && rc->source_alt_ref_pending)
-      cpi->num_extra_arfs = 1;
-    else
-      cpi->num_extra_arfs = get_number_of_extra_arfs(
-          rc->baseline_gf_interval, rc->source_alt_ref_pending);
-#else
-    // Compute how many extra alt_refs we can have
-    cpi->num_extra_arfs = get_number_of_extra_arfs(rc->baseline_gf_interval,
-                                                   rc->source_alt_ref_pending);
-#endif  // USE_SYMM_MULTI_LAYER
-  }
-
-#if !USE_SYMM_MULTI_LAYER
-  // Currently at maximum two extra ARFs' are allowed
-  assert(cpi->num_extra_arfs <= MAX_EXT_ARFS);
-#endif
-
-  rc->frames_till_gf_update_due = rc->baseline_gf_interval;
-
-  rc->bipred_group_interval = BFG_INTERVAL;
-  // The minimum bi-predictive frame group interval is 2.
-  if (rc->bipred_group_interval < 2) rc->bipred_group_interval = 0;
-
-  // Reset the file position.
-  reset_fpf_position(twopass, start_pos);
-
-  // Calculate the bits to be allocated to the gf/arf group as a whole
-  gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);
-
-#if GROUP_ADAPTIVE_MAXQ
-  // Calculate an estimate of the maxq needed for the group.
-  // We are more agressive about correcting for sections
-  // where there could be significant overshoot than for easier
-  // sections where we do not wish to risk creating an overshoot
-  // of the allocated bit budget.
-  if ((cpi->oxcf.rc_mode != AOM_Q) && (rc->baseline_gf_interval > 1)) {
-    const int vbr_group_bits_per_frame =
-        (int)(gf_group_bits / rc->baseline_gf_interval);
-    const double group_av_err = gf_group_raw_error / rc->baseline_gf_interval;
-    const double group_av_skip_pct =
-        gf_group_skip_pct / rc->baseline_gf_interval;
-    const double group_av_inactive_zone =
-        ((gf_group_inactive_zone_rows * 2) /
-         (rc->baseline_gf_interval * (double)cm->mb_rows));
-
-    int tmp_q;
-    // rc factor is a weight factor that corrects for local rate control drift.
-    double rc_factor = 1.0;
-    if (rc->rate_error_estimate > 0) {
-      rc_factor = AOMMAX(RC_FACTOR_MIN,
-                         (double)(100 - rc->rate_error_estimate) / 100.0);
-    } else {
-      rc_factor = AOMMIN(RC_FACTOR_MAX,
-                         (double)(100 - rc->rate_error_estimate) / 100.0);
-    }
-    tmp_q = get_twopass_worst_quality(
-        cpi, group_av_err, (group_av_skip_pct + group_av_inactive_zone),
-        vbr_group_bits_per_frame, twopass->kfgroup_inter_fraction * rc_factor);
-    twopass->active_worst_quality =
-        AOMMAX(tmp_q, twopass->active_worst_quality >> 1);
-  }
-#endif
-
-  // Calculate the extra bits to be used for boosted frame(s)
-  gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval, rc->gfu_boost,
-                                     gf_group_bits);
-
-  // Adjust KF group bits and error remaining.
-  twopass->kf_group_error_left -= (int64_t)gf_group_err;
-
-  // If this is an arf update we want to remove the score for the overlay
-  // frame at the end which will usually be very cheap to code.
-  // The overlay frame has already, in effect, been coded so we want to spread
-  // the remaining bits among the other frames.
-  // For normal GFs remove the score for the GF itself unless this is
-  // also a key frame in which case it has already been accounted for.
-  if (rc->source_alt_ref_pending) {
-    gf_group_error_left = gf_group_err - mod_frame_err;
-  } else if (is_key_frame == 0) {
-    gf_group_error_left = gf_group_err - gf_first_frame_err;
-  } else {
-    gf_group_error_left = gf_group_err;
-  }
-
-  // Allocate bits to each of the frames in the GF group.
-  allocate_gf_group_bits(cpi, gf_group_bits, gf_group_error_left, gf_arf_bits);
-
-  // Reset the file position.
-  reset_fpf_position(twopass, start_pos);
-
-  // Calculate a section intra ratio used in setting max loop filter.
-  if (cpi->common.frame_type != KEY_FRAME) {
-    twopass->section_intra_rating = calculate_section_intra_ratio(
-        start_pos, twopass->stats_in_end, rc->baseline_gf_interval);
-  }
-}
-
-// Threshold for use of the lagging second reference frame. High second ref
-// usage may point to a transient event like a flash or occlusion rather than
-// a real scene cut.
-#define SECOND_REF_USEAGE_THRESH 0.1
-// Minimum % intra coding observed in first pass (1.0 = 100%)
-#define MIN_INTRA_LEVEL 0.25
-// Minimum ratio between the % of intra coding and inter coding in the first
-// pass after discounting neutral blocks (discounting neutral blocks in this
-// way helps catch scene cuts in clips with very flat areas or letter box
-// format clips with image padding.
-#define INTRA_VS_INTER_THRESH 2.0
-// Hard threshold where the first pass chooses intra for almost all blocks.
-// In such a case even if the frame is not a scene cut coding a key frame
-// may be a good option.
-#define VERY_LOW_INTER_THRESH 0.05
-// Maximum threshold for the relative ratio of intra error score vs best
-// inter error score.
-#define KF_II_ERR_THRESHOLD 2.5
-// In real scene cuts there is almost always a sharp change in the intra
-// or inter error score.
-#define ERR_CHANGE_THRESHOLD 0.4
-// For real scene cuts we expect an improvment in the intra inter error
-// ratio in the next frame.
-#define II_IMPROVEMENT_THRESHOLD 3.5
-#define KF_II_MAX 128.0
-
-static int test_candidate_kf(TWO_PASS *twopass,
-                             const FIRSTPASS_STATS *last_frame,
-                             const FIRSTPASS_STATS *this_frame,
-                             const FIRSTPASS_STATS *next_frame) {
-  int is_viable_kf = 0;
-  double pcnt_intra = 1.0 - this_frame->pcnt_inter;
-  double modified_pcnt_inter =
-      this_frame->pcnt_inter - this_frame->pcnt_neutral;
-
-  // Does the frame satisfy the primary criteria of a key frame?
-  // See above for an explanation of the test criteria.
-  // If so, then examine how well it predicts subsequent frames.
-  if ((this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
-      (next_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
-      ((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) ||
-       ((pcnt_intra > MIN_INTRA_LEVEL) &&
-        (pcnt_intra > (INTRA_VS_INTER_THRESH * modified_pcnt_inter)) &&
-        ((this_frame->intra_error /
-          DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) <
-         KF_II_ERR_THRESHOLD) &&
-        ((fabs(last_frame->coded_error - this_frame->coded_error) /
-              DOUBLE_DIVIDE_CHECK(this_frame->coded_error) >
-          ERR_CHANGE_THRESHOLD) ||
-         (fabs(last_frame->intra_error - this_frame->intra_error) /
-              DOUBLE_DIVIDE_CHECK(this_frame->intra_error) >
-          ERR_CHANGE_THRESHOLD) ||
-         ((next_frame->intra_error /
-           DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) >
-          II_IMPROVEMENT_THRESHOLD))))) {
-    int i;
-    const FIRSTPASS_STATS *start_pos = twopass->stats_in;
-    FIRSTPASS_STATS local_next_frame = *next_frame;
-    double boost_score = 0.0;
-    double old_boost_score = 0.0;
-    double decay_accumulator = 1.0;
-
-    // Examine how well the key frame predicts subsequent frames.
-    for (i = 0; i < 16; ++i) {
-      double next_iiratio = (BOOST_FACTOR * local_next_frame.intra_error /
-                             DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error));
-
-      if (next_iiratio > KF_II_MAX) next_iiratio = KF_II_MAX;
-
-      // Cumulative effect of decay in prediction quality.
-      if (local_next_frame.pcnt_inter > 0.85)
-        decay_accumulator *= local_next_frame.pcnt_inter;
-      else
-        decay_accumulator *= (0.85 + local_next_frame.pcnt_inter) / 2.0;
-
-      // Keep a running total.
-      boost_score += (decay_accumulator * next_iiratio);
-
-      // Test various breakout clauses.
-      if ((local_next_frame.pcnt_inter < 0.05) || (next_iiratio < 1.5) ||
-          (((local_next_frame.pcnt_inter - local_next_frame.pcnt_neutral) <
-            0.20) &&
-           (next_iiratio < 3.0)) ||
-          ((boost_score - old_boost_score) < 3.0) ||
-          (local_next_frame.intra_error < 200)) {
-        break;
-      }
-
-      old_boost_score = boost_score;
-
-      // Get the next frame details
-      if (EOF == input_stats(twopass, &local_next_frame)) break;
-    }
-
-    // If there is tolerable prediction for at least the next 3 frames then
-    // break out else discard this potential key frame and move on
-    if (boost_score > 30.0 && (i > 3)) {
-      is_viable_kf = 1;
-    } else {
-      // Reset the file position
-      reset_fpf_position(twopass, start_pos);
-
-      is_viable_kf = 0;
-    }
-  }
-
-  return is_viable_kf;
-}
-
-#define FRAMES_TO_CHECK_DECAY 8
-
-static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
-  int i, j;
-  RATE_CONTROL *const rc = &cpi->rc;
-  TWO_PASS *const twopass = &cpi->twopass;
-  GF_GROUP *const gf_group = &twopass->gf_group;
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-  const FIRSTPASS_STATS first_frame = *this_frame;
-  const FIRSTPASS_STATS *const start_position = twopass->stats_in;
-  FIRSTPASS_STATS next_frame;
-  FIRSTPASS_STATS last_frame;
-  int kf_bits = 0;
-  int loop_decay_counter = 0;
-  double decay_accumulator = 1.0;
-  double av_decay_accumulator = 0.0;
-  double zero_motion_accumulator = 1.0;
-  double boost_score = 0.0;
-  double kf_mod_err = 0.0;
-  double kf_group_err = 0.0;
-  double recent_loop_decay[FRAMES_TO_CHECK_DECAY];
-
-  av1_zero(next_frame);
-
-  cpi->common.frame_type = KEY_FRAME;
-
-  // Reset the GF group data structures.
-  av1_zero(*gf_group);
-
-  // Is this a forced key frame by interval.
-  rc->this_key_frame_forced = rc->next_key_frame_forced;
-
-  // Clear the alt ref active flag and last group multi arf flags as they
-  // can never be set for a key frame.
-  rc->source_alt_ref_active = 0;
-
-  // KF is always a GF so clear frames till next gf counter.
-  rc->frames_till_gf_update_due = 0;
-
-  rc->frames_to_key = 1;
-
-  twopass->kf_group_bits = 0;        // Total bits available to kf group
-  twopass->kf_group_error_left = 0;  // Group modified error score.
-
-  kf_mod_err = calculate_modified_err(cpi, twopass, oxcf, this_frame);
-
-  // Initialize the decay rates for the recent frames to check
-  for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) recent_loop_decay[j] = 1.0;
-
-  // Find the next keyframe.
-  i = 0;
-  while (twopass->stats_in < twopass->stats_in_end &&
-         rc->frames_to_key < cpi->oxcf.key_freq) {
-    // Accumulate kf group error.
-    kf_group_err += calculate_modified_err(cpi, twopass, oxcf, this_frame);
-
-    // Load the next frame's stats.
-    last_frame = *this_frame;
-    input_stats(twopass, this_frame);
-
-    // Provided that we are not at the end of the file...
-    if (cpi->oxcf.auto_key && twopass->stats_in < twopass->stats_in_end) {
-      double loop_decay_rate;
-
-      // Check for a scene cut.
-      if (test_candidate_kf(twopass, &last_frame, this_frame,
-                            twopass->stats_in))
-        break;
-
-      // How fast is the prediction quality decaying?
-      loop_decay_rate = get_prediction_decay_rate(cpi, twopass->stats_in);
-
-      // We want to know something about the recent past... rather than
-      // as used elsewhere where we are concerned with decay in prediction
-      // quality since the last GF or KF.
-      recent_loop_decay[i % FRAMES_TO_CHECK_DECAY] = loop_decay_rate;
-      decay_accumulator = 1.0;
-      for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j)
-        decay_accumulator *= recent_loop_decay[j];
-
-      // Special check for transition or high motion followed by a
-      // static scene.
-      if (detect_transition_to_still(cpi, i, cpi->oxcf.key_freq - i,
-                                     loop_decay_rate, decay_accumulator))
-        break;
-
-      // Step on to the next frame.
-      ++rc->frames_to_key;
-
-      // If we don't have a real key frame within the next two
-      // key_freq intervals then break out of the loop.
-      if (rc->frames_to_key >= 2 * cpi->oxcf.key_freq) break;
-    } else {
-      ++rc->frames_to_key;
-    }
-    ++i;
-  }
-
-  // If there is a max kf interval set by the user we must obey it.
-  // We already breakout of the loop above at 2x max.
-  // This code centers the extra kf if the actual natural interval
-  // is between 1x and 2x.
-  if (cpi->oxcf.auto_key && rc->frames_to_key > cpi->oxcf.key_freq) {
-    FIRSTPASS_STATS tmp_frame = first_frame;
-
-    rc->frames_to_key /= 2;
-
-    // Reset to the start of the group.
-    reset_fpf_position(twopass, start_position);
-
-    kf_group_err = 0.0;
-
-    // Rescan to get the correct error data for the forced kf group.
-    for (i = 0; i < rc->frames_to_key; ++i) {
-      kf_group_err += calculate_modified_err(cpi, twopass, oxcf, &tmp_frame);
-      input_stats(twopass, &tmp_frame);
-    }
-    rc->next_key_frame_forced = 1;
-  } else if (twopass->stats_in == twopass->stats_in_end ||
-             rc->frames_to_key >= cpi->oxcf.key_freq) {
-    rc->next_key_frame_forced = 1;
-  } else {
-    rc->next_key_frame_forced = 0;
-  }
-
-  // Special case for the last key frame of the file.
-  if (twopass->stats_in >= twopass->stats_in_end) {
-    // Accumulate kf group error.
-    kf_group_err += calculate_modified_err(cpi, twopass, oxcf, this_frame);
-  }
-
-  // Calculate the number of bits that should be assigned to the kf group.
-  if (twopass->bits_left > 0 && twopass->modified_error_left > 0.0) {
-    // Maximum number of bits for a single normal frame (not key frame).
-    const int max_bits = frame_max_bits(rc, &cpi->oxcf);
-
-    // Maximum number of bits allocated to the key frame group.
-    int64_t max_grp_bits;
-
-    // Default allocation based on bits left and relative
-    // complexity of the section.
-    twopass->kf_group_bits = (int64_t)(
-        twopass->bits_left * (kf_group_err / twopass->modified_error_left));
-
-    // Clip based on maximum per frame rate defined by the user.
-    max_grp_bits = (int64_t)max_bits * (int64_t)rc->frames_to_key;
-    if (twopass->kf_group_bits > max_grp_bits)
-      twopass->kf_group_bits = max_grp_bits;
-  } else {
-    twopass->kf_group_bits = 0;
-  }
-  twopass->kf_group_bits = AOMMAX(0, twopass->kf_group_bits);
-
-  // Reset the first pass file position.
-  reset_fpf_position(twopass, start_position);
-
-  // Scan through the kf group collating various stats used to determine
-  // how many bits to spend on it.
-  decay_accumulator = 1.0;
-  boost_score = 0.0;
-  const double kf_max_boost =
-      cpi->oxcf.rc_mode == AOM_Q
-          ? AOMMIN(AOMMAX(rc->frames_to_key * 2.0, KF_MIN_FRAME_BOOST),
-                   KF_MAX_FRAME_BOOST)
-          : KF_MAX_FRAME_BOOST;
-  for (i = 0; i < (rc->frames_to_key - 1); ++i) {
-    if (EOF == input_stats(twopass, &next_frame)) break;
-
-    // Monitor for static sections.
-    zero_motion_accumulator = AOMMIN(zero_motion_accumulator,
-                                     get_zero_motion_factor(cpi, &next_frame));
-
-    // Not all frames in the group are necessarily used in calculating boost.
-    if ((i <= rc->max_gf_interval) ||
-        ((i <= (rc->max_gf_interval * 4)) && (decay_accumulator > 0.5))) {
-      const double frame_boost =
-          calc_frame_boost(cpi, this_frame, 0, kf_max_boost);
-
-      // How fast is prediction quality decaying.
-      if (!detect_flash(twopass, 0)) {
-        const double loop_decay_rate =
-            get_prediction_decay_rate(cpi, &next_frame);
-        decay_accumulator *= loop_decay_rate;
-        decay_accumulator = AOMMAX(decay_accumulator, MIN_DECAY_FACTOR);
-        av_decay_accumulator += decay_accumulator;
-        ++loop_decay_counter;
-      }
-      boost_score += (decay_accumulator * frame_boost);
-    }
-  }
-  if (loop_decay_counter > 0)
-    av_decay_accumulator /= (double)loop_decay_counter;
-
-  reset_fpf_position(twopass, start_position);
-
-  // Store the zero motion percentage
-  twopass->kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0);
-
-  // Calculate a section intra ratio used in setting max loop filter.
-  twopass->section_intra_rating = calculate_section_intra_ratio(
-      start_position, twopass->stats_in_end, rc->frames_to_key);
-
-  // Apply various clamps for min and max boost
-  rc->kf_boost = (int)(av_decay_accumulator * boost_score);
-  rc->kf_boost = AOMMAX(rc->kf_boost, (rc->frames_to_key * 3));
-  rc->kf_boost = AOMMAX(rc->kf_boost, MIN_KF_BOOST);
-
-  // Work out how many bits to allocate for the key frame itself.
-  kf_bits = calculate_boost_bits((rc->frames_to_key - 1), rc->kf_boost,
-                                 twopass->kf_group_bits);
-  // printf("kf boost = %d kf_bits = %d kf_zeromotion_pct = %d\n", rc->kf_boost,
-  //        kf_bits, twopass->kf_zeromotion_pct);
-
-  // Work out the fraction of the kf group bits reserved for the inter frames
-  // within the group after discounting the bits for the kf itself.
-  if (twopass->kf_group_bits) {
-    twopass->kfgroup_inter_fraction =
-        (double)(twopass->kf_group_bits - kf_bits) /
-        (double)twopass->kf_group_bits;
-  } else {
-    twopass->kfgroup_inter_fraction = 1.0;
-  }
-
-  twopass->kf_group_bits -= kf_bits;
-
-  // Save the bits to spend on the key frame.
-  gf_group->bit_allocation[0] = kf_bits;
-  gf_group->update_type[0] = KF_UPDATE;
-  gf_group->rf_level[0] = KF_STD;
-
-  // Note the total error score of the kf group minus the key frame itself.
-  twopass->kf_group_error_left = (int)(kf_group_err - kf_mod_err);
-
-  // Adjust the count of total modified error left.
-  // The count of bits left is adjusted elsewhere based on real coded frame
-  // sizes.
-  twopass->modified_error_left -= kf_group_err;
-}
-
-// Define the reference buffers that will be updated post encode.
-static void configure_buffer_updates(AV1_COMP *cpi) {
-  TWO_PASS *const twopass = &cpi->twopass;
-
-  // NOTE(weitinglin): Should we define another function to take care of
-  // cpi->rc.is_$Source_Type to make this function as it is in the comment?
-
-  cpi->rc.is_src_frame_alt_ref = 0;
-  cpi->rc.is_bwd_ref_frame = 0;
-  cpi->rc.is_last_bipred_frame = 0;
-  cpi->rc.is_bipred_frame = 0;
-  cpi->rc.is_src_frame_ext_arf = 0;
-
-  switch (twopass->gf_group.update_type[twopass->gf_group.index]) {
-    case KF_UPDATE:
-      cpi->refresh_last_frame = 1;
-      cpi->refresh_golden_frame = 1;
-      cpi->refresh_bwd_ref_frame = 1;
-      cpi->refresh_alt2_ref_frame = 1;
-      cpi->refresh_alt_ref_frame = 1;
-      break;
-
-    case LF_UPDATE:
-      cpi->refresh_last_frame = 1;
-      cpi->refresh_golden_frame = 0;
-      cpi->refresh_bwd_ref_frame = 0;
-      cpi->refresh_alt2_ref_frame = 0;
-      cpi->refresh_alt_ref_frame = 0;
-      break;
-
-    case GF_UPDATE:
-      // TODO(zoeliu): To further investigate whether 'refresh_last_frame' is
-      //               needed.
-      cpi->refresh_last_frame = 1;
-      cpi->refresh_golden_frame = 1;
-      cpi->refresh_bwd_ref_frame = 0;
-      cpi->refresh_alt2_ref_frame = 0;
-      cpi->refresh_alt_ref_frame = 0;
-      break;
-
-    case OVERLAY_UPDATE:
-      cpi->refresh_last_frame = 0;
-      cpi->refresh_golden_frame = 1;
-      cpi->refresh_bwd_ref_frame = 0;
-      cpi->refresh_alt2_ref_frame = 0;
-      cpi->refresh_alt_ref_frame = 0;
-
-      cpi->rc.is_src_frame_alt_ref = 1;
-      break;
-
-    case ARF_UPDATE:
-      cpi->refresh_last_frame = 0;
-      cpi->refresh_golden_frame = 0;
-      // NOTE: BWDREF does not get updated along with ALTREF_FRAME.
-      cpi->refresh_bwd_ref_frame = 0;
-      cpi->refresh_alt2_ref_frame = 0;
-      cpi->refresh_alt_ref_frame = 1;
-      break;
-
-    case BRF_UPDATE:
-      cpi->refresh_last_frame = 0;
-      cpi->refresh_golden_frame = 0;
-      cpi->refresh_bwd_ref_frame = 1;
-      cpi->refresh_alt2_ref_frame = 0;
-      cpi->refresh_alt_ref_frame = 0;
-
-      cpi->rc.is_bwd_ref_frame = 1;
-      break;
-
-    case LAST_BIPRED_UPDATE:
-      cpi->refresh_last_frame = 1;
-      cpi->refresh_golden_frame = 0;
-      cpi->refresh_bwd_ref_frame = 0;
-      cpi->refresh_alt2_ref_frame = 0;
-      cpi->refresh_alt_ref_frame = 0;
-
-      cpi->rc.is_last_bipred_frame = 1;
-      break;
-
-    case BIPRED_UPDATE:
-      cpi->refresh_last_frame = 1;
-      cpi->refresh_golden_frame = 0;
-      cpi->refresh_bwd_ref_frame = 0;
-      cpi->refresh_alt2_ref_frame = 0;
-      cpi->refresh_alt_ref_frame = 0;
-
-      cpi->rc.is_bipred_frame = 1;
-      break;
-
-    case INTNL_OVERLAY_UPDATE:
-      cpi->refresh_last_frame = 1;
-      cpi->refresh_golden_frame = 0;
-      cpi->refresh_bwd_ref_frame = 0;
-      cpi->refresh_alt2_ref_frame = 0;
-      cpi->refresh_alt_ref_frame = 0;
-
-      cpi->rc.is_src_frame_alt_ref = 1;
-      cpi->rc.is_src_frame_ext_arf = 1;
-      break;
-
-    case INTNL_ARF_UPDATE:
-      cpi->refresh_last_frame = 0;
-      cpi->refresh_golden_frame = 0;
-#if USE_SYMM_MULTI_LAYER
-      if (cpi->new_bwdref_update_rule == 1) {
-        cpi->refresh_bwd_ref_frame = 1;
-        cpi->refresh_alt2_ref_frame = 0;
-      } else {
-#endif
-        cpi->refresh_bwd_ref_frame = 0;
-        cpi->refresh_alt2_ref_frame = 1;
-#if USE_SYMM_MULTI_LAYER
-      }
-#endif
-      cpi->refresh_alt_ref_frame = 0;
-      break;
-
-    default: assert(0); break;
-  }
-}
-
-void av1_configure_buffer_updates_firstpass(AV1_COMP *cpi,
-                                            FRAME_UPDATE_TYPE update_type) {
-  RATE_CONTROL *rc = &cpi->rc;
-
-  cpi->refresh_last_frame = 1;
-  cpi->refresh_golden_frame = 0;
-  cpi->refresh_bwd_ref_frame = 0;
-  cpi->refresh_alt2_ref_frame = 0;
-  cpi->refresh_alt_ref_frame = 0;
-
-  rc->is_bwd_ref_frame = 0;
-
-  switch (update_type) {
-    case ARF_UPDATE:
-      cpi->refresh_alt_ref_frame = 1;
-      cpi->refresh_last_frame = 0;
-      cpi->refresh_golden_frame = 0;
-      cpi->refresh_bwd_ref_frame = 0;
-      cpi->refresh_alt2_ref_frame = 0;
-
-      rc->is_src_frame_alt_ref = 0;
-      break;
-    case INTNL_ARF_UPDATE:
-      cpi->refresh_alt2_ref_frame = 1;
-      cpi->refresh_last_frame = 0;
-      cpi->refresh_golden_frame = 0;
-      cpi->refresh_bwd_ref_frame = 0;
-      cpi->refresh_alt_ref_frame = 0;
-      rc->is_src_frame_alt_ref = 0;
-      rc->is_src_frame_ext_arf = 0;
-
-      break;
-    case BIPRED_UPDATE:
-      cpi->refresh_bwd_ref_frame = 1;
-      cpi->refresh_last_frame = 0;
-      cpi->refresh_golden_frame = 0;
-      cpi->refresh_alt2_ref_frame = 0;
-      cpi->refresh_alt_ref_frame = 0;
-
-      rc->is_bwd_ref_frame = 1;
-      break;
-    default: break;
-  }
-}
-
-static int is_skippable_frame(const AV1_COMP *cpi) {
-  // If the current frame does not have non-zero motion vector detected in the
-  // first  pass, and so do its previous and forward frames, then this frame
-  // can be skipped for partition check, and the partition size is assigned
-  // according to the variance
-  const TWO_PASS *const twopass = &cpi->twopass;
-
-  return (!frame_is_intra_only(&cpi->common) &&
-          twopass->stats_in - 2 > twopass->stats_in_start &&
-          twopass->stats_in < twopass->stats_in_end &&
-          (twopass->stats_in - 1)->pcnt_inter -
-                  (twopass->stats_in - 1)->pcnt_motion ==
-              1 &&
-          (twopass->stats_in - 2)->pcnt_inter -
-                  (twopass->stats_in - 2)->pcnt_motion ==
-              1 &&
-          twopass->stats_in->pcnt_inter - twopass->stats_in->pcnt_motion == 1);
-}
-
-void av1_rc_get_second_pass_params(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  RATE_CONTROL *const rc = &cpi->rc;
-  TWO_PASS *const twopass = &cpi->twopass;
-  GF_GROUP *const gf_group = &twopass->gf_group;
-  int frames_left;
-  FIRSTPASS_STATS this_frame;
-
-  int target_rate;
-
-  frames_left = (int)(twopass->total_stats.count - cm->current_video_frame);
-
-  if (!twopass->stats_in) return;
-
-  // If this is an arf frame then we dont want to read the stats file or
-  // advance the input pointer as we already have what we need.
-  if (gf_group->update_type[gf_group->index] == ARF_UPDATE ||
-      gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) {
-    configure_buffer_updates(cpi);
-    target_rate = gf_group->bit_allocation[gf_group->index];
-    target_rate = av1_rc_clamp_pframe_target_size(cpi, target_rate);
-    rc->base_frame_target = target_rate;
-
-    if (cpi->no_show_kf) {
-      assert(gf_group->update_type[gf_group->index] == ARF_UPDATE);
-      cm->frame_type = KEY_FRAME;
-    } else {
-      cm->frame_type = INTER_FRAME;
-    }
-
-    // Do the firstpass stats indicate that this frame is skippable for the
-    // partition search?
-    if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2) {
-      cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
-    }
-
-    return;
-  }
-
-  aom_clear_system_state();
-
-  if (cpi->oxcf.rc_mode == AOM_Q) {
-    twopass->active_worst_quality = cpi->oxcf.cq_level;
-  } else if (cm->current_video_frame == 0) {
-    // Special case code for first frame.
-    const int section_target_bandwidth =
-        (int)(twopass->bits_left / frames_left);
-    const double section_length = twopass->total_left_stats.count;
-    const double section_error =
-        twopass->total_left_stats.coded_error / section_length;
-    const double section_intra_skip =
-        twopass->total_left_stats.intra_skip_pct / section_length;
-    const double section_inactive_zone =
-        (twopass->total_left_stats.inactive_zone_rows * 2) /
-        ((double)cm->mb_rows * section_length);
-    const int tmp_q = get_twopass_worst_quality(
-        cpi, section_error, section_intra_skip + section_inactive_zone,
-        section_target_bandwidth, DEFAULT_GRP_WEIGHT);
-
-    twopass->active_worst_quality = tmp_q;
-    twopass->baseline_active_worst_quality = tmp_q;
-    rc->ni_av_qi = tmp_q;
-    rc->last_q[INTER_FRAME] = tmp_q;
-    rc->avg_q = av1_convert_qindex_to_q(tmp_q, cm->seq_params.bit_depth);
-    rc->avg_frame_qindex[INTER_FRAME] = tmp_q;
-    rc->last_q[KEY_FRAME] = (tmp_q + cpi->oxcf.best_allowed_q) / 2;
-    rc->avg_frame_qindex[KEY_FRAME] = rc->last_q[KEY_FRAME];
-  }
-
-  av1_zero(this_frame);
-  if (EOF == input_stats(twopass, &this_frame)) return;
-
-  // Set the frame content type flag.
-  if (this_frame.intra_skip_pct >= FC_ANIMATION_THRESH)
-    twopass->fr_content_type = FC_GRAPHICS_ANIMATION;
-  else
-    twopass->fr_content_type = FC_NORMAL;
-
-  // Keyframe and section processing.
-  if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) {
-    FIRSTPASS_STATS this_frame_copy;
-    this_frame_copy = this_frame;
-    // Define next KF group and assign bits to it.
-    find_next_key_frame(cpi, &this_frame);
-    this_frame = this_frame_copy;
-  } else {
-    cm->frame_type = INTER_FRAME;
-  }
-
-  // Define a new GF/ARF group. (Should always enter here for key frames).
-  if (rc->frames_till_gf_update_due == 0) {
-    define_gf_group(cpi, &this_frame);
-
-    rc->frames_till_gf_update_due = rc->baseline_gf_interval;
-
-#if ARF_STATS_OUTPUT
-    {
-      FILE *fpfile;
-      fpfile = fopen("arf.stt", "a");
-      ++arf_count;
-      fprintf(fpfile, "%10d %10d %10d %10d %10d\n", cm->current_video_frame,
-              rc->frames_till_gf_update_due, rc->kf_boost, arf_count,
-              rc->gfu_boost);
-
-      fclose(fpfile);
-    }
-#endif
-  }
-
-  configure_buffer_updates(cpi);
-
-  // Do the firstpass stats indicate that this frame is skippable for the
-  // partition search?
-  if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2) {
-    cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
-  }
-
-  target_rate = gf_group->bit_allocation[gf_group->index];
-
-  if (cpi->common.frame_type == KEY_FRAME)
-    target_rate = av1_rc_clamp_iframe_target_size(cpi, target_rate);
-  else
-    target_rate = av1_rc_clamp_pframe_target_size(cpi, target_rate);
-
-  rc->base_frame_target = target_rate;
-
-  {
-    const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
-                            ? cpi->initial_mbs
-                            : cpi->common.MBs;
-    // The multiplication by 256 reverses a scaling factor of (>> 8)
-    // applied when combining MB error values for the frame.
-    twopass->mb_av_energy = log((this_frame.intra_error / num_mbs) + 1.0);
-    twopass->frame_avg_haar_energy =
-        log((this_frame.frame_avg_wavelet_energy / num_mbs) + 1.0);
-  }
-
-  // Update the total stats remaining structure.
-  subtract_stats(&twopass->total_left_stats, &this_frame);
-}
-
-#define MINQ_ADJ_LIMIT 48
-#define MINQ_ADJ_LIMIT_CQ 20
-#define HIGH_UNDERSHOOT_RATIO 2
-void av1_twopass_postencode_update(AV1_COMP *cpi) {
-  TWO_PASS *const twopass = &cpi->twopass;
-  RATE_CONTROL *const rc = &cpi->rc;
-  const int bits_used = rc->base_frame_target;
-
-  // VBR correction is done through rc->vbr_bits_off_target. Based on the
-  // sign of this value, a limited % adjustment is made to the target rate
-  // of subsequent frames, to try and push it back towards 0. This method
-  // is designed to prevent extreme behaviour at the end of a clip
-  // or group of frames.
-  rc->vbr_bits_off_target += rc->base_frame_target - rc->projected_frame_size;
-  twopass->bits_left = AOMMAX(twopass->bits_left - bits_used, 0);
-
-  // Calculate the pct rc error.
-  if (rc->total_actual_bits) {
-    rc->rate_error_estimate =
-        (int)((rc->vbr_bits_off_target * 100) / rc->total_actual_bits);
-    rc->rate_error_estimate = clamp(rc->rate_error_estimate, -100, 100);
-  } else {
-    rc->rate_error_estimate = 0;
-  }
-
-  if (cpi->common.frame_type != KEY_FRAME) {
-    twopass->kf_group_bits -= bits_used;
-    twopass->last_kfgroup_zeromotion_pct = twopass->kf_zeromotion_pct;
-  }
-  twopass->kf_group_bits = AOMMAX(twopass->kf_group_bits, 0);
-
-  // If the rate control is drifting consider adjustment to min or maxq.
-  if ((cpi->oxcf.rc_mode != AOM_Q) &&
-      (cpi->twopass.gf_zeromotion_pct < VLOW_MOTION_THRESHOLD) &&
-      !cpi->rc.is_src_frame_alt_ref) {
-    const int maxq_adj_limit =
-        rc->worst_quality - twopass->active_worst_quality;
-    const int minq_adj_limit =
-        (cpi->oxcf.rc_mode == AOM_CQ ? MINQ_ADJ_LIMIT_CQ : MINQ_ADJ_LIMIT);
-
-    // Undershoot.
-    if (rc->rate_error_estimate > cpi->oxcf.under_shoot_pct) {
-      --twopass->extend_maxq;
-      if (rc->rolling_target_bits >= rc->rolling_actual_bits)
-        ++twopass->extend_minq;
-      // Overshoot.
-    } else if (rc->rate_error_estimate < -cpi->oxcf.over_shoot_pct) {
-      --twopass->extend_minq;
-      if (rc->rolling_target_bits < rc->rolling_actual_bits)
-        ++twopass->extend_maxq;
-    } else {
-      // Adjustment for extreme local overshoot.
-      if (rc->projected_frame_size > (2 * rc->base_frame_target) &&
-          rc->projected_frame_size > (2 * rc->avg_frame_bandwidth))
-        ++twopass->extend_maxq;
-
-      // Unwind undershoot or overshoot adjustment.
-      if (rc->rolling_target_bits < rc->rolling_actual_bits)
-        --twopass->extend_minq;
-      else if (rc->rolling_target_bits > rc->rolling_actual_bits)
-        --twopass->extend_maxq;
-    }
-
-    twopass->extend_minq = clamp(twopass->extend_minq, 0, minq_adj_limit);
-    twopass->extend_maxq = clamp(twopass->extend_maxq, 0, maxq_adj_limit);
-
-    // If there is a big and undexpected undershoot then feed the extra
-    // bits back in quickly. One situation where this may happen is if a
-    // frame is unexpectedly almost perfectly predicted by the ARF or GF
-    // but not very well predcited by the previous frame.
-    if (!frame_is_kf_gf_arf(cpi) && !cpi->rc.is_src_frame_alt_ref) {
-      int fast_extra_thresh = rc->base_frame_target / HIGH_UNDERSHOOT_RATIO;
-      if (rc->projected_frame_size < fast_extra_thresh) {
-        rc->vbr_bits_off_target_fast +=
-            fast_extra_thresh - rc->projected_frame_size;
-        rc->vbr_bits_off_target_fast =
-            AOMMIN(rc->vbr_bits_off_target_fast, (4 * rc->avg_frame_bandwidth));
-
-        // Fast adaptation of minQ if necessary to use up the extra bits.
-        if (rc->avg_frame_bandwidth) {
-          twopass->extend_minq_fast =
-              (int)(rc->vbr_bits_off_target_fast * 8 / rc->avg_frame_bandwidth);
-        }
-        twopass->extend_minq_fast = AOMMIN(
-            twopass->extend_minq_fast, minq_adj_limit - twopass->extend_minq);
-      } else if (rc->vbr_bits_off_target_fast) {
-        twopass->extend_minq_fast = AOMMIN(
-            twopass->extend_minq_fast, minq_adj_limit - twopass->extend_minq);
-      } else {
-        twopass->extend_minq_fast = 0;
-      }
-    }
-  }
-}
diff --git a/third_party/aom/av1/encoder/firstpass.h b/third_party/aom/av1/encoder/firstpass.h
deleted file mode 100644
index 4b7325ae2..000000000
--- a/third_party/aom/av1/encoder/firstpass.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_FIRSTPASS_H_
-#define AOM_AV1_ENCODER_FIRSTPASS_H_
-
-#include "av1/common/enums.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/encoder/lookahead.h"
-#include "av1/encoder/ratectrl.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if CONFIG_FP_MB_STATS
-
-#define FPMB_DCINTRA_MASK 0x01
-
-#define FPMB_MOTION_ZERO_MASK 0x02
-#define FPMB_MOTION_LEFT_MASK 0x04
-#define FPMB_MOTION_RIGHT_MASK 0x08
-#define FPMB_MOTION_UP_MASK 0x10
-#define FPMB_MOTION_DOWN_MASK 0x20
-
-#define FPMB_ERROR_SMALL_MASK 0x40
-#define FPMB_ERROR_LARGE_MASK 0x80
-#define FPMB_ERROR_SMALL_TH 2000
-#define FPMB_ERROR_LARGE_TH 48000
-
-typedef struct {
-  uint8_t *mb_stats_start;
-  uint8_t *mb_stats_end;
-} FIRSTPASS_MB_STATS;
-#endif
-
-// Length of the bi-predictive frame group (BFG)
-// NOTE: Currently each BFG contains one backward ref (BWF) frame plus a certain
-//       number of bi-predictive frames.
-#define BFG_INTERVAL 2
-// The maximum number of extra ALTREF's except ALTREF_FRAME
-#define MAX_EXT_ARFS (REF_FRAMES - BWDREF_FRAME - 1)
-
-#define MIN_EXT_ARF_INTERVAL 4
-
-#define MIN_ZERO_MOTION 0.95
-#define MAX_SR_CODED_ERROR 40
-#define MAX_RAW_ERR_VAR 2000
-#define MIN_MV_IN_OUT 0.4
-
-#define VLOW_MOTION_THRESHOLD 950
-
-typedef struct {
-  double frame;
-  double weight;
-  double intra_error;
-  double frame_avg_wavelet_energy;
-  double coded_error;
-  double sr_coded_error;
-  double pcnt_inter;
-  double pcnt_motion;
-  double pcnt_second_ref;
-  double pcnt_neutral;
-  double intra_skip_pct;
-  double inactive_zone_rows;  // Image mask rows top and bottom.
-  double inactive_zone_cols;  // Image mask columns at left and right edges.
-  double MVr;
-  double mvr_abs;
-  double MVc;
-  double mvc_abs;
-  double MVrv;
-  double MVcv;
-  double mv_in_out_count;
-  double new_mv_count;
-  double duration;
-  double count;
-  // standard deviation for (0, 0) motion prediction error
-  double raw_error_stdev;
-} FIRSTPASS_STATS;
-
-typedef enum {
-  KF_UPDATE = 0,
-  LF_UPDATE = 1,
-  GF_UPDATE = 2,
-  ARF_UPDATE = 3,
-  OVERLAY_UPDATE = 4,
-  BRF_UPDATE = 5,            // Backward Reference Frame
-  LAST_BIPRED_UPDATE = 6,    // Last Bi-predictive Frame
-  BIPRED_UPDATE = 7,         // Bi-predictive Frame, but not the last one
-  INTNL_OVERLAY_UPDATE = 8,  // Internal Overlay Frame
-  INTNL_ARF_UPDATE = 9,      // Internal Altref Frame (candidate for ALTREF2)
-  FRAME_UPDATE_TYPES = 10
-} FRAME_UPDATE_TYPE;
-
-#define FC_ANIMATION_THRESH 0.15
-typedef enum {
-  FC_NORMAL = 0,
-  FC_GRAPHICS_ANIMATION = 1,
-  FRAME_CONTENT_TYPES = 2
-} FRAME_CONTENT_TYPE;
-
-typedef struct {
-  unsigned char index;
-  RATE_FACTOR_LEVEL rf_level[(MAX_LAG_BUFFERS * 2) + 1];
-  FRAME_UPDATE_TYPE update_type[(MAX_LAG_BUFFERS * 2) + 1];
-  unsigned char arf_src_offset[(MAX_LAG_BUFFERS * 2) + 1];
-  unsigned char arf_update_idx[(MAX_LAG_BUFFERS * 2) + 1];
-  unsigned char arf_ref_idx[(MAX_LAG_BUFFERS * 2) + 1];
-#if USE_SYMM_MULTI_LAYER
-  unsigned char arf_pos_in_gf[(MAX_LAG_BUFFERS * 2) + 1];
-  unsigned char pyramid_level[(MAX_LAG_BUFFERS * 2) + 1];
-  unsigned char pyramid_height;
-  unsigned char pyramid_lvl_nodes[MAX_PYRAMID_LVL];
-#endif
-  unsigned char brf_src_offset[(MAX_LAG_BUFFERS * 2) + 1];
-  unsigned char bidir_pred_enabled[(MAX_LAG_BUFFERS * 2) + 1];
-  unsigned char ref_fb_idx_map[(MAX_LAG_BUFFERS * 2) + 1][REF_FRAMES];
-  unsigned char refresh_idx[(MAX_LAG_BUFFERS * 2) + 1];
-  unsigned char refresh_flag[(MAX_LAG_BUFFERS * 2) + 1];
-  int bit_allocation[(MAX_LAG_BUFFERS * 2) + 1];
-} GF_GROUP;
-
-typedef struct {
-  unsigned int section_intra_rating;
-  FIRSTPASS_STATS total_stats;
-  FIRSTPASS_STATS this_frame_stats;
-  const FIRSTPASS_STATS *stats_in;
-  const FIRSTPASS_STATS *stats_in_start;
-  const FIRSTPASS_STATS *stats_in_end;
-  FIRSTPASS_STATS total_left_stats;
-  int first_pass_done;
-  int64_t bits_left;
-  double modified_error_min;
-  double modified_error_max;
-  double modified_error_left;
-  double mb_av_energy;
-  double frame_avg_haar_energy;
-
-#if CONFIG_FP_MB_STATS
-  uint8_t *frame_mb_stats_buf;
-  uint8_t *this_frame_mb_stats;
-  FIRSTPASS_MB_STATS firstpass_mb_stats;
-#endif
-  // An indication of the content type of the current frame
-  FRAME_CONTENT_TYPE fr_content_type;
-
-  // Projected total bits available for a key frame group of frames
-  int64_t kf_group_bits;
-
-  // Error score of frames still to be coded in kf group
-  int64_t kf_group_error_left;
-
-  // The fraction for a kf groups total bits allocated to the inter frames
-  double kfgroup_inter_fraction;
-
-  int sr_update_lag;
-
-  int kf_zeromotion_pct;
-  int last_kfgroup_zeromotion_pct;
-  int gf_zeromotion_pct;
-  int active_worst_quality;
-  int baseline_active_worst_quality;
-  int extend_minq;
-  int extend_maxq;
-  int extend_minq_fast;
-
-  GF_GROUP gf_group;
-} TWO_PASS;
-
-struct AV1_COMP;
-
-void av1_init_first_pass(struct AV1_COMP *cpi);
-void av1_rc_get_first_pass_params(struct AV1_COMP *cpi);
-void av1_first_pass(struct AV1_COMP *cpi, const struct lookahead_entry *source);
-void av1_end_first_pass(struct AV1_COMP *cpi);
-
-void av1_init_second_pass(struct AV1_COMP *cpi);
-void av1_rc_get_second_pass_params(struct AV1_COMP *cpi);
-void av1_configure_buffer_updates_firstpass(struct AV1_COMP *cpi,
-                                            FRAME_UPDATE_TYPE update_type);
-
-// Post encode update of the rate control parameters for 2-pass
-void av1_twopass_postencode_update(struct AV1_COMP *cpi);
-
-static INLINE int get_number_of_extra_arfs(int interval, int arf_pending) {
-  if (arf_pending && MAX_EXT_ARFS > 0)
-    return interval >= MIN_EXT_ARF_INTERVAL * (MAX_EXT_ARFS + 1)
-               ? MAX_EXT_ARFS
-               : interval >= MIN_EXT_ARF_INTERVAL * MAX_EXT_ARFS
-                     ? MAX_EXT_ARFS - 1
-                     : 0;
-  else
-    return 0;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_FIRSTPASS_H_
diff --git a/third_party/aom/av1/encoder/global_motion.c b/third_party/aom/av1/encoder/global_motion.c
deleted file mode 100644
index e9f8b0bb4..000000000
--- a/third_party/aom/av1/encoder/global_motion.c
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <memory.h>
-#include <math.h>
-#include <assert.h>
-
-#include "av1/encoder/global_motion.h"
-
-#include "av1/common/warped_motion.h"
-
-#include "av1/encoder/segmentation.h"
-#include "av1/encoder/corner_detect.h"
-#include "av1/encoder/corner_match.h"
-#include "av1/encoder/ransac.h"
-
-#define MAX_CORNERS 4096
-#define MIN_INLIER_PROB 0.1
-
-#define MIN_TRANS_THRESH (1 * GM_TRANS_DECODE_FACTOR)
-
-// Border over which to compute the global motion
-#define ERRORADV_BORDER 0
-
-static const double erroradv_tr[] = { 0.65, 0.60, 0.55 };
-static const double erroradv_prod_tr[] = { 20000, 18000, 16000 };
-
-int is_enough_erroradvantage(double best_erroradvantage, int params_cost,
-                             int erroradv_type) {
-  assert(erroradv_type < GM_ERRORADV_TR_TYPES);
-  return best_erroradvantage < erroradv_tr[erroradv_type] &&
-         best_erroradvantage * params_cost < erroradv_prod_tr[erroradv_type];
-}
-
-static void convert_to_params(const double *params, int32_t *model) {
-  int i;
-  int alpha_present = 0;
-  model[0] = (int32_t)floor(params[0] * (1 << GM_TRANS_PREC_BITS) + 0.5);
-  model[1] = (int32_t)floor(params[1] * (1 << GM_TRANS_PREC_BITS) + 0.5);
-  model[0] = (int32_t)clamp(model[0], GM_TRANS_MIN, GM_TRANS_MAX) *
-             GM_TRANS_DECODE_FACTOR;
-  model[1] = (int32_t)clamp(model[1], GM_TRANS_MIN, GM_TRANS_MAX) *
-             GM_TRANS_DECODE_FACTOR;
-
-  for (i = 2; i < 6; ++i) {
-    const int diag_value = ((i == 2 || i == 5) ? (1 << GM_ALPHA_PREC_BITS) : 0);
-    model[i] = (int32_t)floor(params[i] * (1 << GM_ALPHA_PREC_BITS) + 0.5);
-    model[i] =
-        (int32_t)clamp(model[i] - diag_value, GM_ALPHA_MIN, GM_ALPHA_MAX);
-    alpha_present |= (model[i] != 0);
-    model[i] = (model[i] + diag_value) * GM_ALPHA_DECODE_FACTOR;
-  }
-  for (; i < 8; ++i) {
-    model[i] = (int32_t)floor(params[i] * (1 << GM_ROW3HOMO_PREC_BITS) + 0.5);
-    model[i] = (int32_t)clamp(model[i], GM_ROW3HOMO_MIN, GM_ROW3HOMO_MAX) *
-               GM_ROW3HOMO_DECODE_FACTOR;
-    alpha_present |= (model[i] != 0);
-  }
-
-  if (!alpha_present) {
-    if (abs(model[0]) < MIN_TRANS_THRESH && abs(model[1]) < MIN_TRANS_THRESH) {
-      model[0] = 0;
-      model[1] = 0;
-    }
-  }
-}
-
-void convert_model_to_params(const double *params, WarpedMotionParams *model) {
-  convert_to_params(params, model->wmmat);
-  model->wmtype = get_gmtype(model);
-  model->invalid = 0;
-}
-
-// Adds some offset to a global motion parameter and handles
-// all of the necessary precision shifts, clamping, and
-// zero-centering.
-static int32_t add_param_offset(int param_index, int32_t param_value,
-                                int32_t offset) {
-  const int scale_vals[3] = { GM_TRANS_PREC_DIFF, GM_ALPHA_PREC_DIFF,
-                              GM_ROW3HOMO_PREC_DIFF };
-  const int clamp_vals[3] = { GM_TRANS_MAX, GM_ALPHA_MAX, GM_ROW3HOMO_MAX };
-  // type of param: 0 - translation, 1 - affine, 2 - homography
-  const int param_type = (param_index < 2 ? 0 : (param_index < 6 ? 1 : 2));
-  const int is_one_centered = (param_index == 2 || param_index == 5);
-
-  // Make parameter zero-centered and offset the shift that was done to make
-  // it compatible with the warped model
-  param_value = (param_value - (is_one_centered << WARPEDMODEL_PREC_BITS)) >>
-                scale_vals[param_type];
-  // Add desired offset to the rescaled/zero-centered parameter
-  param_value += offset;
-  // Clamp the parameter so it does not overflow the number of bits allotted
-  // to it in the bitstream
-  param_value = (int32_t)clamp(param_value, -clamp_vals[param_type],
-                               clamp_vals[param_type]);
-  // Rescale the parameter to WARPEDMODEL_PRECISION_BITS so it is compatible
-  // with the warped motion library
-  param_value *= (1 << scale_vals[param_type]);
-
-  // Undo the zero-centering step if necessary
-  return param_value + (is_one_centered << WARPEDMODEL_PREC_BITS);
-}
-
-static void force_wmtype(WarpedMotionParams *wm, TransformationType wmtype) {
-  switch (wmtype) {
-    case IDENTITY:
-      wm->wmmat[0] = 0;
-      wm->wmmat[1] = 0;
-      AOM_FALLTHROUGH_INTENDED;
-    case TRANSLATION:
-      wm->wmmat[2] = 1 << WARPEDMODEL_PREC_BITS;
-      wm->wmmat[3] = 0;
-      AOM_FALLTHROUGH_INTENDED;
-    case ROTZOOM:
-      wm->wmmat[4] = -wm->wmmat[3];
-      wm->wmmat[5] = wm->wmmat[2];
-      AOM_FALLTHROUGH_INTENDED;
-    case AFFINE: wm->wmmat[6] = wm->wmmat[7] = 0; break;
-    default: assert(0);
-  }
-  wm->wmtype = wmtype;
-}
-
-int64_t refine_integerized_param(WarpedMotionParams *wm,
-                                 TransformationType wmtype, int use_hbd, int bd,
-                                 uint8_t *ref, int r_width, int r_height,
-                                 int r_stride, uint8_t *dst, int d_width,
-                                 int d_height, int d_stride, int n_refinements,
-                                 int64_t best_frame_error) {
-  static const int max_trans_model_params[TRANS_TYPES] = { 0, 2, 4, 6 };
-  const int border = ERRORADV_BORDER;
-  int i = 0, p;
-  int n_params = max_trans_model_params[wmtype];
-  int32_t *param_mat = wm->wmmat;
-  int64_t step_error, best_error;
-  int32_t step;
-  int32_t *param;
-  int32_t curr_param;
-  int32_t best_param;
-
-  force_wmtype(wm, wmtype);
-  best_error = av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
-                              dst + border * d_stride + border, border, border,
-                              d_width - 2 * border, d_height - 2 * border,
-                              d_stride, 0, 0, best_frame_error);
-  best_error = AOMMIN(best_error, best_frame_error);
-  step = 1 << (n_refinements - 1);
-  for (i = 0; i < n_refinements; i++, step >>= 1) {
-    for (p = 0; p < n_params; ++p) {
-      int step_dir = 0;
-      // Skip searches for parameters that are forced to be 0
-      param = param_mat + p;
-      curr_param = *param;
-      best_param = curr_param;
-      // look to the left
-      *param = add_param_offset(p, curr_param, -step);
-      step_error =
-          av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
-                         dst + border * d_stride + border, border, border,
-                         d_width - 2 * border, d_height - 2 * border, d_stride,
-                         0, 0, best_error);
-      if (step_error < best_error) {
-        best_error = step_error;
-        best_param = *param;
-        step_dir = -1;
-      }
-
-      // look to the right
-      *param = add_param_offset(p, curr_param, step);
-      step_error =
-          av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
-                         dst + border * d_stride + border, border, border,
-                         d_width - 2 * border, d_height - 2 * border, d_stride,
-                         0, 0, best_error);
-      if (step_error < best_error) {
-        best_error = step_error;
-        best_param = *param;
-        step_dir = 1;
-      }
-      *param = best_param;
-
-      // look to the direction chosen above repeatedly until error increases
-      // for the biggest step size
-      while (step_dir) {
-        *param = add_param_offset(p, best_param, step * step_dir);
-        step_error =
-            av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
-                           dst + border * d_stride + border, border, border,
-                           d_width - 2 * border, d_height - 2 * border,
-                           d_stride, 0, 0, best_error);
-        if (step_error < best_error) {
-          best_error = step_error;
-          best_param = *param;
-        } else {
-          *param = best_param;
-          step_dir = 0;
-        }
-      }
-    }
-  }
-  force_wmtype(wm, wmtype);
-  wm->wmtype = get_gmtype(wm);
-  return best_error;
-}
-
-static INLINE RansacFunc get_ransac_type(TransformationType type) {
-  switch (type) {
-    case AFFINE: return ransac_affine;
-    case ROTZOOM: return ransac_rotzoom;
-    case TRANSLATION: return ransac_translation;
-    default: assert(0); return NULL;
-  }
-}
-
-static unsigned char *downconvert_frame(YV12_BUFFER_CONFIG *frm,
-                                        int bit_depth) {
-  int i, j;
-  uint16_t *orig_buf = CONVERT_TO_SHORTPTR(frm->y_buffer);
-  uint8_t *buf_8bit = frm->y_buffer_8bit;
-  assert(buf_8bit);
-  if (!frm->buf_8bit_valid) {
-    for (i = 0; i < frm->y_height; ++i) {
-      for (j = 0; j < frm->y_width; ++j) {
-        buf_8bit[i * frm->y_stride + j] =
-            orig_buf[i * frm->y_stride + j] >> (bit_depth - 8);
-      }
-    }
-    frm->buf_8bit_valid = 1;
-  }
-  return buf_8bit;
-}
-
-int compute_global_motion_feature_based(TransformationType type,
-                                        YV12_BUFFER_CONFIG *frm,
-                                        YV12_BUFFER_CONFIG *ref, int bit_depth,
-                                        int *num_inliers_by_motion,
-                                        double *params_by_motion,
-                                        int num_motions) {
-  int i;
-  int num_frm_corners, num_ref_corners;
-  int num_correspondences;
-  int *correspondences;
-  int frm_corners[2 * MAX_CORNERS], ref_corners[2 * MAX_CORNERS];
-  unsigned char *frm_buffer = frm->y_buffer;
-  unsigned char *ref_buffer = ref->y_buffer;
-  RansacFunc ransac = get_ransac_type(type);
-
-  if (frm->flags & YV12_FLAG_HIGHBITDEPTH) {
-    // The frame buffer is 16-bit, so we need to convert to 8 bits for the
-    // following code. We cache the result until the frame is released.
-    frm_buffer = downconvert_frame(frm, bit_depth);
-  }
-  if (ref->flags & YV12_FLAG_HIGHBITDEPTH) {
-    ref_buffer = downconvert_frame(ref, bit_depth);
-  }
-
-  // compute interest points in images using FAST features
-  num_frm_corners = fast_corner_detect(frm_buffer, frm->y_width, frm->y_height,
-                                       frm->y_stride, frm_corners, MAX_CORNERS);
-  num_ref_corners = fast_corner_detect(ref_buffer, ref->y_width, ref->y_height,
-                                       ref->y_stride, ref_corners, MAX_CORNERS);
-
-  // find correspondences between the two images
-  correspondences =
-      (int *)malloc(num_frm_corners * 4 * sizeof(*correspondences));
-  num_correspondences = determine_correspondence(
-      frm_buffer, (int *)frm_corners, num_frm_corners, ref_buffer,
-      (int *)ref_corners, num_ref_corners, frm->y_width, frm->y_height,
-      frm->y_stride, ref->y_stride, correspondences);
-
-  ransac(correspondences, num_correspondences, num_inliers_by_motion,
-         params_by_motion, num_motions);
-
-  free(correspondences);
-
-  // Set num_inliers = 0 for motions with too few inliers so they are ignored.
-  for (i = 0; i < num_motions; ++i) {
-    if (num_inliers_by_motion[i] < MIN_INLIER_PROB * num_correspondences) {
-      num_inliers_by_motion[i] = 0;
-    }
-  }
-
-  // Return true if any one of the motions has inliers.
-  for (i = 0; i < num_motions; ++i) {
-    if (num_inliers_by_motion[i] > 0) return 1;
-  }
-  return 0;
-}
diff --git a/third_party/aom/av1/encoder/global_motion.h b/third_party/aom/av1/encoder/global_motion.h
deleted file mode 100644
index c7c016c43..000000000
--- a/third_party/aom/av1/encoder/global_motion.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_GLOBAL_MOTION_H_
-#define AOM_AV1_ENCODER_GLOBAL_MOTION_H_
-
-#include "aom/aom_integer.h"
-#include "aom_scale/yv12config.h"
-#include "av1/common/mv.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define RANSAC_NUM_MOTIONS 1
-
-void convert_model_to_params(const double *params, WarpedMotionParams *model);
-
-int is_enough_erroradvantage(double best_erroradvantage, int params_cost,
-                             int erroradv_type);
-
-// Returns the av1_warp_error between "dst" and the result of applying the
-// motion params that result from fine-tuning "wm" to "ref". Note that "wm" is
-// modified in place.
-int64_t refine_integerized_param(WarpedMotionParams *wm,
-                                 TransformationType wmtype, int use_hbd, int bd,
-                                 uint8_t *ref, int r_width, int r_height,
-                                 int r_stride, uint8_t *dst, int d_width,
-                                 int d_height, int d_stride, int n_refinements,
-                                 int64_t best_frame_error);
-
-/*
-  Computes "num_motions" candidate global motion parameters between two frames.
-  The array "params_by_motion" should be length 8 * "num_motions". The ordering
-  of each set of parameters is best described  by the homography:
-
-        [x'     (m2 m3 m0   [x
-    z .  y'  =   m4 m5 m1 *  y
-         1]      m6 m7 1)    1]
-
-  where m{i} represents the ith value in any given set of parameters.
-
-  "num_inliers" should be length "num_motions", and will be populated with the
-  number of inlier feature points for each motion. Params for which the
-  num_inliers entry is 0 should be ignored by the caller.
-*/
-int compute_global_motion_feature_based(TransformationType type,
-                                        YV12_BUFFER_CONFIG *frm,
-                                        YV12_BUFFER_CONFIG *ref, int bit_depth,
-                                        int *num_inliers_by_motion,
-                                        double *params_by_motion,
-                                        int num_motions);
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-#endif  // AOM_AV1_ENCODER_GLOBAL_MOTION_H_
diff --git a/third_party/aom/av1/encoder/grain_test_vectors.h b/third_party/aom/av1/encoder/grain_test_vectors.h
deleted file mode 100644
index 945dc3733..000000000
--- a/third_party/aom/av1/encoder/grain_test_vectors.h
+++ /dev/null
@@ -1,781 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_ENCODER_GRAIN_TEST_VECTORS_H_
-#define AOM_AV1_ENCODER_GRAIN_TEST_VECTORS_H_
-
-/* Test vectors for emulation of different film grain types.
- * Note that bit depth would be derived from the bitstream and
- * not signaled in film grain metadata. The parameters are valid
- * for any bit depth.
- */
-static aom_film_grain_t film_grain_test_vectors[16] = {
-  /* Test 1 */
-  {
-      1 /* apply_grain */,
-      1 /* update_parameters */,
-      { { 16, 0 },
-        { 25, 136 },
-        { 33, 144 },
-        { 41, 160 },
-        { 48, 168 },
-        { 56, 136 },
-        { 67, 128 },
-        { 82, 144 },
-        { 97, 152 },
-        { 113, 144 },
-        { 128, 176 },
-        { 143, 168 },
-        { 158, 176 },
-        { 178, 184 } },
-      14 /* num_points_y */,
-      { { 16, 0 },
-        { 20, 64 },
-        { 28, 88 },
-        { 60, 104 },
-        { 90, 136 },
-        { 105, 160 },
-        { 134, 168 },
-        { 168, 208 } },
-      8 /* num_cb_points */,
-      { { 16, 0 },
-        { 28, 96 },
-        { 56, 80 },
-        { 66, 96 },
-        { 80, 104 },
-        { 108, 96 },
-        { 122, 112 },
-        { 137, 112 },
-        { 169, 176 } },
-      9 /* num_cr_points */,
-      11 /* scaling_shift */,
-      2 /* ar_coeff_lag */,
-      { 0, 0, -58, 0, 0, 0, -76, 100, -43, 0, -51, 82 },
-      { 0, 0, -49, 0, 0, 0, -36, 22, -30, 0, -38, 7, 39 },
-      { 0, 0, -47, 0, 0, 0, -31, 31, -25, 0, -32, 13, -100 },
-      8 /* ar_coeff_shift */,
-      247 /* cb_mult */,
-      192 /* cb_luma_mult */,
-      18 /* cb_offset */,
-      229 /* cr_mult */,
-      192 /* cr_luma_mult */,
-      54 /* cr_offset */,
-      0 /* overlap_flag */,
-      1 /* clip_to_restricted_range */,
-      8 /* bit_depth */,
-      0 /* chroma_scaling_from_luma*/,
-      0 /* grain_scale_shift*/,
-      45231 /* random_seed */
-  },
-  /* Test 2 */
-  {
-      1 /* apply_grain */,
-      1 /* update_parameters */,
-      { { 0, 96 }, { 255, 96 } },
-      2 /* num_points_y */,
-      { { 0, 64 }, { 255, 64 } },
-      2 /* num_cb_points */,
-      { { 0, 64 }, { 255, 64 } },
-      2 /* num_cr_points */,
-      11 /* scaling_shift */,
-      3 /* ar_coeff_lag */,
-      {
-          4, 1,   3, 0,   1,  -3, 8,  -3, 7,  -23, 1, -25,
-          0, -10, 6, -17, -4, 53, 36, 5,  -5, -17, 8, 66,
-      },
-      {
-          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0,
-          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127,
-      },
-      {
-          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0,
-          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127,
-      },
-      7 /* ar_coeff_shift */,
-      128 /* cb_mult */,
-      192 /* cb_luma_mult */,
-      256 /* cb_offset */,
-      128 /* cr_mult */,
-      192 /* cr_luma_mult */,
-      256 /* cr_offset */,
-      1 /* overlap_flag */,
-      0 /* clip_to_restricted_range */,
-      8 /* bit_depth */,
-      0 /*chroma_scaling_from_luma*/,
-      0 /* grain_scale_shift*/,
-      45231 /* random_seed */
-  },
-  /* Test 3 */
-  {
-      1 /* apply_grain */,
-      1 /* update_parameters */,
-      { { 0, 192 }, { 255, 192 } },
-      2 /* num_points_y */,
-      { { 0, 128 }, { 255, 128 } },
-      2 /* num_cb_points */,
-      { { 0, 128 }, { 255, 128 } },
-      2 /* num_cr_points */,
-      11 /* scaling_shift */,
-      3 /* ar_coeff_lag */,
-      {
-          4, 1,   3, 0,   1,  -3, 8,  -3, 7,  -23, 1, -25,
-          0, -10, 6, -17, -4, 53, 36, 5,  -5, -17, 8, 66,
-      },
-      {
-          4,   -7, 2,  4,   12, -12, 5,   -8, 6,  8,   -19, -16, 19,
-          -10, -2, 17, -42, 58, -2,  -13, 9,  14, -36, 67,  0,
-      },
-      {
-          4,   -7, 2,  4,   12, -12, 5,   -8, 6,  8,   -19, -16, 19,
-          -10, -2, 17, -42, 58, -2,  -13, 9,  14, -36, 67,  0,
-      },
-      7 /* ar_coeff_shift */,
-      128 /* cb_mult */,
-      192 /* cb_luma_mult */,
-      256 /* cb_offset */,
-      128 /* cr_mult */,
-      192 /* cr_luma_mult */,
-      256 /* cr_offset */,
-      1 /* overlap_flag */,
-      1 /* clip_to_restricted_range */,
-      8 /* bit_depth */,
-      0 /*chroma_scaling_from_luma*/,
-      1 /* grain_scale_shift*/,
-      45231 /* random_seed */
-  },
-  /* Test 4 */
-  {
-      1 /* apply_grain */,
-      1 /* update_parameters */,
-      {
-          { 16, 0 },
-          { 24, 137 },
-          { 53, 146 },
-          { 63, 155 },
-          { 78, 155 },
-          { 107, 150 },
-          { 122, 147 },
-          { 136, 147 },
-          { 166, 153 },
-      },
-      9 /* num_points_y */,
-      {
-          { 16, 0 },
-          { 20, 72 },
-          { 27, 82 },
-          { 33, 91 },
-          { 69, 121 },
-          { 95, 143 },
-          { 108, 154 },
-          { 134, 169 },
-          { 147, 177 },
-      },
-      9 /* num_cb_points */,
-      {
-          { 16, 0 },
-          { 24, 95 },
-          { 54, 93 },
-          { 65, 94 },
-          { 79, 98 },
-          { 109, 107 },
-          { 124, 119 },
-          { 139, 136 },
-          { 169, 170 },
-      },
-      9 /* num_cr_points */,
-      11 /* scaling_shift */,
-      3 /* ar_coeff_lag */,
-      {
-          7,  -9,  2, 4,   7, -12, 7,  -18, 18, -30, -27, -42,
-          13, -20, 7, -18, 6, 107, 55, -2,  -4, -9,  -22, 113,
-      },
-      {
-          -3, -1, -4,  3,   -6,  -2,  3,  1,  -4, -10, -10, -5, -5,
-          -3, -1, -13, -28, -25, -31, -6, -4, 14, -64, 66,  0,
-      },
-      {
-          0,  4, -3, 13,  0,  1,   -3, 0,  -3, -10, -68, -4, -2,
-          -5, 2, -3, -20, 62, -31, 0,  -4, -1, -8,  -29, 0,
-      },
-      8 /* ar_coeff_shift */,
-      128 /* cb_mult */,
-      192 /* cb_luma_mult */,
-      256 /* cb_offset */,
-      128 /* cr_mult */,
-      192 /* cr_luma_mult */,
-      256 /* cr_offset */,
-      1 /* overlap_flag */,
-      0 /* clip_to_restricted_range */,
-      8 /* bit_depth */,
-      0 /*chroma_scaling_from_luma*/,
-      0 /* grain_scale_shift*/,
-      45231 /* random_seed */
-  },
-  /* Test 5 */
-  {
-      1 /* apply_grain */,
-      0 /* update_parameters */,
-      { { 0, 64 }, { 255, 64 } },
-      2 /* num_points_y */,
-      {
-          { 0, 96 },
-          { 32, 90 },
-          { 64, 83 },
-          { 96, 76 },
-          { 128, 68 },
-          { 159, 59 },
-          { 191, 48 },
-          { 223, 34 },
-          { 255, 0 },
-      },
-      9 /* num_cb_points */,
-      {
-          { 0, 0 },
-          { 32, 34 },
-          { 64, 48 },
-          { 96, 59 },
-          { 128, 68 },
-          { 159, 76 },
-          { 191, 83 },
-          { 223, 90 },
-          { 255, 96 },
-      },
-      9 /* num_cr_points */,
-      11 /* scaling_shift */,
-      3 /* ar_coeff_lag */,
-      {
-          4, 1,   3, 0,   1,  -3, 8,  -3, 7,  -23, 1, -25,
-          0, -10, 6, -17, -4, 53, 36, 5,  -5, -17, 8, 66,
-      },
-      {
-          -2, 2,  -5, 7,   -6, 4,   -2, -1, 1,  -2,  0,  -2, 2,
-          -3, -5, 13, -13, 6,  -14, 8,  -1, 18, -36, 58, 0,
-      },
-      {
-          -2, -1, -3, 14, -4, -1, -3, 0, -1, 7, -31, 7, 2,
-          0,  1,  0,  -7, 50, -8, -2, 2, 2,  2, -4,  0,
-      },
-      7 /* ar_coeff_shift */,
-      128 /* cb_mult */,
-      192 /* cb_luma_mult */,
-      256 /* cb_offset */,
-      128 /* cr_mult */,
-      192 /* cr_luma_mult */,
-      256 /* cr_offset */,
-      1 /* overlap_flag */,
-      1 /* clip_to_restricted_range */,
-      8 /* bit_depth */,
-      0 /*chroma_scaling_from_luma*/,
-      0 /* grain_scale_shift*/,
-      1063 /* random_seed */
-  },
-  /* Test 6 */
-  {
-      1 /* apply_grain */,
-      1 /* update_parameters */,
-      {
-          { 0, 96 },
-          { 20, 92 },
-          { 39, 88 },
-          { 59, 84 },
-          { 78, 80 },
-          { 98, 75 },
-          { 118, 70 },
-          { 137, 65 },
-          { 157, 60 },
-          { 177, 53 },
-          { 196, 46 },
-          { 216, 38 },
-          { 235, 27 },
-          { 255, 0 },
-      },
-      14 /* num_points_y */,
-      { { 0, 0 } },
-      0 /* num_cb_points */,
-      { { 0, 0 } },
-      0 /* num_cr_points */,
-      11 /* scaling_shift */,
-      3 /* ar_coeff_lag */,
-      {
-          4, 1,   3, 0,   1,  -3, 8,  -3, 7,  -23, 1, -25,
-          0, -10, 6, -17, -4, 53, 36, 5,  -5, -17, 8, 66,
-      },
-      {
-          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      },
-      {
-          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      },
-      7 /* ar_coeff_shift */,
-      128 /* cb_mult */,
-      192 /* cb_luma_mult */,
-      256 /* cb_offset */,
-      128 /* cr_mult */,
-      192 /* cr_luma_mult */,
-      256 /* cr_offset */,
-      1 /* overlap_flag */,
-      1 /* clip_to_restricted_range */,
-      8 /* bit_depth */,
-      0 /*chroma_scaling_from_luma*/,
-      0 /* grain_scale_shift*/,
-      2754 /* random_seed */
-  },
-  /* Test 7 */
-  {
-      1 /* apply_grain */,
-      1 /* update_parameters */,
-      {
-          { 0, 0 },
-          { 20, 27 },
-          { 39, 38 },
-          { 59, 46 },
-          { 78, 53 },
-          { 98, 60 },
-          { 118, 65 },
-          { 137, 70 },
-          { 157, 75 },
-          { 177, 80 },
-          { 196, 84 },
-          { 216, 88 },
-          { 235, 92 },
-          { 255, 96 },
-      },
-      14 /* num_points_y */,
-      { { 0, 0 }, { 255, 0 } },
-      2 /* num_cb_points */,
-      { { 0, 0 }, { 255, 0 } },
-      2 /* num_cr_points */,
-      11 /* scaling_shift */,
-      3 /* ar_coeff_lag */,
-      {
-          4, 1,   3, 0,   1,  -3, 8,  -3, 7,  -23, 1, -25,
-          0, -10, 6, -17, -4, 53, 36, 5,  -5, -17, 8, 66,
-      },
-      {
-          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      },
-      {
-          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      },
-      7 /* ar_coeff_shift */,
-      128 /* cb_mult */,
-      192 /* cb_luma_mult */,
-      256 /* cb_offset */,
-      128 /* cr_mult */,
-      192 /* cr_luma_mult */,
-      256 /* cr_offset */,
-      1 /* overlap_flag */,
-      1 /* clip_to_restricted_range */,
-      8 /* bit_depth */,
-      0 /*chroma_scaling_from_luma*/,
-      0 /* grain_scale_shift*/,
-      45231 /* random_seed */
-  },
-  /* Test 8 */
-  {
-      1 /* apply_grain */,
-      1 /* update_parameters */,
-      { { 0, 96 }, { 255, 96 } },
-      2 /* num_points_y */,
-      { { 0, 62 }, { 255, 62 } },
-      2 /* num_cb_points */,
-      { { 0, 62 }, { 255, 62 } },
-      2 /* num_cr_points */,
-      11 /* scaling_shift */,
-      3 /* ar_coeff_lag */,
-      {
-          4, 1,   3, 0,   1,  -3, 8,  -3, 7,  -23, 1, -25,
-          0, -10, 6, -17, -4, 53, 36, 5,  -5, -17, 8, 66,
-      },
-      {
-          0,  -2, -2, 8,   5,  -1, 1,   -1, 5,  16,  -33, -9,  6,
-          -1, -3, 10, -47, 63, 0,  -15, 3,  11, -42, 75,  -69,
-      },
-      {
-          1,  -1, -1, 9,   5,  0, 1,   -1, 5,  15,  -32, -10, 8,
-          -2, -4, 11, -46, 62, 1, -16, 3,  13, -43, 75,  -55,
-      },
-      7 /* ar_coeff_shift */,
-      128 /* cb_mult */,
-      192 /* cb_luma_mult */,
-      256 /* cb_offset */,
-      128 /* cr_mult */,
-      192 /* cr_luma_mult */,
-      256 /* cr_offset */,
-      1 /* overlap_flag */,
-      0 /* clip_to_restricted_range */,
-      8 /* bit_depth */,
-      0 /*chroma_scaling_from_luma*/,
-      0 /* grain_scale_shift*/,
-      45231 /* random_seed */
-  },
-  /* Test 9 */
-  {
-      1 /* apply_grain */,
-      0 /* update_parameters */,
-      { { 0, 48 }, { 255, 48 } },
-      2 /* num_points_y */,
-      { { 0, 32 }, { 255, 32 } },
-      2 /* num_cb_points */,
-      { { 0, 32 }, { 255, 32 } },
-      2 /* num_cr_points */,
-      10 /* scaling_shift */,
-      2 /* ar_coeff_lag */,
-      { 10, -30, -20, -39, 1, -24, 12, 103, 60, -9, -24, 113 },
-      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127 },
-      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127 },
-      8 /* ar_coeff_shift */,
-      128 /* cb_mult */,
-      192 /* cb_luma_mult */,
-      256 /* cb_offset */,
-      128 /* cr_mult */,
-      192 /* cr_luma_mult */,
-      256 /* cr_offset */,
-      1 /* overlap_flag */,
-      0 /* clip_to_restricted_range */,
-      8 /* bit_depth */,
-      0 /*chroma_scaling_from_luma*/,
-      0 /* grain_scale_shift*/,
-      45231 /* random_seed */
-  },
-  /* Test 10 */
-  {
-      1 /* apply_grain */,
-      1 /* update_parameters */,
-      { { 0, 48 }, { 255, 48 } },
-      2 /* num_points_y */,
-      { { 0, 32 }, { 255, 32 } },
-      2 /* num_cb_points */,
-      { { 0, 32 }, { 255, 32 } },
-      2 /* num_cr_points */,
-      10 /* scaling_shift */,
-      2 /* ar_coeff_lag */,
-      { 10, -30, -20, -39, 1, -24, 12, 103, 60, -9, -24, 113 },
-      { -7, -6, -48, -22, 2, -3, -45, 73, -11, -26, -52, 76, 0 },
-      { -7, -6, -48, -22, 2, -3, -45, 73, -11, -26, -52, 76, 0 },
-      8 /* ar_coeff_shift */,
-      128 /* cb_mult */,
-      192 /* cb_luma_mult */,
-      256 /* cb_offset */,
-      128 /* cr_mult */,
-      192 /* cr_luma_mult */,
-      256 /* cr_offset */,
-      1 /* overlap_flag */,
-      0 /* clip_to_restricted_range */,
-      8 /* bit_depth */,
-      0 /*chroma_scaling_from_luma*/,
-      0 /* grain_scale_shift*/,
-      45231 /* random_seed */
-  },
-  /* Test 11 */
-  {
-      1 /* apply_grain */,
-      0 /* update_parameters */,
-      { { 0, 32 }, { 255, 32 } },
-      2 /* num_points_y */,
-      {
-          { 0, 48 },
-          { 32, 45 },
-          { 64, 42 },
-          { 96, 38 },
-          { 128, 34 },
-          { 159, 29 },
-          { 191, 24 },
-          { 223, 17 },
-          { 255, 0 },
-      },
-      9 /* num_cb_points */,
-      {
-          { 0, 0 },
-          { 32, 17 },
-          { 64, 24 },
-          { 96, 29 },
-          { 128, 34 },
-          { 159, 38 },
-          { 191, 42 },
-          { 223, 45 },
-          { 255, 48 },
-      },
-      9 /* num_cr_points */,
-      10 /* scaling_shift */,
-      3 /* ar_coeff_lag */,
-      {
-          7,  -9,  2, 4,   7, -12, 7,  -18, 18, -30, -27, -42,
-          13, -20, 7, -18, 6, 107, 55, -2,  -4, -9,  -22, 113,
-      },
-      {
-          -3, -1, -4,  3,   -6,  -2,  3,  1,  -4, -10, -10, -5, -5,
-          -3, -1, -13, -28, -25, -31, -6, -4, 14, -64, 66,  0,
-      },
-      {
-          0,  4, -3, 13,  0,  1,   -3, 0,  -3, -10, -68, -4, -2,
-          -5, 2, -3, -20, 62, -31, 0,  -4, -1, -8,  -29, 0,
-      },
-      8 /* ar_coeff_shift */,
-      128 /* cb_mult */,
-      192 /* cb_luma_mult */,
-      256 /* cb_offset */,
-      128 /* cr_mult */,
-      192 /* cr_luma_mult */,
-      256 /* cr_offset */,
-      1 /* overlap_flag */,
-      1 /* clip_to_restricted_range */,
-      8 /* bit_depth */,
-      0 /*chroma_scaling_from_luma*/,
-      0 /* grain_scale_shift*/,
-      1357 /* random_seed */
-  },
-  /* Test 12 */
-  {
-      1 /* apply_grain */,
-      1 /* update_parameters */,
-      {
-          { 16, 0 },
-          { 24, 49 },
-          { 39, 69 },
-          { 46, 84 },
-          { 53, 91 },
-          { 63, 100 },
-          { 78, 114 },
-          { 92, 134 },
-          { 164, 139 },
-      },
-      9 /* num_points_y */,
-      {
-          { 16, 0 },
-          { 20, 31 },
-          { 26, 42 },
-          { 33, 54 },
-          { 40, 65 },
-          { 47, 72 },
-          { 56, 85 },
-          { 84, 123 },
-          { 152, 157 },
-      },
-      9 /* num_cb_points */,
-      {
-          { 16, 0 },
-          { 25, 14 },
-          { 39, 33 },
-          { 47, 40 },
-          { 54, 47 },
-          { 64, 62 },
-          { 79, 76 },
-          { 94, 83 },
-          { 167, 101 },
-      },
-      9 /* num_cr_points */,
-      10 /* scaling_shift */,
-      2 /* ar_coeff_lag */,
-      { 0, 0, -58, 0, 0, 0, -76, 100, -43, 0, -51, 82 },
-      { 0, 0, -49, 0, 0, 0, -36, 22, -30, 0, -38, 7, 39 },
-      { 0, 0, -47, 0, 0, 0, -31, 31, -25, 0, -32, 13, -100 },
-      8 /* ar_coeff_shift */,
-      128 /* cb_mult */,
-      192 /* cb_luma_mult */,
-      256 /* cb_offset */,
-      128 /* cr_mult */,
-      192 /* cr_luma_mult */,
-      256 /* cr_offset */,
-      0 /* overlap_flag */,
-      0 /* clip_to_restricted_range */,
-      8 /* bit_depth */,
-      0 /*chroma_scaling_from_luma*/,
-      0 /* grain_scale_shift*/,
-      45231 /* random_seed */
-  },
-  /* Test 13 */
-  {
-      1 /* apply_grain */,
-      1 /* update_parameters */,
-      {
-          { 0, 48 },
-          { 20, 46 },
-          { 39, 44 },
-          { 59, 42 },
-          { 78, 40 },
-          { 98, 38 },
-          { 118, 35 },
-          { 137, 33 },
-          { 157, 30 },
-          { 177, 27 },
-          { 196, 23 },
-          { 216, 19 },
-          { 235, 13 },
-          { 255, 0 },
-      },
-      14 /* num_points_y */,
-      { { 0, 0 }, { 255, 0 } },
-      0 /* num_cb_points */,
-      { { 0, 0 }, { 255, 0 } },
-      0 /* num_cr_points */,
-      10 /* scaling_shift */,
-      2 /* ar_coeff_lag */,
-      { 10, -30, -20, -39, 1, -24, 12, 103, 60, -9, -24, 113 },
-      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
-      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
-      8 /* ar_coeff_shift */,
-      128 /* cb_mult */,
-      192 /* cb_luma_mult */,
-      256 /* cb_offset */,
-      128 /* cr_mult */,
-      192 /* cr_luma_mult */,
-      256 /* cr_offset */,
-      1 /* overlap_flag */,
-      0 /* clip_to_restricted_range */,
-      8 /* bit_depth */,
-      0 /*chroma_scaling_from_luma*/,
-      0 /* grain_scale_shift*/,
-      45231 /* random_seed */
-  },
-  /* Test 14 */
-  {
-      1 /* apply_grain */,
-      1 /* update_parameters */,
-      {
-          { 0, 0 },
-          { 20, 13 },
-          { 39, 19 },
-          { 59, 23 },
-          { 78, 27 },
-          { 98, 30 },
-          { 118, 33 },
-          { 137, 35 },
-          { 157, 38 },
-          { 177, 40 },
-          { 196, 42 },
-          { 216, 44 },
-          { 235, 46 },
-          { 255, 48 },
-      },
-      14 /* num_points_y */,
-      { { 0, 0 }, { 255, 0 } },
-      0 /* num_cb_points */,
-      { { 0, 0 }, { 255, 0 } },
-      0 /* num_cr_points */,
-      10 /* scaling_shift */,
-      2 /* ar_coeff_lag */,
-      { 10, -30, -20, -39, 1, -24, 12, 103, 60, -9, -24, 113 },
-      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
-      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
-      8 /* ar_coeff_shift */,
-      128 /* cb_mult */,
-      192 /* cb_luma_mult */,
-      256 /* cb_offset */,
-      128 /* cr_mult */,
-      192 /* cr_luma_mult */,
-      256 /* cr_offset */,
-      1 /* overlap_flag */,
-      1 /* clip_to_restricted_range */,
-      8 /* bit_depth */,
-      0 /*chroma_scaling_from_luma*/,
-      0 /* grain_scale_shift*/,
-      45231 /* random_seed */
-  },
-  /* Test 15 */
-  {
-      1 /* apply_grain */,
-      1 /* update_parameters */,
-      { { 0, 96 }, { 255, 96 } },
-      1 /* num_points_y */,
-      { { 0, 96 }, { 255, 96 } },
-      0 /* num_cb_points */,
-      { { 0, 96 }, { 255, 96 } },
-      0 /* num_cr_points */,
-      11 /* scaling_shift */,
-      2 /* ar_coeff_lag */,
-      { 5, -15, -10, -19, 0, -12, 6, 51, 30, -5, -12, 56 },
-      { 2, 2, -24, -5, 1, 1, -18, 37, -2, 0, -15, 39, -70 },
-      { 2, 3, -24, -5, -1, 0, -18, 38, -2, 0, -15, 39, -55 },
-      7 /* ar_coeff_shift */,
-      128 /* cb_mult */,
-      192 /* cb_luma_mult */,
-      256 /* cb_offset */,
-      128 /* cr_mult */,
-      192 /* cr_luma_mult */,
-      256 /* cr_offset */,
-      1 /* overlap_flag */,
-      0 /* clip_to_restricted_range */,
-      8 /* bit_depth */,
-      1 /*chroma_scaling_from_luma*/,
-      0 /* grain_scale_shift*/,
-      45231 /* random_seed */
-  },
-  /* Test 16 */
-  {
-      1 /* apply_grain */,
-      1 /* update_parameters */,
-      {
-          { 16, 0 },
-          { 58, 126 },
-          { 87, 120 },
-          { 97, 122 },
-          { 112, 125 },
-          { 126, 131 },
-          { 141, 139 },
-          { 199, 153 },
-      },
-      8 /* num_points_y */,
-      {
-          { 16, 0 },
-          { 59, 68 },
-          { 66, 76 },
-          { 73, 82 },
-          { 79, 85 },
-          { 86, 86 },
-          { 151, 95 },
-          { 192, 101 },
-      },
-      8 /* num_cb_points */,
-      {
-          { 16, 0 },
-          { 59, 64 },
-          { 89, 80 },
-          { 99, 86 },
-          { 114, 90 },
-          { 129, 93 },
-          { 144, 97 },
-          { 203, 85 },
-      },
-      8 /* num_cr_points */,
-      10 /* scaling_shift */,
-      3 /* ar_coeff_lag */,
-      {
-          4, 1,   3, 0,   1,  -3, 8,  -3, 7,  -23, 1, -25,
-          0, -10, 6, -17, -4, 53, 36, 5,  -5, -17, 8, 66,
-      },
-      {
-          0,  -2, -2, 8,   5,  -1, 1,   -1, 5,  16,  -33, -9,  6,
-          -1, -3, 10, -47, 63, 0,  -15, 3,  11, -42, 75,  -69,
-      },
-      {
-          1,  -1, -1, 9,   5,  0, 1,   -1, 5,  15,  -32, -10, 8,
-          -2, -4, 11, -46, 62, 1, -16, 3,  13, -43, 75,  -55,
-      },
-      7 /* ar_coeff_shift */,
-      128 /* cb_mult */,
-      192 /* cb_luma_mult */,
-      256 /* cb_offset */,
-      128 /* cr_mult */,
-      192 /* cr_luma_mult */,
-      256 /* cr_offset */,
-      1 /* overlap_flag */,
-      0 /* clip_to_restricted_range */,
-      8 /* bit_depth */,
-      0 /*chroma_scaling_from_luma*/,
-      2 /* grain_scale_shift*/,
-      45231 /* random_seed */
-  },
-};
-#endif  // AOM_AV1_ENCODER_GRAIN_TEST_VECTORS_H_
diff --git a/third_party/aom/av1/encoder/hash.c b/third_party/aom/av1/encoder/hash.c
deleted file mode 100644
index 180115d9f..000000000
--- a/third_party/aom/av1/encoder/hash.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/encoder/hash.h"
-
-static void crc_calculator_process_data(CRC_CALCULATOR *p_crc_calculator,
-                                        uint8_t *pData, uint32_t dataLength) {
-  for (uint32_t i = 0; i < dataLength; i++) {
-    const uint8_t index =
-        (p_crc_calculator->remainder >> (p_crc_calculator->bits - 8)) ^
-        pData[i];
-    p_crc_calculator->remainder <<= 8;
-    p_crc_calculator->remainder ^= p_crc_calculator->table[index];
-  }
-}
-
-static void crc_calculator_reset(CRC_CALCULATOR *p_crc_calculator) {
-  p_crc_calculator->remainder = 0;
-}
-
-static uint32_t crc_calculator_get_crc(CRC_CALCULATOR *p_crc_calculator) {
-  return p_crc_calculator->remainder & p_crc_calculator->final_result_mask;
-}
-
-static void crc_calculator_init_table(CRC_CALCULATOR *p_crc_calculator) {
-  const uint32_t high_bit = 1 << (p_crc_calculator->bits - 1);
-  const uint32_t byte_high_bit = 1 << (8 - 1);
-
-  for (uint32_t value = 0; value < 256; value++) {
-    uint32_t remainder = 0;
-    for (uint8_t mask = byte_high_bit; mask != 0; mask >>= 1) {
-      if (value & mask) {
-        remainder ^= high_bit;
-      }
-
-      if (remainder & high_bit) {
-        remainder <<= 1;
-        remainder ^= p_crc_calculator->trunc_poly;
-      } else {
-        remainder <<= 1;
-      }
-    }
-    p_crc_calculator->table[value] = remainder;
-  }
-}
-
-void av1_crc_calculator_init(CRC_CALCULATOR *p_crc_calculator, uint32_t bits,
-                             uint32_t truncPoly) {
-  p_crc_calculator->remainder = 0;
-  p_crc_calculator->bits = bits;
-  p_crc_calculator->trunc_poly = truncPoly;
-  p_crc_calculator->final_result_mask = (1 << bits) - 1;
-  crc_calculator_init_table(p_crc_calculator);
-}
-
-uint32_t av1_get_crc_value(void *crc_calculator, uint8_t *p, int length) {
-  CRC_CALCULATOR *p_crc_calculator = (CRC_CALCULATOR *)crc_calculator;
-  crc_calculator_reset(p_crc_calculator);
-  crc_calculator_process_data(p_crc_calculator, p, length);
-  return crc_calculator_get_crc(p_crc_calculator);
-}
-
-/* CRC-32C (iSCSI) polynomial in reversed bit order. */
-#define POLY 0x82f63b78
-
-/* Construct table for software CRC-32C calculation. */
-void av1_crc32c_calculator_init(CRC32C *p_crc32c) {
-  uint32_t crc;
-
-  for (int n = 0; n < 256; n++) {
-    crc = n;
-    crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
-    crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
-    crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
-    crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
-    crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
-    crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
-    crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
-    crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
-    p_crc32c->table[0][n] = crc;
-  }
-  for (int n = 0; n < 256; n++) {
-    crc = p_crc32c->table[0][n];
-    for (int k = 1; k < 8; k++) {
-      crc = p_crc32c->table[0][crc & 0xff] ^ (crc >> 8);
-      p_crc32c->table[k][n] = crc;
-    }
-  }
-}
-
-/* Table-driven software version as a fall-back.  This is about 15 times slower
- than using the hardware instructions.  This assumes little-endian integers,
- as is the case on Intel processors that the assembler code here is for. */
-uint32_t av1_get_crc32c_value_c(CRC32C *p, uint8_t *buf, size_t len) {
-  const uint8_t *next = (const uint8_t *)(buf);
-  uint64_t crc;
-
-  crc = 0 ^ 0xffffffff;
-  while (len && ((uintptr_t)next & 7) != 0) {
-    crc = p->table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8);
-    len--;
-  }
-  while (len >= 8) {
-    crc ^= *(uint64_t *)next;
-    crc = p->table[7][crc & 0xff] ^ p->table[6][(crc >> 8) & 0xff] ^
-          p->table[5][(crc >> 16) & 0xff] ^ p->table[4][(crc >> 24) & 0xff] ^
-          p->table[3][(crc >> 32) & 0xff] ^ p->table[2][(crc >> 40) & 0xff] ^
-          p->table[1][(crc >> 48) & 0xff] ^ p->table[0][crc >> 56];
-    next += 8;
-    len -= 8;
-  }
-  while (len) {
-    crc = p->table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8);
-    len--;
-  }
-  return (uint32_t)crc ^ 0xffffffff;
-}
diff --git a/third_party/aom/av1/encoder/hash.h b/third_party/aom/av1/encoder/hash.h
deleted file mode 100644
index 826c004d6..000000000
--- a/third_party/aom/av1/encoder/hash.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_HASH_H_
-#define AOM_AV1_ENCODER_HASH_H_
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct _crc_calculator {
-  uint32_t remainder;
-  uint32_t trunc_poly;
-  uint32_t bits;
-  uint32_t table[256];
-  uint32_t final_result_mask;
-} CRC_CALCULATOR;
-
-// Initialize the crc calculator. It must be executed at least once before
-// calling av1_get_crc_value().
-void av1_crc_calculator_init(CRC_CALCULATOR *p_crc_calculator, uint32_t bits,
-                             uint32_t truncPoly);
-uint32_t av1_get_crc_value(void *crc_calculator, uint8_t *p, int length);
-
-// CRC32C: POLY = 0x82f63b78;
-typedef struct _CRC32C {
-  /* Table for a quadword-at-a-time software crc. */
-  uint32_t table[8][256];
-} CRC32C;
-
-// init table for software version crc32c
-void av1_crc32c_calculator_init(CRC32C *p_crc32c);
-
-#define AOM_BUFFER_SIZE_FOR_BLOCK_HASH (4096)
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_HASH_H_
diff --git a/third_party/aom/av1/encoder/hash_motion.c b/third_party/aom/av1/encoder/hash_motion.c
deleted file mode 100644
index e85a516e8..000000000
--- a/third_party/aom/av1/encoder/hash_motion.c
+++ /dev/null
@@ -1,482 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "config/av1_rtcd.h"
-
-#include "av1/encoder/block.h"
-#include "av1/encoder/hash.h"
-#include "av1/encoder/hash_motion.h"
-
-static const int crc_bits = 16;
-static const int block_size_bits = 3;
-
-static void hash_table_clear_all(hash_table *p_hash_table) {
-  if (p_hash_table->p_lookup_table == NULL) {
-    return;
-  }
-  int max_addr = 1 << (crc_bits + block_size_bits);
-  for (int i = 0; i < max_addr; i++) {
-    if (p_hash_table->p_lookup_table[i] != NULL) {
-      aom_vector_destroy(p_hash_table->p_lookup_table[i]);
-      aom_free(p_hash_table->p_lookup_table[i]);
-      p_hash_table->p_lookup_table[i] = NULL;
-    }
-  }
-}
-
-// TODO(youzhou@microsoft.com): is higher than 8 bits screen content supported?
-// If yes, fix this function
-static void get_pixels_in_1D_char_array_by_block_2x2(uint8_t *y_src, int stride,
-                                                     uint8_t *p_pixels_in1D) {
-  uint8_t *p_pel = y_src;
-  int index = 0;
-  for (int i = 0; i < 2; i++) {
-    for (int j = 0; j < 2; j++) {
-      p_pixels_in1D[index++] = p_pel[j];
-    }
-    p_pel += stride;
-  }
-}
-
-static void get_pixels_in_1D_short_array_by_block_2x2(uint16_t *y_src,
-                                                      int stride,
-                                                      uint16_t *p_pixels_in1D) {
-  uint16_t *p_pel = y_src;
-  int index = 0;
-  for (int i = 0; i < 2; i++) {
-    for (int j = 0; j < 2; j++) {
-      p_pixels_in1D[index++] = p_pel[j];
-    }
-    p_pel += stride;
-  }
-}
-
-static int is_block_2x2_row_same_value(uint8_t *p) {
-  if (p[0] != p[1] || p[2] != p[3]) {
-    return 0;
-  }
-  return 1;
-}
-
-static int is_block16_2x2_row_same_value(uint16_t *p) {
-  if (p[0] != p[1] || p[2] != p[3]) {
-    return 0;
-  }
-  return 1;
-}
-
-static int is_block_2x2_col_same_value(uint8_t *p) {
-  if ((p[0] != p[2]) || (p[1] != p[3])) {
-    return 0;
-  }
-  return 1;
-}
-
-static int is_block16_2x2_col_same_value(uint16_t *p) {
-  if ((p[0] != p[2]) || (p[1] != p[3])) {
-    return 0;
-  }
-  return 1;
-}
-
-// the hash value (hash_value1 consists two parts, the first 3 bits relate to
-// the block size and the remaining 16 bits are the crc values. This fuction
-// is used to get the first 3 bits.
-static int hash_block_size_to_index(int block_size) {
-  switch (block_size) {
-    case 4: return 0;
-    case 8: return 1;
-    case 16: return 2;
-    case 32: return 3;
-    case 64: return 4;
-    case 128: return 5;
-    default: return -1;
-  }
-}
-
-void av1_hash_table_init(hash_table *p_hash_table, MACROBLOCK *x) {
-  if (x->g_crc_initialized == 0) {
-    av1_crc_calculator_init(&x->crc_calculator1, 24, 0x5D6DCB);
-    av1_crc_calculator_init(&x->crc_calculator2, 24, 0x864CFB);
-    x->g_crc_initialized = 1;
-  }
-  p_hash_table->p_lookup_table = NULL;
-}
-
-void av1_hash_table_destroy(hash_table *p_hash_table) {
-  hash_table_clear_all(p_hash_table);
-  aom_free(p_hash_table->p_lookup_table);
-  p_hash_table->p_lookup_table = NULL;
-}
-
-void av1_hash_table_create(hash_table *p_hash_table) {
-  if (p_hash_table->p_lookup_table != NULL) {
-    hash_table_clear_all(p_hash_table);
-    return;
-  }
-  const int max_addr = 1 << (crc_bits + block_size_bits);
-  p_hash_table->p_lookup_table =
-      (Vector **)aom_malloc(sizeof(p_hash_table->p_lookup_table[0]) * max_addr);
-  memset(p_hash_table->p_lookup_table, 0,
-         sizeof(p_hash_table->p_lookup_table[0]) * max_addr);
-}
-
-static void hash_table_add_to_table(hash_table *p_hash_table,
-                                    uint32_t hash_value,
-                                    block_hash *curr_block_hash) {
-  if (p_hash_table->p_lookup_table[hash_value] == NULL) {
-    p_hash_table->p_lookup_table[hash_value] =
-        aom_malloc(sizeof(p_hash_table->p_lookup_table[0][0]));
-    aom_vector_setup(p_hash_table->p_lookup_table[hash_value], 10,
-                     sizeof(curr_block_hash[0]));
-    aom_vector_push_back(p_hash_table->p_lookup_table[hash_value],
-                         curr_block_hash);
-  } else {
-    aom_vector_push_back(p_hash_table->p_lookup_table[hash_value],
-                         curr_block_hash);
-  }
-}
-
-int32_t av1_hash_table_count(hash_table *p_hash_table, uint32_t hash_value) {
-  if (p_hash_table->p_lookup_table[hash_value] == NULL) {
-    return 0;
-  } else {
-    return (int32_t)(p_hash_table->p_lookup_table[hash_value]->size);
-  }
-}
-
-Iterator av1_hash_get_first_iterator(hash_table *p_hash_table,
-                                     uint32_t hash_value) {
-  assert(av1_hash_table_count(p_hash_table, hash_value) > 0);
-  return aom_vector_begin(p_hash_table->p_lookup_table[hash_value]);
-}
-
-int32_t av1_has_exact_match(hash_table *p_hash_table, uint32_t hash_value1,
-                            uint32_t hash_value2) {
-  if (p_hash_table->p_lookup_table[hash_value1] == NULL) {
-    return 0;
-  }
-  Iterator iterator =
-      aom_vector_begin(p_hash_table->p_lookup_table[hash_value1]);
-  Iterator last = aom_vector_end(p_hash_table->p_lookup_table[hash_value1]);
-  for (; !iterator_equals(&iterator, &last); iterator_increment(&iterator)) {
-    if ((*(block_hash *)iterator_get(&iterator)).hash_value2 == hash_value2) {
-      return 1;
-    }
-  }
-  return 0;
-}
-
-void av1_generate_block_2x2_hash_value(const YV12_BUFFER_CONFIG *picture,
-                                       uint32_t *pic_block_hash[2],
-                                       int8_t *pic_block_same_info[3],
-                                       MACROBLOCK *x) {
-  const int width = 2;
-  const int height = 2;
-  const int x_end = picture->y_crop_width - width + 1;
-  const int y_end = picture->y_crop_height - height + 1;
-
-  const int length = width * 2;
-  if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
-    uint16_t p[4];
-    int pos = 0;
-    for (int y_pos = 0; y_pos < y_end; y_pos++) {
-      for (int x_pos = 0; x_pos < x_end; x_pos++) {
-        get_pixels_in_1D_short_array_by_block_2x2(
-            CONVERT_TO_SHORTPTR(picture->y_buffer) + y_pos * picture->y_stride +
-                x_pos,
-            picture->y_stride, p);
-        pic_block_same_info[0][pos] = is_block16_2x2_row_same_value(p);
-        pic_block_same_info[1][pos] = is_block16_2x2_col_same_value(p);
-
-        pic_block_hash[0][pos] = av1_get_crc_value(
-            &x->crc_calculator1, (uint8_t *)p, length * sizeof(p[0]));
-        pic_block_hash[1][pos] = av1_get_crc_value(
-            &x->crc_calculator2, (uint8_t *)p, length * sizeof(p[0]));
-        pos++;
-      }
-      pos += width - 1;
-    }
-  } else {
-    uint8_t p[4];
-    int pos = 0;
-    for (int y_pos = 0; y_pos < y_end; y_pos++) {
-      for (int x_pos = 0; x_pos < x_end; x_pos++) {
-        get_pixels_in_1D_char_array_by_block_2x2(
-            picture->y_buffer + y_pos * picture->y_stride + x_pos,
-            picture->y_stride, p);
-        pic_block_same_info[0][pos] = is_block_2x2_row_same_value(p);
-        pic_block_same_info[1][pos] = is_block_2x2_col_same_value(p);
-
-        pic_block_hash[0][pos] =
-            av1_get_crc_value(&x->crc_calculator1, p, length * sizeof(p[0]));
-        pic_block_hash[1][pos] =
-            av1_get_crc_value(&x->crc_calculator2, p, length * sizeof(p[0]));
-        pos++;
-      }
-      pos += width - 1;
-    }
-  }
-}
-
-void av1_generate_block_hash_value(const YV12_BUFFER_CONFIG *picture,
-                                   int block_size,
-                                   uint32_t *src_pic_block_hash[2],
-                                   uint32_t *dst_pic_block_hash[2],
-                                   int8_t *src_pic_block_same_info[3],
-                                   int8_t *dst_pic_block_same_info[3],
-                                   MACROBLOCK *x) {
-  const int pic_width = picture->y_crop_width;
-  const int x_end = picture->y_crop_width - block_size + 1;
-  const int y_end = picture->y_crop_height - block_size + 1;
-
-  const int src_size = block_size >> 1;
-  const int quad_size = block_size >> 2;
-
-  uint32_t p[4];
-  const int length = sizeof(p);
-
-  int pos = 0;
-  for (int y_pos = 0; y_pos < y_end; y_pos++) {
-    for (int x_pos = 0; x_pos < x_end; x_pos++) {
-      p[0] = src_pic_block_hash[0][pos];
-      p[1] = src_pic_block_hash[0][pos + src_size];
-      p[2] = src_pic_block_hash[0][pos + src_size * pic_width];
-      p[3] = src_pic_block_hash[0][pos + src_size * pic_width + src_size];
-      dst_pic_block_hash[0][pos] =
-          av1_get_crc_value(&x->crc_calculator1, (uint8_t *)p, length);
-
-      p[0] = src_pic_block_hash[1][pos];
-      p[1] = src_pic_block_hash[1][pos + src_size];
-      p[2] = src_pic_block_hash[1][pos + src_size * pic_width];
-      p[3] = src_pic_block_hash[1][pos + src_size * pic_width + src_size];
-      dst_pic_block_hash[1][pos] =
-          av1_get_crc_value(&x->crc_calculator2, (uint8_t *)p, length);
-
-      dst_pic_block_same_info[0][pos] =
-          src_pic_block_same_info[0][pos] &&
-          src_pic_block_same_info[0][pos + quad_size] &&
-          src_pic_block_same_info[0][pos + src_size] &&
-          src_pic_block_same_info[0][pos + src_size * pic_width] &&
-          src_pic_block_same_info[0][pos + src_size * pic_width + quad_size] &&
-          src_pic_block_same_info[0][pos + src_size * pic_width + src_size];
-
-      dst_pic_block_same_info[1][pos] =
-          src_pic_block_same_info[1][pos] &&
-          src_pic_block_same_info[1][pos + src_size] &&
-          src_pic_block_same_info[1][pos + quad_size * pic_width] &&
-          src_pic_block_same_info[1][pos + quad_size * pic_width + src_size] &&
-          src_pic_block_same_info[1][pos + src_size * pic_width] &&
-          src_pic_block_same_info[1][pos + src_size * pic_width + src_size];
-      pos++;
-    }
-    pos += block_size - 1;
-  }
-
-  if (block_size >= 4) {
-    const int size_minus_1 = block_size - 1;
-    pos = 0;
-    for (int y_pos = 0; y_pos < y_end; y_pos++) {
-      for (int x_pos = 0; x_pos < x_end; x_pos++) {
-        dst_pic_block_same_info[2][pos] =
-            (!dst_pic_block_same_info[0][pos] &&
-             !dst_pic_block_same_info[1][pos]) ||
-            (((x_pos & size_minus_1) == 0) && ((y_pos & size_minus_1) == 0));
-        pos++;
-      }
-      pos += block_size - 1;
-    }
-  }
-}
-
-void av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table,
-                                                 uint32_t *pic_hash[2],
-                                                 int8_t *pic_is_same,
-                                                 int pic_width, int pic_height,
-                                                 int block_size) {
-  const int x_end = pic_width - block_size + 1;
-  const int y_end = pic_height - block_size + 1;
-
-  const int8_t *src_is_added = pic_is_same;
-  const uint32_t *src_hash[2] = { pic_hash[0], pic_hash[1] };
-
-  int add_value = hash_block_size_to_index(block_size);
-  assert(add_value >= 0);
-  add_value <<= crc_bits;
-  const int crc_mask = (1 << crc_bits) - 1;
-
-  for (int x_pos = 0; x_pos < x_end; x_pos++) {
-    for (int y_pos = 0; y_pos < y_end; y_pos++) {
-      const int pos = y_pos * pic_width + x_pos;
-      // valid data
-      if (src_is_added[pos]) {
-        block_hash curr_block_hash;
-        curr_block_hash.x = x_pos;
-        curr_block_hash.y = y_pos;
-
-        const uint32_t hash_value1 = (src_hash[0][pos] & crc_mask) + add_value;
-        curr_block_hash.hash_value2 = src_hash[1][pos];
-
-        hash_table_add_to_table(p_hash_table, hash_value1, &curr_block_hash);
-      }
-    }
-  }
-}
-
-int av1_hash_is_horizontal_perfect(const YV12_BUFFER_CONFIG *picture,
-                                   int block_size, int x_start, int y_start) {
-  const int stride = picture->y_stride;
-  const uint8_t *p = picture->y_buffer + y_start * stride + x_start;
-
-  if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
-    const uint16_t *p16 = CONVERT_TO_SHORTPTR(p);
-    for (int i = 0; i < block_size; i++) {
-      for (int j = 1; j < block_size; j++) {
-        if (p16[j] != p16[0]) {
-          return 0;
-        }
-      }
-      p16 += stride;
-    }
-  } else {
-    for (int i = 0; i < block_size; i++) {
-      for (int j = 1; j < block_size; j++) {
-        if (p[j] != p[0]) {
-          return 0;
-        }
-      }
-      p += stride;
-    }
-  }
-
-  return 1;
-}
-
-int av1_hash_is_vertical_perfect(const YV12_BUFFER_CONFIG *picture,
-                                 int block_size, int x_start, int y_start) {
-  const int stride = picture->y_stride;
-  const uint8_t *p = picture->y_buffer + y_start * stride + x_start;
-
-  if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
-    const uint16_t *p16 = CONVERT_TO_SHORTPTR(p);
-    for (int i = 0; i < block_size; i++) {
-      for (int j = 1; j < block_size; j++) {
-        if (p16[j * stride + i] != p16[i]) {
-          return 0;
-        }
-      }
-    }
-  } else {
-    for (int i = 0; i < block_size; i++) {
-      for (int j = 1; j < block_size; j++) {
-        if (p[j * stride + i] != p[i]) {
-          return 0;
-        }
-      }
-    }
-  }
-  return 1;
-}
-
-void av1_get_block_hash_value(uint8_t *y_src, int stride, int block_size,
-                              uint32_t *hash_value1, uint32_t *hash_value2,
-                              int use_highbitdepth, MACROBLOCK *x) {
-  uint32_t to_hash[4];
-  const int add_value = hash_block_size_to_index(block_size) << crc_bits;
-  assert(add_value >= 0);
-  const int crc_mask = (1 << crc_bits) - 1;
-
-  // 2x2 subblock hash values in current CU
-  int sub_block_in_width = (block_size >> 1);
-  if (use_highbitdepth) {
-    uint16_t pixel_to_hash[4];
-    uint16_t *y16_src = CONVERT_TO_SHORTPTR(y_src);
-    for (int y_pos = 0; y_pos < block_size; y_pos += 2) {
-      for (int x_pos = 0; x_pos < block_size; x_pos += 2) {
-        int pos = (y_pos >> 1) * sub_block_in_width + (x_pos >> 1);
-        get_pixels_in_1D_short_array_by_block_2x2(
-            y16_src + y_pos * stride + x_pos, stride, pixel_to_hash);
-        assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
-        x->hash_value_buffer[0][0][pos] =
-            av1_get_crc_value(&x->crc_calculator1, (uint8_t *)pixel_to_hash,
-                              sizeof(pixel_to_hash));
-        x->hash_value_buffer[1][0][pos] =
-            av1_get_crc_value(&x->crc_calculator2, (uint8_t *)pixel_to_hash,
-                              sizeof(pixel_to_hash));
-      }
-    }
-  } else {
-    uint8_t pixel_to_hash[4];
-    for (int y_pos = 0; y_pos < block_size; y_pos += 2) {
-      for (int x_pos = 0; x_pos < block_size; x_pos += 2) {
-        int pos = (y_pos >> 1) * sub_block_in_width + (x_pos >> 1);
-        get_pixels_in_1D_char_array_by_block_2x2(y_src + y_pos * stride + x_pos,
-                                                 stride, pixel_to_hash);
-        assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
-        x->hash_value_buffer[0][0][pos] = av1_get_crc_value(
-            &x->crc_calculator1, pixel_to_hash, sizeof(pixel_to_hash));
-        x->hash_value_buffer[1][0][pos] = av1_get_crc_value(
-            &x->crc_calculator2, pixel_to_hash, sizeof(pixel_to_hash));
-      }
-    }
-  }
-
-  int src_sub_block_in_width = sub_block_in_width;
-  sub_block_in_width >>= 1;
-
-  int src_idx = 1;
-  int dst_idx = 0;
-
-  // 4x4 subblock hash values to current block hash values
-  for (int sub_width = 4; sub_width <= block_size; sub_width *= 2) {
-    src_idx = 1 - src_idx;
-    dst_idx = 1 - dst_idx;
-
-    int dst_pos = 0;
-    for (int y_pos = 0; y_pos < sub_block_in_width; y_pos++) {
-      for (int x_pos = 0; x_pos < sub_block_in_width; x_pos++) {
-        int srcPos = (y_pos << 1) * src_sub_block_in_width + (x_pos << 1);
-
-        assert(srcPos + 1 < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
-        assert(srcPos + src_sub_block_in_width + 1 <
-               AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
-        assert(dst_pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
-        to_hash[0] = x->hash_value_buffer[0][src_idx][srcPos];
-        to_hash[1] = x->hash_value_buffer[0][src_idx][srcPos + 1];
-        to_hash[2] =
-            x->hash_value_buffer[0][src_idx][srcPos + src_sub_block_in_width];
-        to_hash[3] = x->hash_value_buffer[0][src_idx]
-                                         [srcPos + src_sub_block_in_width + 1];
-
-        x->hash_value_buffer[0][dst_idx][dst_pos] = av1_get_crc_value(
-            &x->crc_calculator1, (uint8_t *)to_hash, sizeof(to_hash));
-
-        to_hash[0] = x->hash_value_buffer[1][src_idx][srcPos];
-        to_hash[1] = x->hash_value_buffer[1][src_idx][srcPos + 1];
-        to_hash[2] =
-            x->hash_value_buffer[1][src_idx][srcPos + src_sub_block_in_width];
-        to_hash[3] = x->hash_value_buffer[1][src_idx]
-                                         [srcPos + src_sub_block_in_width + 1];
-        x->hash_value_buffer[1][dst_idx][dst_pos] = av1_get_crc_value(
-            &x->crc_calculator2, (uint8_t *)to_hash, sizeof(to_hash));
-        dst_pos++;
-      }
-    }
-
-    src_sub_block_in_width = sub_block_in_width;
-    sub_block_in_width >>= 1;
-  }
-
-  *hash_value1 = (x->hash_value_buffer[0][dst_idx][0] & crc_mask) + add_value;
-  *hash_value2 = x->hash_value_buffer[1][dst_idx][0];
-}
diff --git a/third_party/aom/av1/encoder/hash_motion.h b/third_party/aom/av1/encoder/hash_motion.h
deleted file mode 100644
index df3ec3215..000000000
--- a/third_party/aom/av1/encoder/hash_motion.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_HASH_MOTION_H_
-#define AOM_AV1_ENCODER_HASH_MOTION_H_
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-#include "aom_scale/yv12config.h"
-#include "third_party/vector/vector.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// store a block's hash info.
-// x and y are the position from the top left of the picture
-// hash_value2 is used to store the second hash value
-typedef struct _block_hash {
-  int16_t x;
-  int16_t y;
-  uint32_t hash_value2;
-} block_hash;
-
-typedef struct _hash_table {
-  Vector **p_lookup_table;
-} hash_table;
-
-void av1_hash_table_init(hash_table *p_hash_table, struct macroblock *x);
-void av1_hash_table_destroy(hash_table *p_hash_table);
-void av1_hash_table_create(hash_table *p_hash_table);
-int32_t av1_hash_table_count(hash_table *p_hash_table, uint32_t hash_value);
-Iterator av1_hash_get_first_iterator(hash_table *p_hash_table,
-                                     uint32_t hash_value);
-int32_t av1_has_exact_match(hash_table *p_hash_table, uint32_t hash_value1,
-                            uint32_t hash_value2);
-void av1_generate_block_2x2_hash_value(const YV12_BUFFER_CONFIG *picture,
-                                       uint32_t *pic_block_hash[2],
-                                       int8_t *pic_block_same_info[3],
-                                       struct macroblock *x);
-void av1_generate_block_hash_value(const YV12_BUFFER_CONFIG *picture,
-                                   int block_size,
-                                   uint32_t *src_pic_block_hash[2],
-                                   uint32_t *dst_pic_block_hash[2],
-                                   int8_t *src_pic_block_same_info[3],
-                                   int8_t *dst_pic_block_same_info[3],
-                                   struct macroblock *x);
-void av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table,
-                                                 uint32_t *pic_hash[2],
-                                                 int8_t *pic_is_same,
-                                                 int pic_width, int pic_height,
-                                                 int block_size);
-
-// check whether the block starts from (x_start, y_start) with the size of
-// block_size x block_size has the same color in all rows
-int av1_hash_is_horizontal_perfect(const YV12_BUFFER_CONFIG *picture,
-                                   int block_size, int x_start, int y_start);
-// check whether the block starts from (x_start, y_start) with the size of
-// block_size x block_size has the same color in all columns
-int av1_hash_is_vertical_perfect(const YV12_BUFFER_CONFIG *picture,
-                                 int block_size, int x_start, int y_start);
-void av1_get_block_hash_value(uint8_t *y_src, int stride, int block_size,
-                              uint32_t *hash_value1, uint32_t *hash_value2,
-                              int use_highbitdepth, struct macroblock *x);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_HASH_MOTION_H_
diff --git a/third_party/aom/av1/encoder/hybrid_fwd_txfm.c b/third_party/aom/av1/encoder/hybrid_fwd_txfm.c
deleted file mode 100644
index 67898fd18..000000000
--- a/third_party/aom/av1/encoder/hybrid_fwd_txfm.c
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "av1/common/idct.h"
-#include "av1/encoder/hybrid_fwd_txfm.h"
-
-/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
-   pixel. */
-void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
-  int i;
-  tran_high_t a1, b1, c1, d1, e1;
-  const int16_t *ip_pass0 = input;
-  const tran_low_t *ip = NULL;
-  tran_low_t *op = output;
-
-  for (i = 0; i < 4; i++) {
-    a1 = ip_pass0[0 * stride];
-    b1 = ip_pass0[1 * stride];
-    c1 = ip_pass0[2 * stride];
-    d1 = ip_pass0[3 * stride];
-
-    a1 += b1;
-    d1 = d1 - c1;
-    e1 = (a1 - d1) >> 1;
-    b1 = e1 - b1;
-    c1 = e1 - c1;
-    a1 -= c1;
-    d1 += b1;
-    op[0] = (tran_low_t)a1;
-    op[4] = (tran_low_t)c1;
-    op[8] = (tran_low_t)d1;
-    op[12] = (tran_low_t)b1;
-
-    ip_pass0++;
-    op++;
-  }
-  ip = output;
-  op = output;
-
-  for (i = 0; i < 4; i++) {
-    a1 = ip[0];
-    b1 = ip[1];
-    c1 = ip[2];
-    d1 = ip[3];
-
-    a1 += b1;
-    d1 -= c1;
-    e1 = (a1 - d1) >> 1;
-    b1 = e1 - b1;
-    c1 = e1 - c1;
-    a1 -= c1;
-    d1 += b1;
-    op[0] = (tran_low_t)(a1 * UNIT_QUANT_FACTOR);
-    op[1] = (tran_low_t)(c1 * UNIT_QUANT_FACTOR);
-    op[2] = (tran_low_t)(d1 * UNIT_QUANT_FACTOR);
-    op[3] = (tran_low_t)(b1 * UNIT_QUANT_FACTOR);
-
-    ip += 4;
-    op += 4;
-  }
-}
-
-void av1_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output,
-                          int stride) {
-  av1_fwht4x4_c(input, output, stride);
-}
-
-static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
-                                int diff_stride, TxfmParam *txfm_param) {
-  int32_t *dst_coeff = (int32_t *)coeff;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  const int bd = txfm_param->bd;
-  if (txfm_param->lossless) {
-    assert(tx_type == DCT_DCT);
-    av1_highbd_fwht4x4(src_diff, coeff, diff_stride);
-    return;
-  }
-  switch (tx_type) {
-    // use the c version for anything including identity for now
-    case V_DCT:
-    case H_DCT:
-    case V_ADST:
-    case H_ADST:
-    case V_FLIPADST:
-    case H_FLIPADST:
-    case IDTX:
-      av1_fwd_txfm2d_4x4_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
-      break;
-    default:
-      av1_fwd_txfm2d_4x4(src_diff, dst_coeff, diff_stride, tx_type, bd);
-      break;
-  }
-}
-
-static void highbd_fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
-                                int diff_stride, TxfmParam *txfm_param) {
-  int32_t *dst_coeff = (int32_t *)coeff;
-  av1_fwd_txfm2d_4x8_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
-                       txfm_param->bd);
-}
-
-static void highbd_fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
-                                int diff_stride, TxfmParam *txfm_param) {
-  int32_t *dst_coeff = (int32_t *)coeff;
-  av1_fwd_txfm2d_8x4_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
-                       txfm_param->bd);
-}
-
-static void highbd_fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff,
-                                 int diff_stride, TxfmParam *txfm_param) {
-  int32_t *dst_coeff = (int32_t *)coeff;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  const int bd = txfm_param->bd;
-  switch (tx_type) {
-      // use the c version for anything including identity for now
-    case V_DCT:
-    case H_DCT:
-    case V_ADST:
-    case H_ADST:
-    case V_FLIPADST:
-    case H_FLIPADST:
-    case IDTX:
-      av1_fwd_txfm2d_8x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
-      break;
-    default:
-      av1_fwd_txfm2d_8x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
-      break;
-  }
-}
-
-static void highbd_fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff,
-                                 int diff_stride, TxfmParam *txfm_param) {
-  int32_t *dst_coeff = (int32_t *)coeff;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  const int bd = txfm_param->bd;
-  switch (tx_type) {
-      // use the c version for anything including identity for now
-    case V_DCT:
-    case H_DCT:
-    case V_ADST:
-    case H_ADST:
-    case V_FLIPADST:
-    case H_FLIPADST:
-    case IDTX:
-      av1_fwd_txfm2d_16x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
-      break;
-    default:
-      av1_fwd_txfm2d_16x8(src_diff, dst_coeff, diff_stride, tx_type, bd);
-      break;
-  }
-}
-
-static void highbd_fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff,
-                                  int diff_stride, TxfmParam *txfm_param) {
-  int32_t *dst_coeff = (int32_t *)coeff;
-  av1_fwd_txfm2d_16x32_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
-                         txfm_param->bd);
-}
-
-static void highbd_fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
-                                  int diff_stride, TxfmParam *txfm_param) {
-  int32_t *dst_coeff = (int32_t *)coeff;
-  av1_fwd_txfm2d_32x16_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
-                         txfm_param->bd);
-}
-
-static void highbd_fwd_txfm_16x4(const int16_t *src_diff, tran_low_t *coeff,
-                                 int diff_stride, TxfmParam *txfm_param) {
-  int32_t *dst_coeff = (int32_t *)coeff;
-  av1_fwd_txfm2d_16x4_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
-                        txfm_param->bd);
-}
-
-static void highbd_fwd_txfm_4x16(const int16_t *src_diff, tran_low_t *coeff,
-                                 int diff_stride, TxfmParam *txfm_param) {
-  int32_t *dst_coeff = (int32_t *)coeff;
-  av1_fwd_txfm2d_4x16_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
-                        txfm_param->bd);
-}
-
-static void highbd_fwd_txfm_32x8(const int16_t *src_diff, tran_low_t *coeff,
-                                 int diff_stride, TxfmParam *txfm_param) {
-  int32_t *dst_coeff = (int32_t *)coeff;
-  av1_fwd_txfm2d_32x8_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
-                        txfm_param->bd);
-}
-
-static void highbd_fwd_txfm_8x32(const int16_t *src_diff, tran_low_t *coeff,
-                                 int diff_stride, TxfmParam *txfm_param) {
-  int32_t *dst_coeff = (int32_t *)coeff;
-  av1_fwd_txfm2d_8x32_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
-                        txfm_param->bd);
-}
-
-static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
-                                int diff_stride, TxfmParam *txfm_param) {
-  int32_t *dst_coeff = (int32_t *)coeff;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  const int bd = txfm_param->bd;
-  switch (tx_type) {
-    // use the c version for anything including identity for now
-    case V_DCT:
-    case H_DCT:
-    case V_ADST:
-    case H_ADST:
-    case V_FLIPADST:
-    case H_FLIPADST:
-    case IDTX:
-      av1_fwd_txfm2d_8x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
-      break;
-    default:
-      av1_fwd_txfm2d_8x8(src_diff, dst_coeff, diff_stride, tx_type, bd);
-      break;
-  }
-}
-
-static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
-                                  int diff_stride, TxfmParam *txfm_param) {
-  int32_t *dst_coeff = (int32_t *)coeff;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  const int bd = txfm_param->bd;
-  switch (tx_type) {
-    // use the c version for anything including identity for now
-    case V_DCT:
-    case H_DCT:
-    case V_ADST:
-    case H_ADST:
-    case V_FLIPADST:
-    case H_FLIPADST:
-    case IDTX:
-      av1_fwd_txfm2d_16x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
-      break;
-    default:
-      av1_fwd_txfm2d_16x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
-      break;
-  }
-}
-
-static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff,
-                                  int diff_stride, TxfmParam *txfm_param) {
-  int32_t *dst_coeff = (int32_t *)coeff;
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  const int bd = txfm_param->bd;
-  switch (tx_type) {
-    // use the c version for anything including identity for now
-    case V_DCT:
-    case H_DCT:
-    case V_ADST:
-    case H_ADST:
-    case V_FLIPADST:
-    case H_FLIPADST:
-    case IDTX:
-      av1_fwd_txfm2d_32x32_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
-      break;
-    default:
-      av1_fwd_txfm2d_32x32(src_diff, dst_coeff, diff_stride, tx_type, bd);
-      break;
-  }
-}
-
-static void highbd_fwd_txfm_32x64(const int16_t *src_diff, tran_low_t *coeff,
-                                  int diff_stride, TxfmParam *txfm_param) {
-  assert(txfm_param->tx_type == DCT_DCT);
-  int32_t *dst_coeff = (int32_t *)coeff;
-  const int bd = txfm_param->bd;
-  av1_fwd_txfm2d_32x64_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
-}
-
-static void highbd_fwd_txfm_64x32(const int16_t *src_diff, tran_low_t *coeff,
-                                  int diff_stride, TxfmParam *txfm_param) {
-  assert(txfm_param->tx_type == DCT_DCT);
-  int32_t *dst_coeff = (int32_t *)coeff;
-  const int bd = txfm_param->bd;
-  av1_fwd_txfm2d_64x32_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
-}
-
-static void highbd_fwd_txfm_16x64(const int16_t *src_diff, tran_low_t *coeff,
-                                  int diff_stride, TxfmParam *txfm_param) {
-  assert(txfm_param->tx_type == DCT_DCT);
-  int32_t *dst_coeff = (int32_t *)coeff;
-  const int bd = txfm_param->bd;
-  av1_fwd_txfm2d_16x64_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
-}
-
-static void highbd_fwd_txfm_64x16(const int16_t *src_diff, tran_low_t *coeff,
-                                  int diff_stride, TxfmParam *txfm_param) {
-  assert(txfm_param->tx_type == DCT_DCT);
-  int32_t *dst_coeff = (int32_t *)coeff;
-  const int bd = txfm_param->bd;
-  av1_fwd_txfm2d_64x16_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
-}
-
-static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
-                                  int diff_stride, TxfmParam *txfm_param) {
-  assert(txfm_param->tx_type == DCT_DCT);
-  int32_t *dst_coeff = (int32_t *)coeff;
-  const int bd = txfm_param->bd;
-  av1_fwd_txfm2d_64x64(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
-}
-
-void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
-                  TxfmParam *txfm_param) {
-  if (txfm_param->bd == 8)
-    av1_lowbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
-  else
-    av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
-}
-
-void av1_lowbd_fwd_txfm_c(const int16_t *src_diff, tran_low_t *coeff,
-                          int diff_stride, TxfmParam *txfm_param) {
-  av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
-}
-
-void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
-                         int diff_stride, TxfmParam *txfm_param) {
-  assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
-  const TX_SIZE tx_size = txfm_param->tx_size;
-  switch (tx_size) {
-    case TX_64X64:
-      highbd_fwd_txfm_64x64(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_32X64:
-      highbd_fwd_txfm_32x64(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_64X32:
-      highbd_fwd_txfm_64x32(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_16X64:
-      highbd_fwd_txfm_16x64(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_64X16:
-      highbd_fwd_txfm_64x16(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_32X32:
-      highbd_fwd_txfm_32x32(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_16X16:
-      highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_8X8:
-      highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_4X8:
-      highbd_fwd_txfm_4x8(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_8X4:
-      highbd_fwd_txfm_8x4(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_8X16:
-      highbd_fwd_txfm_8x16(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_16X8:
-      highbd_fwd_txfm_16x8(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_16X32:
-      highbd_fwd_txfm_16x32(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_32X16:
-      highbd_fwd_txfm_32x16(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_4X4:
-      highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_4X16:
-      highbd_fwd_txfm_4x16(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_16X4:
-      highbd_fwd_txfm_16x4(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_8X32:
-      highbd_fwd_txfm_8x32(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    case TX_32X8:
-      highbd_fwd_txfm_32x8(src_diff, coeff, diff_stride, txfm_param);
-      break;
-    default: assert(0); break;
-  }
-}
diff --git a/third_party/aom/av1/encoder/hybrid_fwd_txfm.h b/third_party/aom/av1/encoder/hybrid_fwd_txfm.h
deleted file mode 100644
index daabc7119..000000000
--- a/third_party/aom/av1/encoder/hybrid_fwd_txfm.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_HYBRID_FWD_TXFM_H_
-#define AOM_AV1_ENCODER_HYBRID_FWD_TXFM_H_
-
-#include "config/aom_config.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
-                  TxfmParam *txfm_param);
-
-void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
-                         int diff_stride, TxfmParam *txfm_param);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_HYBRID_FWD_TXFM_H_
diff --git a/third_party/aom/av1/encoder/k_means_template.h b/third_party/aom/av1/encoder/k_means_template.h
deleted file mode 100644
index 9e526b88b..000000000
--- a/third_party/aom/av1/encoder/k_means_template.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdint.h>
-#include <string.h>
-
-#include "av1/encoder/palette.h"
-#include "av1/encoder/random.h"
-
-#ifndef AV1_K_MEANS_DIM
-#error "This template requires AV1_K_MEANS_DIM to be defined"
-#endif
-
-#define RENAME_(x, y) AV1_K_MEANS_RENAME(x, y)
-#define RENAME(x) RENAME_(x, AV1_K_MEANS_DIM)
-
-static int RENAME(calc_dist)(const int *p1, const int *p2) {
-  int dist = 0;
-  for (int i = 0; i < AV1_K_MEANS_DIM; ++i) {
-    const int diff = p1[i] - p2[i];
-    dist += diff * diff;
-  }
-  return dist;
-}
-
-void RENAME(av1_calc_indices)(const int *data, const int *centroids,
-                              uint8_t *indices, int n, int k) {
-  for (int i = 0; i < n; ++i) {
-    int min_dist = RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM, centroids);
-    indices[i] = 0;
-    for (int j = 1; j < k; ++j) {
-      const int this_dist = RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM,
-                                              centroids + j * AV1_K_MEANS_DIM);
-      if (this_dist < min_dist) {
-        min_dist = this_dist;
-        indices[i] = j;
-      }
-    }
-  }
-}
-
-static void RENAME(calc_centroids)(const int *data, int *centroids,
-                                   const uint8_t *indices, int n, int k) {
-  int i, j;
-  int count[PALETTE_MAX_SIZE] = { 0 };
-  unsigned int rand_state = (unsigned int)data[0];
-  assert(n <= 32768);
-  memset(centroids, 0, sizeof(centroids[0]) * k * AV1_K_MEANS_DIM);
-
-  for (i = 0; i < n; ++i) {
-    const int index = indices[i];
-    assert(index < k);
-    ++count[index];
-    for (j = 0; j < AV1_K_MEANS_DIM; ++j) {
-      centroids[index * AV1_K_MEANS_DIM + j] += data[i * AV1_K_MEANS_DIM + j];
-    }
-  }
-
-  for (i = 0; i < k; ++i) {
-    if (count[i] == 0) {
-      memcpy(centroids + i * AV1_K_MEANS_DIM,
-             data + (lcg_rand16(&rand_state) % n) * AV1_K_MEANS_DIM,
-             sizeof(centroids[0]) * AV1_K_MEANS_DIM);
-    } else {
-      for (j = 0; j < AV1_K_MEANS_DIM; ++j) {
-        centroids[i * AV1_K_MEANS_DIM + j] =
-            DIVIDE_AND_ROUND(centroids[i * AV1_K_MEANS_DIM + j], count[i]);
-      }
-    }
-  }
-}
-
-static int64_t RENAME(calc_total_dist)(const int *data, const int *centroids,
-                                       const uint8_t *indices, int n, int k) {
-  int64_t dist = 0;
-  (void)k;
-  for (int i = 0; i < n; ++i) {
-    dist += RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM,
-                              centroids + indices[i] * AV1_K_MEANS_DIM);
-  }
-  return dist;
-}
-
-void RENAME(av1_k_means)(const int *data, int *centroids, uint8_t *indices,
-                         int n, int k, int max_itr) {
-  int pre_centroids[2 * PALETTE_MAX_SIZE];
-  uint8_t pre_indices[MAX_SB_SQUARE];
-
-  RENAME(av1_calc_indices)(data, centroids, indices, n, k);
-  int64_t this_dist = RENAME(calc_total_dist)(data, centroids, indices, n, k);
-
-  for (int i = 0; i < max_itr; ++i) {
-    const int64_t pre_dist = this_dist;
-    memcpy(pre_centroids, centroids,
-           sizeof(pre_centroids[0]) * k * AV1_K_MEANS_DIM);
-    memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n);
-
-    RENAME(calc_centroids)(data, centroids, indices, n, k);
-    RENAME(av1_calc_indices)(data, centroids, indices, n, k);
-    this_dist = RENAME(calc_total_dist)(data, centroids, indices, n, k);
-
-    if (this_dist > pre_dist) {
-      memcpy(centroids, pre_centroids,
-             sizeof(pre_centroids[0]) * k * AV1_K_MEANS_DIM);
-      memcpy(indices, pre_indices, sizeof(pre_indices[0]) * n);
-      break;
-    }
-    if (!memcmp(centroids, pre_centroids,
-                sizeof(pre_centroids[0]) * k * AV1_K_MEANS_DIM))
-      break;
-  }
-}
-#undef RENAME_
-#undef RENAME
diff --git a/third_party/aom/av1/encoder/lookahead.c b/third_party/aom/av1/encoder/lookahead.c
deleted file mode 100644
index 1bf8ecbac..000000000
--- a/third_party/aom/av1/encoder/lookahead.c
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <assert.h>
-#include <stdlib.h>
-
-#include "config/aom_config.h"
-
-#include "av1/common/common.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/extend.h"
-#include "av1/encoder/lookahead.h"
-
-/* Return the buffer at the given absolute index and increment the index */
-static struct lookahead_entry *pop(struct lookahead_ctx *ctx, int *idx) {
-  int index = *idx;
-  struct lookahead_entry *buf = ctx->buf + index;
-
-  assert(index < ctx->max_sz);
-  if (++index >= ctx->max_sz) index -= ctx->max_sz;
-  *idx = index;
-  return buf;
-}
-
-void av1_lookahead_destroy(struct lookahead_ctx *ctx) {
-  if (ctx) {
-    if (ctx->buf) {
-      int i;
-
-      for (i = 0; i < ctx->max_sz; i++) aom_free_frame_buffer(&ctx->buf[i].img);
-      free(ctx->buf);
-    }
-    free(ctx);
-  }
-}
-
-struct lookahead_ctx *av1_lookahead_init(
-    unsigned int width, unsigned int height, unsigned int subsampling_x,
-    unsigned int subsampling_y, int use_highbitdepth, unsigned int depth) {
-  struct lookahead_ctx *ctx = NULL;
-
-  // Clamp the lookahead queue depth
-  depth = clamp(depth, 1, MAX_LAG_BUFFERS);
-
-  // Allocate memory to keep previous source frames available.
-  depth += MAX_PRE_FRAMES;
-
-  // Allocate the lookahead structures
-  ctx = calloc(1, sizeof(*ctx));
-  if (ctx) {
-    const int legacy_byte_alignment = 0;
-    unsigned int i;
-    ctx->max_sz = depth;
-    ctx->buf = calloc(depth, sizeof(*ctx->buf));
-    if (!ctx->buf) goto bail;
-    for (i = 0; i < depth; i++)
-      if (aom_alloc_frame_buffer(&ctx->buf[i].img, width, height, subsampling_x,
-                                 subsampling_y, use_highbitdepth,
-                                 AOM_BORDER_IN_PIXELS, legacy_byte_alignment))
-        goto bail;
-  }
-  return ctx;
-bail:
-  av1_lookahead_destroy(ctx);
-  return NULL;
-}
-
-#define USE_PARTIAL_COPY 0
-
-int av1_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
-                       int64_t ts_start, int64_t ts_end, int use_highbitdepth,
-                       aom_enc_frame_flags_t flags) {
-  struct lookahead_entry *buf;
-#if USE_PARTIAL_COPY
-  int row, col, active_end;
-  int mb_rows = (src->y_height + 15) >> 4;
-  int mb_cols = (src->y_width + 15) >> 4;
-#endif
-  int width = src->y_crop_width;
-  int height = src->y_crop_height;
-  int uv_width = src->uv_crop_width;
-  int uv_height = src->uv_crop_height;
-  int subsampling_x = src->subsampling_x;
-  int subsampling_y = src->subsampling_y;
-  int larger_dimensions, new_dimensions;
-
-  if (ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz) return 1;
-  ctx->sz++;
-  buf = pop(ctx, &ctx->write_idx);
-
-  new_dimensions = width != buf->img.y_crop_width ||
-                   height != buf->img.y_crop_height ||
-                   uv_width != buf->img.uv_crop_width ||
-                   uv_height != buf->img.uv_crop_height;
-  larger_dimensions = width > buf->img.y_width || height > buf->img.y_height ||
-                      uv_width > buf->img.uv_width ||
-                      uv_height > buf->img.uv_height;
-  assert(!larger_dimensions || new_dimensions);
-
-#if USE_PARTIAL_COPY
-  // TODO(jkoleszar): This is disabled for now, as
-  // av1_copy_and_extend_frame_with_rect is not subsampling/alpha aware.
-
-  // Only do this partial copy if the following conditions are all met:
-  // 1. Lookahead queue has has size of 1.
-  // 2. Active map is provided.
-  // 3. This is not a key frame, golden nor altref frame.
-  if (!new_dimensions && ctx->max_sz == 1 && active_map && !flags) {
-    for (row = 0; row < mb_rows; ++row) {
-      col = 0;
-
-      while (1) {
-        // Find the first active macroblock in this row.
-        for (; col < mb_cols; ++col) {
-          if (active_map[col]) break;
-        }
-
-        // No more active macroblock in this row.
-        if (col == mb_cols) break;
-
-        // Find the end of active region in this row.
-        active_end = col;
-
-        for (; active_end < mb_cols; ++active_end) {
-          if (!active_map[active_end]) break;
-        }
-
-        // Only copy this active region.
-        av1_copy_and_extend_frame_with_rect(src, &buf->img, row << 4, col << 4,
-                                            16, (active_end - col) << 4);
-
-        // Start again from the end of this active region.
-        col = active_end;
-      }
-
-      active_map += mb_cols;
-    }
-  } else {
-#endif
-    if (larger_dimensions) {
-      YV12_BUFFER_CONFIG new_img;
-      memset(&new_img, 0, sizeof(new_img));
-      if (aom_alloc_frame_buffer(&new_img, width, height, subsampling_x,
-                                 subsampling_y, use_highbitdepth,
-                                 AOM_BORDER_IN_PIXELS, 0))
-        return 1;
-      aom_free_frame_buffer(&buf->img);
-      buf->img = new_img;
-    } else if (new_dimensions) {
-      buf->img.y_crop_width = src->y_crop_width;
-      buf->img.y_crop_height = src->y_crop_height;
-      buf->img.uv_crop_width = src->uv_crop_width;
-      buf->img.uv_crop_height = src->uv_crop_height;
-      buf->img.subsampling_x = src->subsampling_x;
-      buf->img.subsampling_y = src->subsampling_y;
-    }
-    // Partial copy not implemented yet
-    av1_copy_and_extend_frame(src, &buf->img);
-#if USE_PARTIAL_COPY
-  }
-#endif
-
-  buf->ts_start = ts_start;
-  buf->ts_end = ts_end;
-  buf->flags = flags;
-  return 0;
-}
-
-struct lookahead_entry *av1_lookahead_pop(struct lookahead_ctx *ctx,
-                                          int drain) {
-  struct lookahead_entry *buf = NULL;
-
-  if (ctx && ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) {
-    buf = pop(ctx, &ctx->read_idx);
-    ctx->sz--;
-  }
-  return buf;
-}
-
-struct lookahead_entry *av1_lookahead_peek(struct lookahead_ctx *ctx,
-                                           int index) {
-  struct lookahead_entry *buf = NULL;
-
-  if (index >= 0) {
-    // Forward peek
-    if (index < ctx->sz) {
-      index += ctx->read_idx;
-      if (index >= ctx->max_sz) index -= ctx->max_sz;
-      buf = ctx->buf + index;
-    }
-  } else if (index < 0) {
-    // Backward peek
-    if (-index <= MAX_PRE_FRAMES) {
-      index += (int)(ctx->read_idx);
-      if (index < 0) index += (int)(ctx->max_sz);
-      buf = ctx->buf + index;
-    }
-  }
-
-  return buf;
-}
-
-unsigned int av1_lookahead_depth(struct lookahead_ctx *ctx) { return ctx->sz; }
diff --git a/third_party/aom/av1/encoder/lookahead.h b/third_party/aom/av1/encoder/lookahead.h
deleted file mode 100644
index e55224cf7..000000000
--- a/third_party/aom/av1/encoder/lookahead.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_LOOKAHEAD_H_
-#define AOM_AV1_ENCODER_LOOKAHEAD_H_
-
-#include "aom_scale/yv12config.h"
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define MAX_LAG_BUFFERS 25
-
-struct lookahead_entry {
-  YV12_BUFFER_CONFIG img;
-  int64_t ts_start;
-  int64_t ts_end;
-  aom_enc_frame_flags_t flags;
-};
-
-// The max of past frames we want to keep in the queue.
-#define MAX_PRE_FRAMES 1
-
-struct lookahead_ctx {
-  int max_sz;                  /* Absolute size of the queue */
-  int sz;                      /* Number of buffers currently in the queue */
-  int read_idx;                /* Read index */
-  int write_idx;               /* Write index */
-  struct lookahead_entry *buf; /* Buffer list */
-};
-
-/**\brief Initializes the lookahead stage
- *
- * The lookahead stage is a queue of frame buffers on which some analysis
- * may be done when buffers are enqueued.
- */
-struct lookahead_ctx *av1_lookahead_init(
-    unsigned int width, unsigned int height, unsigned int subsampling_x,
-    unsigned int subsampling_y, int use_highbitdepth, unsigned int depth);
-
-/**\brief Destroys the lookahead stage
- */
-void av1_lookahead_destroy(struct lookahead_ctx *ctx);
-
-/**\brief Enqueue a source buffer
- *
- * This function will copy the source image into a new framebuffer with
- * the expected stride/border.
- *
- * If active_map is non-NULL and there is only one frame in the queue, then copy
- * only active macroblocks.
- *
- * \param[in] ctx         Pointer to the lookahead context
- * \param[in] src         Pointer to the image to enqueue
- * \param[in] ts_start    Timestamp for the start of this frame
- * \param[in] ts_end      Timestamp for the end of this frame
- * \param[in] flags       Flags set on this frame
- * \param[in] active_map  Map that specifies which macroblock is active
- */
-int av1_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
-                       int64_t ts_start, int64_t ts_end, int use_highbitdepth,
-                       aom_enc_frame_flags_t flags);
-
-/**\brief Get the next source buffer to encode
- *
- *
- * \param[in] ctx       Pointer to the lookahead context
- * \param[in] drain     Flag indicating the buffer should be drained
- *                      (return a buffer regardless of the current queue depth)
- *
- * \retval NULL, if drain set and queue is empty
- * \retval NULL, if drain not set and queue not of the configured depth
- */
-struct lookahead_entry *av1_lookahead_pop(struct lookahead_ctx *ctx, int drain);
-
-/**\brief Get a future source buffer to encode
- *
- * \param[in] ctx       Pointer to the lookahead context
- * \param[in] index     Index of the frame to be returned, 0 == next frame
- *
- * \retval NULL, if no buffer exists at the specified index
- */
-struct lookahead_entry *av1_lookahead_peek(struct lookahead_ctx *ctx,
-                                           int index);
-
-/**\brief Get the number of frames currently in the lookahead queue
- *
- * \param[in] ctx       Pointer to the lookahead context
- */
-unsigned int av1_lookahead_depth(struct lookahead_ctx *ctx);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_LOOKAHEAD_H_
diff --git a/third_party/aom/av1/encoder/mathutils.h b/third_party/aom/av1/encoder/mathutils.h
deleted file mode 100644
index 64f936176..000000000
--- a/third_party/aom/av1/encoder/mathutils.h
+++ /dev/null
@@ -1,359 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_MATHUTILS_H_
-#define AOM_AV1_ENCODER_MATHUTILS_H_
-
-#include <memory.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-
-static const double TINY_NEAR_ZERO = 1.0E-16;
-
-// Solves Ax = b, where x and b are column vectors of size nx1 and A is nxn
-static INLINE int linsolve(int n, double *A, int stride, double *b, double *x) {
-  int i, j, k;
-  double c;
-  // Forward elimination
-  for (k = 0; k < n - 1; k++) {
-    // Bring the largest magnitude to the diagonal position
-    for (i = n - 1; i > k; i--) {
-      if (fabs(A[(i - 1) * stride + k]) < fabs(A[i * stride + k])) {
-        for (j = 0; j < n; j++) {
-          c = A[i * stride + j];
-          A[i * stride + j] = A[(i - 1) * stride + j];
-          A[(i - 1) * stride + j] = c;
-        }
-        c = b[i];
-        b[i] = b[i - 1];
-        b[i - 1] = c;
-      }
-    }
-    for (i = k; i < n - 1; i++) {
-      if (fabs(A[k * stride + k]) < TINY_NEAR_ZERO) return 0;
-      c = A[(i + 1) * stride + k] / A[k * stride + k];
-      for (j = 0; j < n; j++) A[(i + 1) * stride + j] -= c * A[k * stride + j];
-      b[i + 1] -= c * b[k];
-    }
-  }
-  // Backward substitution
-  for (i = n - 1; i >= 0; i--) {
-    if (fabs(A[i * stride + i]) < TINY_NEAR_ZERO) return 0;
-    c = 0;
-    for (j = i + 1; j <= n - 1; j++) c += A[i * stride + j] * x[j];
-    x[i] = (b[i] - c) / A[i * stride + i];
-  }
-
-  return 1;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Least-squares
-// Solves for n-dim x in a least squares sense to minimize |Ax - b|^2
-// The solution is simply x = (A'A)^-1 A'b or simply the solution for
-// the system: A'A x = A'b
-static INLINE int least_squares(int n, double *A, int rows, int stride,
-                                double *b, double *scratch, double *x) {
-  int i, j, k;
-  double *scratch_ = NULL;
-  double *AtA, *Atb;
-  if (!scratch) {
-    scratch_ = (double *)aom_malloc(sizeof(*scratch) * n * (n + 1));
-    scratch = scratch_;
-  }
-  AtA = scratch;
-  Atb = scratch + n * n;
-
-  for (i = 0; i < n; ++i) {
-    for (j = i; j < n; ++j) {
-      AtA[i * n + j] = 0.0;
-      for (k = 0; k < rows; ++k)
-        AtA[i * n + j] += A[k * stride + i] * A[k * stride + j];
-      AtA[j * n + i] = AtA[i * n + j];
-    }
-    Atb[i] = 0;
-    for (k = 0; k < rows; ++k) Atb[i] += A[k * stride + i] * b[k];
-  }
-  int ret = linsolve(n, AtA, n, Atb, x);
-  if (scratch_) aom_free(scratch_);
-  return ret;
-}
-
-// Matrix multiply
-static INLINE void multiply_mat(const double *m1, const double *m2, double *res,
-                                const int m1_rows, const int inner_dim,
-                                const int m2_cols) {
-  double sum;
-
-  int row, col, inner;
-  for (row = 0; row < m1_rows; ++row) {
-    for (col = 0; col < m2_cols; ++col) {
-      sum = 0;
-      for (inner = 0; inner < inner_dim; ++inner)
-        sum += m1[row * inner_dim + inner] * m2[inner * m2_cols + col];
-      *(res++) = sum;
-    }
-  }
-}
-
-//
-// The functions below are needed only for homography computation
-// Remove if the homography models are not used.
-//
-///////////////////////////////////////////////////////////////////////////////
-// svdcmp
-// Adopted from Numerical Recipes in C
-
-static INLINE double sign(double a, double b) {
-  return ((b) >= 0 ? fabs(a) : -fabs(a));
-}
-
-static INLINE double pythag(double a, double b) {
-  double ct;
-  const double absa = fabs(a);
-  const double absb = fabs(b);
-
-  if (absa > absb) {
-    ct = absb / absa;
-    return absa * sqrt(1.0 + ct * ct);
-  } else {
-    ct = absa / absb;
-    return (absb == 0) ? 0 : absb * sqrt(1.0 + ct * ct);
-  }
-}
-
-static INLINE int svdcmp(double **u, int m, int n, double w[], double **v) {
-  const int max_its = 30;
-  int flag, i, its, j, jj, k, l, nm;
-  double anorm, c, f, g, h, s, scale, x, y, z;
-  double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1));
-  g = scale = anorm = 0.0;
-  for (i = 0; i < n; i++) {
-    l = i + 1;
-    rv1[i] = scale * g;
-    g = s = scale = 0.0;
-    if (i < m) {
-      for (k = i; k < m; k++) scale += fabs(u[k][i]);
-      if (scale != 0.) {
-        for (k = i; k < m; k++) {
-          u[k][i] /= scale;
-          s += u[k][i] * u[k][i];
-        }
-        f = u[i][i];
-        g = -sign(sqrt(s), f);
-        h = f * g - s;
-        u[i][i] = f - g;
-        for (j = l; j < n; j++) {
-          for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j];
-          f = s / h;
-          for (k = i; k < m; k++) u[k][j] += f * u[k][i];
-        }
-        for (k = i; k < m; k++) u[k][i] *= scale;
-      }
-    }
-    w[i] = scale * g;
-    g = s = scale = 0.0;
-    if (i < m && i != n - 1) {
-      for (k = l; k < n; k++) scale += fabs(u[i][k]);
-      if (scale != 0.) {
-        for (k = l; k < n; k++) {
-          u[i][k] /= scale;
-          s += u[i][k] * u[i][k];
-        }
-        f = u[i][l];
-        g = -sign(sqrt(s), f);
-        h = f * g - s;
-        u[i][l] = f - g;
-        for (k = l; k < n; k++) rv1[k] = u[i][k] / h;
-        for (j = l; j < m; j++) {
-          for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k];
-          for (k = l; k < n; k++) u[j][k] += s * rv1[k];
-        }
-        for (k = l; k < n; k++) u[i][k] *= scale;
-      }
-    }
-    anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i])));
-  }
-
-  for (i = n - 1; i >= 0; i--) {
-    if (i < n - 1) {
-      if (g != 0.) {
-        for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g;
-        for (j = l; j < n; j++) {
-          for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j];
-          for (k = l; k < n; k++) v[k][j] += s * v[k][i];
-        }
-      }
-      for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
-    }
-    v[i][i] = 1.0;
-    g = rv1[i];
-    l = i;
-  }
-  for (i = AOMMIN(m, n) - 1; i >= 0; i--) {
-    l = i + 1;
-    g = w[i];
-    for (j = l; j < n; j++) u[i][j] = 0.0;
-    if (g != 0.) {
-      g = 1.0 / g;
-      for (j = l; j < n; j++) {
-        for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j];
-        f = (s / u[i][i]) * g;
-        for (k = i; k < m; k++) u[k][j] += f * u[k][i];
-      }
-      for (j = i; j < m; j++) u[j][i] *= g;
-    } else {
-      for (j = i; j < m; j++) u[j][i] = 0.0;
-    }
-    ++u[i][i];
-  }
-  for (k = n - 1; k >= 0; k--) {
-    for (its = 0; its < max_its; its++) {
-      flag = 1;
-      for (l = k; l >= 0; l--) {
-        nm = l - 1;
-        if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) {
-          flag = 0;
-          break;
-        }
-        if ((double)(fabs(w[nm]) + anorm) == anorm) break;
-      }
-      if (flag) {
-        c = 0.0;
-        s = 1.0;
-        for (i = l; i <= k; i++) {
-          f = s * rv1[i];
-          rv1[i] = c * rv1[i];
-          if ((double)(fabs(f) + anorm) == anorm) break;
-          g = w[i];
-          h = pythag(f, g);
-          w[i] = h;
-          h = 1.0 / h;
-          c = g * h;
-          s = -f * h;
-          for (j = 0; j < m; j++) {
-            y = u[j][nm];
-            z = u[j][i];
-            u[j][nm] = y * c + z * s;
-            u[j][i] = z * c - y * s;
-          }
-        }
-      }
-      z = w[k];
-      if (l == k) {
-        if (z < 0.0) {
-          w[k] = -z;
-          for (j = 0; j < n; j++) v[j][k] = -v[j][k];
-        }
-        break;
-      }
-      if (its == max_its - 1) {
-        aom_free(rv1);
-        return 1;
-      }
-      assert(k > 0);
-      x = w[l];
-      nm = k - 1;
-      y = w[nm];
-      g = rv1[nm];
-      h = rv1[k];
-      f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
-      g = pythag(f, 1.0);
-      f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x;
-      c = s = 1.0;
-      for (j = l; j <= nm; j++) {
-        i = j + 1;
-        g = rv1[i];
-        y = w[i];
-        h = s * g;
-        g = c * g;
-        z = pythag(f, h);
-        rv1[j] = z;
-        c = f / z;
-        s = h / z;
-        f = x * c + g * s;
-        g = g * c - x * s;
-        h = y * s;
-        y *= c;
-        for (jj = 0; jj < n; jj++) {
-          x = v[jj][j];
-          z = v[jj][i];
-          v[jj][j] = x * c + z * s;
-          v[jj][i] = z * c - x * s;
-        }
-        z = pythag(f, h);
-        w[j] = z;
-        if (z != 0.) {
-          z = 1.0 / z;
-          c = f * z;
-          s = h * z;
-        }
-        f = c * g + s * y;
-        x = c * y - s * g;
-        for (jj = 0; jj < m; jj++) {
-          y = u[jj][j];
-          z = u[jj][i];
-          u[jj][j] = y * c + z * s;
-          u[jj][i] = z * c - y * s;
-        }
-      }
-      rv1[l] = 0.0;
-      rv1[k] = f;
-      w[k] = x;
-    }
-  }
-  aom_free(rv1);
-  return 0;
-}
-
-static INLINE int SVD(double *U, double *W, double *V, double *matx, int M,
-                      int N) {
-  // Assumes allocation for U is MxN
-  double **nrU = (double **)aom_malloc((M) * sizeof(*nrU));
-  double **nrV = (double **)aom_malloc((N) * sizeof(*nrV));
-  int problem, i;
-
-  problem = !(nrU && nrV);
-  if (!problem) {
-    for (i = 0; i < M; i++) {
-      nrU[i] = &U[i * N];
-    }
-    for (i = 0; i < N; i++) {
-      nrV[i] = &V[i * N];
-    }
-  } else {
-    if (nrU) aom_free(nrU);
-    if (nrV) aom_free(nrV);
-    return 1;
-  }
-
-  /* copy from given matx into nrU */
-  for (i = 0; i < M; i++) {
-    memcpy(&(nrU[i][0]), matx + N * i, N * sizeof(*matx));
-  }
-
-  /* HERE IT IS: do SVD */
-  if (svdcmp(nrU, M, N, W, nrV)) {
-    aom_free(nrU);
-    aom_free(nrV);
-    return 1;
-  }
-
-  /* aom_free Numerical Recipes arrays */
-  aom_free(nrU);
-  aom_free(nrV);
-
-  return 0;
-}
-
-#endif  // AOM_AV1_ENCODER_MATHUTILS_H_
diff --git a/third_party/aom/av1/encoder/mbgraph.c b/third_party/aom/av1/encoder/mbgraph.c
deleted file mode 100644
index 1a35ff77c..000000000
--- a/third_party/aom/av1/encoder/mbgraph.c
+++ /dev/null
@@ -1,401 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-
-#include "config/av1_rtcd.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/system_state.h"
-#include "av1/common/blockd.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/encoder/mcomp.h"
-#include "av1/encoder/reconinter_enc.h"
-#include "av1/encoder/segmentation.h"
-
-static unsigned int do_16x16_motion_iteration(AV1_COMP *cpi, const MV *ref_mv,
-                                              int mb_row, int mb_col) {
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
-  const aom_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
-
-  const MvLimits tmp_mv_limits = x->mv_limits;
-  MV ref_full;
-  int cost_list[5];
-
-  // Further step/diamond searches as necessary
-  int step_param = mv_sf->reduce_first_step_size;
-  step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 2);
-
-  av1_set_mv_search_range(&x->mv_limits, ref_mv);
-
-  ref_full.col = ref_mv->col >> 3;
-  ref_full.row = ref_mv->row >> 3;
-
-  /*cpi->sf.search_method == HEX*/
-  av1_hex_search(x, &ref_full, step_param, x->errorperbit, 0,
-                 cond_cost_list(cpi, cost_list), &v_fn_ptr, 0, ref_mv);
-
-  // Try sub-pixel MC
-  // if (bestsme > error_thresh && bestsme < INT_MAX)
-  if (cpi->common.cur_frame_force_integer_mv == 1) {
-    x->best_mv.as_mv.row *= 8;
-    x->best_mv.as_mv.col *= 8;
-  } else {
-    int distortion;
-    unsigned int sse;
-    cpi->find_fractional_mv_step(
-        x, &cpi->common, mb_row, mb_col, ref_mv,
-        cpi->common.allow_high_precision_mv, x->errorperbit, &v_fn_ptr, 0,
-        mv_sf->subpel_iters_per_step, cond_cost_list(cpi, cost_list), NULL,
-        NULL, &distortion, &sse, NULL, NULL, 0, 0, 0, 0, 0);
-  }
-
-  if (has_second_ref(xd->mi[0]))
-    xd->mi[0]->mode = NEW_NEWMV;
-  else
-    xd->mi[0]->mode = NEWMV;
-
-  xd->mi[0]->mv[0] = x->best_mv;
-  xd->mi[0]->ref_frame[1] = NONE_FRAME;
-
-  av1_build_inter_predictors_sby(&cpi->common, xd, mb_row, mb_col, NULL,
-                                 BLOCK_16X16);
-
-  /* restore UMV window */
-  x->mv_limits = tmp_mv_limits;
-
-  return aom_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
-                      xd->plane[0].dst.buf, xd->plane[0].dst.stride);
-}
-
-static int do_16x16_motion_search(AV1_COMP *cpi, const MV *ref_mv, int mb_row,
-                                  int mb_col) {
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  unsigned int err, tmp_err;
-  MV best_mv;
-
-  // Try zero MV first
-  // FIXME should really use something like near/nearest MV and/or MV prediction
-  err = aom_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
-                     xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride);
-  best_mv.col = best_mv.row = 0;
-
-  // Test last reference frame using the previous best mv as the
-  // starting point (best reference) for the search
-  tmp_err = do_16x16_motion_iteration(cpi, ref_mv, mb_row, mb_col);
-  if (tmp_err < err) {
-    err = tmp_err;
-    best_mv = x->best_mv.as_mv;
-  }
-
-  // If the current best reference mv is not centered on 0,0 then do a 0,0
-  // based search as well.
-  if (ref_mv->row != 0 || ref_mv->col != 0) {
-    MV zero_ref_mv = kZeroMv;
-
-    tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, mb_row, mb_col);
-    if (tmp_err < err) {
-      err = tmp_err;
-      best_mv = x->best_mv.as_mv;
-    }
-  }
-
-  x->best_mv.as_mv = best_mv;
-  return err;
-}
-
-static int do_16x16_zerozero_search(AV1_COMP *cpi, int_mv *dst_mv) {
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  unsigned int err;
-
-  // Try zero MV first
-  // FIXME should really use something like near/nearest MV and/or MV prediction
-  err = aom_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
-                     xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride);
-
-  dst_mv->as_int = 0;
-
-  return err;
-}
-static int find_best_16x16_intra(AV1_COMP *cpi, PREDICTION_MODE *pbest_mode) {
-  const AV1_COMMON *cm = &cpi->common;
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  PREDICTION_MODE best_mode = -1, mode;
-  unsigned int best_err = INT_MAX;
-
-  // calculate SATD for each intra prediction mode;
-  // we're intentionally not doing 4x4, we just want a rough estimate
-  for (mode = INTRA_MODE_START; mode < INTRA_MODE_END; mode++) {
-    unsigned int err;
-
-    xd->mi[0]->mode = mode;
-    av1_predict_intra_block(cm, xd, 16, 16, TX_16X16, mode, 0, 0,
-                            FILTER_INTRA_MODES, x->plane[0].src.buf,
-                            x->plane[0].src.stride, xd->plane[0].dst.buf,
-                            xd->plane[0].dst.stride, 0, 0, 0);
-    err = aom_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
-                       xd->plane[0].dst.buf, xd->plane[0].dst.stride);
-
-    // find best
-    if (err < best_err) {
-      best_err = err;
-      best_mode = mode;
-    }
-  }
-
-  if (pbest_mode) *pbest_mode = best_mode;
-
-  return best_err;
-}
-
-static void update_mbgraph_mb_stats(AV1_COMP *cpi, MBGRAPH_MB_STATS *stats,
-                                    YV12_BUFFER_CONFIG *buf, int mb_y_offset,
-                                    YV12_BUFFER_CONFIG *golden_ref,
-                                    const MV *prev_golden_ref_mv,
-                                    YV12_BUFFER_CONFIG *alt_ref, int mb_row,
-                                    int mb_col) {
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  int intra_error;
-  AV1_COMMON *cm = &cpi->common;
-
-  // FIXME in practice we're completely ignoring chroma here
-  x->plane[0].src.buf = buf->y_buffer + mb_y_offset;
-  x->plane[0].src.stride = buf->y_stride;
-
-  xd->plane[0].dst.buf = get_frame_new_buffer(cm)->y_buffer + mb_y_offset;
-  xd->plane[0].dst.stride = get_frame_new_buffer(cm)->y_stride;
-
-  // do intra 16x16 prediction
-  intra_error = find_best_16x16_intra(cpi, &stats->ref[INTRA_FRAME].m.mode);
-  if (intra_error <= 0) intra_error = 1;
-  stats->ref[INTRA_FRAME].err = intra_error;
-
-  // Golden frame MV search, if it exists and is different than last frame
-  if (golden_ref) {
-    int g_motion_error;
-    xd->plane[0].pre[0].buf = golden_ref->y_buffer + mb_y_offset;
-    xd->plane[0].pre[0].stride = golden_ref->y_stride;
-    g_motion_error =
-        do_16x16_motion_search(cpi, prev_golden_ref_mv, mb_row, mb_col);
-    stats->ref[GOLDEN_FRAME].m.mv = x->best_mv;
-    stats->ref[GOLDEN_FRAME].err = g_motion_error;
-  } else {
-    stats->ref[GOLDEN_FRAME].err = INT_MAX;
-    stats->ref[GOLDEN_FRAME].m.mv.as_int = 0;
-  }
-
-  // Do an Alt-ref frame MV search, if it exists and is different than
-  // last/golden frame.
-  if (alt_ref) {
-    int a_motion_error;
-    xd->plane[0].pre[0].buf = alt_ref->y_buffer + mb_y_offset;
-    xd->plane[0].pre[0].stride = alt_ref->y_stride;
-    a_motion_error =
-        do_16x16_zerozero_search(cpi, &stats->ref[ALTREF_FRAME].m.mv);
-
-    stats->ref[ALTREF_FRAME].err = a_motion_error;
-  } else {
-    stats->ref[ALTREF_FRAME].err = INT_MAX;
-    stats->ref[ALTREF_FRAME].m.mv.as_int = 0;
-  }
-}
-
-static void update_mbgraph_frame_stats(AV1_COMP *cpi,
-                                       MBGRAPH_FRAME_STATS *stats,
-                                       YV12_BUFFER_CONFIG *buf,
-                                       YV12_BUFFER_CONFIG *golden_ref,
-                                       YV12_BUFFER_CONFIG *alt_ref) {
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  AV1_COMMON *const cm = &cpi->common;
-
-  int mb_col, mb_row, offset = 0;
-  int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0;
-  MV gld_top_mv = kZeroMv;
-  MB_MODE_INFO mi_local;
-
-  av1_zero(mi_local);
-  // Set up limit values for motion vectors to prevent them extending outside
-  // the UMV borders.
-  x->mv_limits.row_min = -BORDER_MV_PIXELS_B16;
-  x->mv_limits.row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16;
-  xd->up_available = 0;
-  xd->plane[0].dst.stride = buf->y_stride;
-  xd->plane[0].pre[0].stride = buf->y_stride;
-  xd->plane[1].dst.stride = buf->uv_stride;
-  xd->mi[0] = &mi_local;
-  mi_local.sb_type = BLOCK_16X16;
-  mi_local.ref_frame[0] = LAST_FRAME;
-  mi_local.ref_frame[1] = NONE_FRAME;
-
-  for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
-    MV gld_left_mv = gld_top_mv;
-    int mb_y_in_offset = mb_y_offset;
-    int arf_y_in_offset = arf_y_offset;
-    int gld_y_in_offset = gld_y_offset;
-
-    // Set up limit values for motion vectors to prevent them extending outside
-    // the UMV borders.
-    x->mv_limits.col_min = -BORDER_MV_PIXELS_B16;
-    x->mv_limits.col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16;
-    xd->left_available = 0;
-
-    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
-      MBGRAPH_MB_STATS *mb_stats = &stats->mb_stats[offset + mb_col];
-
-      update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset, golden_ref,
-                              &gld_left_mv, alt_ref, mb_row, mb_col);
-      gld_left_mv = mb_stats->ref[GOLDEN_FRAME].m.mv.as_mv;
-      if (mb_col == 0) {
-        gld_top_mv = gld_left_mv;
-      }
-      xd->left_available = 1;
-      mb_y_in_offset += 16;
-      gld_y_in_offset += 16;
-      arf_y_in_offset += 16;
-      x->mv_limits.col_min -= 16;
-      x->mv_limits.col_max -= 16;
-    }
-    xd->up_available = 1;
-    mb_y_offset += buf->y_stride * 16;
-    gld_y_offset += golden_ref->y_stride * 16;
-    if (alt_ref) arf_y_offset += alt_ref->y_stride * 16;
-    x->mv_limits.row_min -= 16;
-    x->mv_limits.row_max -= 16;
-    offset += cm->mb_cols;
-  }
-}
-
-// void separate_arf_mbs_byzz
-static void separate_arf_mbs(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  int mb_col, mb_row, offset, i;
-  int mi_row, mi_col;
-  int ncnt[4] = { 0 };
-  int n_frames = cpi->mbgraph_n_frames;
-
-  int *arf_not_zz;
-
-  CHECK_MEM_ERROR(
-      cm, arf_not_zz,
-      aom_calloc(cm->mb_rows * cm->mb_cols * sizeof(*arf_not_zz), 1));
-
-  // We are not interested in results beyond the alt ref itself.
-  if (n_frames > cpi->rc.frames_till_gf_update_due)
-    n_frames = cpi->rc.frames_till_gf_update_due;
-
-  // defer cost to reference frames
-  for (i = n_frames - 1; i >= 0; i--) {
-    MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i];
-
-    for (offset = 0, mb_row = 0; mb_row < cm->mb_rows;
-         offset += cm->mb_cols, mb_row++) {
-      for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
-        MBGRAPH_MB_STATS *mb_stats = &frame_stats->mb_stats[offset + mb_col];
-
-        int altref_err = mb_stats->ref[ALTREF_FRAME].err;
-        int intra_err = mb_stats->ref[INTRA_FRAME].err;
-        int golden_err = mb_stats->ref[GOLDEN_FRAME].err;
-
-        // Test for altref vs intra and gf and that its mv was 0,0.
-        if (altref_err > 1000 || altref_err > intra_err ||
-            altref_err > golden_err) {
-          arf_not_zz[offset + mb_col]++;
-        }
-      }
-    }
-  }
-
-  // arf_not_zz is indexed by MB, but this loop is indexed by MI to avoid out
-  // of bound access in segmentation_map
-  for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
-    for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
-      // If any of the blocks in the sequence failed then the MB
-      // goes in segment 0
-      if (arf_not_zz[mi_row / 2 * cm->mb_cols + mi_col / 2]) {
-        ncnt[0]++;
-        cpi->segmentation_map[mi_row * cm->mi_cols + mi_col] = 0;
-      } else {
-        cpi->segmentation_map[mi_row * cm->mi_cols + mi_col] = 1;
-        ncnt[1]++;
-      }
-    }
-  }
-
-  // Only bother with segmentation if over 10% of the MBs in static segment
-  // if ( ncnt[1] && (ncnt[0] / ncnt[1] < 10) )
-  if (1) {
-    // Note % of blocks that are marked as static
-    if (cm->MBs)
-      cpi->static_mb_pct = (ncnt[1] * 100) / (cm->mi_rows * cm->mi_cols);
-
-    // This error case should not be reachable as this function should
-    // never be called with the common data structure uninitialized.
-    else
-      cpi->static_mb_pct = 0;
-
-    av1_enable_segmentation(&cm->seg);
-  } else {
-    cpi->static_mb_pct = 0;
-    av1_disable_segmentation(&cm->seg);
-  }
-
-  // Free localy allocated storage
-  aom_free(arf_not_zz);
-}
-
-void av1_update_mbgraph_stats(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  int i, n_frames = av1_lookahead_depth(cpi->lookahead);
-  YV12_BUFFER_CONFIG *golden_ref = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
-
-  assert(golden_ref != NULL);
-
-  // we need to look ahead beyond where the ARF transitions into
-  // being a GF - so exit if we don't look ahead beyond that
-  if (n_frames <= cpi->rc.frames_till_gf_update_due) return;
-
-  if (n_frames > MAX_LAG_BUFFERS) n_frames = MAX_LAG_BUFFERS;
-
-  cpi->mbgraph_n_frames = n_frames;
-  for (i = 0; i < n_frames; i++) {
-    MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i];
-    memset(frame_stats->mb_stats, 0,
-           cm->mb_rows * cm->mb_cols * sizeof(*cpi->mbgraph_stats[i].mb_stats));
-  }
-
-  // do motion search to find contribution of each reference to data
-  // later on in this GF group
-  // FIXME really, the GF/last MC search should be done forward, and
-  // the ARF MC search backwards, to get optimal results for MV caching
-  for (i = 0; i < n_frames; i++) {
-    MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i];
-    struct lookahead_entry *q_cur = av1_lookahead_peek(cpi->lookahead, i);
-
-    assert(q_cur != NULL);
-
-    update_mbgraph_frame_stats(cpi, frame_stats, &q_cur->img, golden_ref,
-                               cpi->source);
-  }
-
-  aom_clear_system_state();
-
-  separate_arf_mbs(cpi);
-}
diff --git a/third_party/aom/av1/encoder/mbgraph.h b/third_party/aom/av1/encoder/mbgraph.h
deleted file mode 100644
index ba08476f7..000000000
--- a/third_party/aom/av1/encoder/mbgraph.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_MBGRAPH_H_
-#define AOM_AV1_ENCODER_MBGRAPH_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct {
-  struct {
-    int err;
-    union {
-      int_mv mv;
-      PREDICTION_MODE mode;
-    } m;
-  } ref[REF_FRAMES];
-} MBGRAPH_MB_STATS;
-
-typedef struct {
-  MBGRAPH_MB_STATS *mb_stats;
-} MBGRAPH_FRAME_STATS;
-
-struct AV1_COMP;
-
-void av1_update_mbgraph_stats(struct AV1_COMP *cpi);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_MBGRAPH_H_
diff --git a/third_party/aom/av1/encoder/mcomp.c b/third_party/aom/av1/encoder/mcomp.c
deleted file mode 100644
index 8f6de9b53..000000000
--- a/third_party/aom/av1/encoder/mcomp.c
+++ /dev/null
@@ -1,2885 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-#include <math.h>
-#include <stdio.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-
-#include "av1/common/common.h"
-#include "av1/common/mvref_common.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/reconinter.h"
-
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/encodemv.h"
-#include "av1/encoder/mcomp.h"
-#include "av1/encoder/rdopt.h"
-#include "av1/encoder/reconinter_enc.h"
-
-// #define NEW_DIAMOND_SEARCH
-
-static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
-                                             const MV *mv) {
-  return &buf->buf[mv->row * buf->stride + mv->col];
-}
-
-void av1_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
-  int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
-  int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
-  int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
-  int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
-
-  col_min = AOMMAX(col_min, (MV_LOW >> 3) + 1);
-  row_min = AOMMAX(row_min, (MV_LOW >> 3) + 1);
-  col_max = AOMMIN(col_max, (MV_UPP >> 3) - 1);
-  row_max = AOMMIN(row_max, (MV_UPP >> 3) - 1);
-
-  // Get intersection of UMV window and valid MV window to reduce # of checks
-  // in diamond search.
-  if (mv_limits->col_min < col_min) mv_limits->col_min = col_min;
-  if (mv_limits->col_max > col_max) mv_limits->col_max = col_max;
-  if (mv_limits->row_min < row_min) mv_limits->row_min = row_min;
-  if (mv_limits->row_max > row_max) mv_limits->row_max = row_max;
-}
-
-static void set_subpel_mv_search_range(const MvLimits *mv_limits, int *col_min,
-                                       int *col_max, int *row_min, int *row_max,
-                                       const MV *ref_mv) {
-  const int max_mv = MAX_FULL_PEL_VAL * 8;
-  const int minc = AOMMAX(mv_limits->col_min * 8, ref_mv->col - max_mv);
-  const int maxc = AOMMIN(mv_limits->col_max * 8, ref_mv->col + max_mv);
-  const int minr = AOMMAX(mv_limits->row_min * 8, ref_mv->row - max_mv);
-  const int maxr = AOMMIN(mv_limits->row_max * 8, ref_mv->row + max_mv);
-
-  *col_min = AOMMAX(MV_LOW + 1, minc);
-  *col_max = AOMMIN(MV_UPP - 1, maxc);
-  *row_min = AOMMAX(MV_LOW + 1, minr);
-  *row_max = AOMMIN(MV_UPP - 1, maxr);
-}
-
-int av1_init_search_range(int size) {
-  int sr = 0;
-  // Minimum search size no matter what the passed in value.
-  size = AOMMAX(16, size);
-
-  while ((size << sr) < MAX_FULL_PEL_VAL) sr++;
-
-  sr = AOMMIN(sr, MAX_MVSEARCH_STEPS - 2);
-  return sr;
-}
-
-static INLINE int mv_cost(const MV *mv, const int *joint_cost,
-                          int *const comp_cost[2]) {
-  return joint_cost[av1_get_mv_joint(mv)] + comp_cost[0][mv->row] +
-         comp_cost[1][mv->col];
-}
-
-int av1_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
-                    int *mvcost[2], int weight) {
-  const MV diff = { mv->row - ref->row, mv->col - ref->col };
-  return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
-}
-
-#define PIXEL_TRANSFORM_ERROR_SCALE 4
-static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
-                       int *mvcost[2], int error_per_bit) {
-  if (mvcost) {
-    const MV diff = { mv->row - ref->row, mv->col - ref->col };
-    return (int)ROUND_POWER_OF_TWO_64(
-        (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
-        RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT +
-            PIXEL_TRANSFORM_ERROR_SCALE);
-  }
-  return 0;
-}
-
-static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
-                          int sad_per_bit) {
-  const MV diff = { (mv->row - ref->row) * 8, (mv->col - ref->col) * 8 };
-  return ROUND_POWER_OF_TWO(
-      (unsigned)mv_cost(&diff, x->nmvjointcost, x->mvcost) * sad_per_bit,
-      AV1_PROB_COST_SHIFT);
-}
-
-void av1_init_dsmotion_compensation(search_site_config *cfg, int stride) {
-  int len, ss_count = 1;
-
-  cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
-  cfg->ss[0].offset = 0;
-
-  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
-    // Generate offsets for 4 search sites per step.
-    const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } };
-    int i;
-    for (i = 0; i < 4; ++i) {
-      search_site *const ss = &cfg->ss[ss_count++];
-      ss->mv = ss_mvs[i];
-      ss->offset = ss->mv.row * stride + ss->mv.col;
-    }
-  }
-
-  cfg->ss_count = ss_count;
-  cfg->searches_per_step = 4;
-}
-
-void av1_init3smotion_compensation(search_site_config *cfg, int stride) {
-  int len, ss_count = 1;
-
-  cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
-  cfg->ss[0].offset = 0;
-
-  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
-    // Generate offsets for 8 search sites per step.
-    const MV ss_mvs[8] = { { -len, 0 },   { len, 0 },     { 0, -len },
-                           { 0, len },    { -len, -len }, { -len, len },
-                           { len, -len }, { len, len } };
-    int i;
-    for (i = 0; i < 8; ++i) {
-      search_site *const ss = &cfg->ss[ss_count++];
-      ss->mv = ss_mvs[i];
-      ss->offset = ss->mv.row * stride + ss->mv.col;
-    }
-  }
-
-  cfg->ss_count = ss_count;
-  cfg->searches_per_step = 8;
-}
-
-/*
- * To avoid the penalty for crossing cache-line read, preload the reference
- * area in a small buffer, which is aligned to make sure there won't be crossing
- * cache-line read while reading from this buffer. This reduced the cpu
- * cycles spent on reading ref data in sub-pixel filter functions.
- * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
- * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
- * could reduce the area.
- */
-
-// convert motion vector component to offset for sv[a]f calc
-static INLINE int sp(int x) { return x & 7; }
-
-static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
-  const int offset = (r >> 3) * stride + (c >> 3);
-  return buf + offset;
-}
-
-/* checks if (r, c) has better score than previous best */
-#define CHECK_BETTER(v, r, c)                                                \
-  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                    \
-    MV this_mv = { r, c };                                                   \
-    v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);       \
-    if (second_pred == NULL) {                                               \
-      thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r),     \
-                         src_address, src_stride, &sse);                     \
-    } else if (mask) {                                                       \
-      thismse = vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r),    \
-                          src_address, src_stride, second_pred, mask,        \
-                          mask_stride, invert_mask, &sse);                   \
-    } else {                                                                 \
-      if (xd->jcp_param.use_jnt_comp_avg)                                    \
-        thismse = vfp->jsvaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
-                             src_address, src_stride, &sse, second_pred,     \
-                             &xd->jcp_param);                                \
-      else                                                                   \
-        thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r),  \
-                            src_address, src_stride, &sse, second_pred);     \
-    }                                                                        \
-    v += thismse;                                                            \
-    if (v < besterr) {                                                       \
-      besterr = v;                                                           \
-      br = r;                                                                \
-      bc = c;                                                                \
-      *distortion = thismse;                                                 \
-      *sse1 = sse;                                                           \
-    }                                                                        \
-  } else {                                                                   \
-    v = INT_MAX;                                                             \
-  }
-
-#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
-
-/* checks if (r, c) has better score than previous best */
-#define CHECK_BETTER1(v, r, c)                                             \
-  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                  \
-    MV this_mv = { r, c };                                                 \
-    thismse = upsampled_pref_error(                                        \
-        xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride,    \
-        pre(y, y_stride, r, c), y_stride, sp(c), sp(r), second_pred, mask, \
-        mask_stride, invert_mask, w, h, &sse, use_accurate_subpel_search); \
-    v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);     \
-    v += thismse;                                                          \
-    if (v < besterr) {                                                     \
-      besterr = v;                                                         \
-      br = r;                                                              \
-      bc = c;                                                              \
-      *distortion = thismse;                                               \
-      *sse1 = sse;                                                         \
-    }                                                                      \
-  } else {                                                                 \
-    v = INT_MAX;                                                           \
-  }
-
-#define FIRST_LEVEL_CHECKS                                       \
-  {                                                              \
-    unsigned int left, right, up, down, diag;                    \
-    CHECK_BETTER(left, tr, tc - hstep);                          \
-    CHECK_BETTER(right, tr, tc + hstep);                         \
-    CHECK_BETTER(up, tr - hstep, tc);                            \
-    CHECK_BETTER(down, tr + hstep, tc);                          \
-    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);     \
-    switch (whichdir) {                                          \
-      case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \
-      case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \
-      case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \
-      case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \
-    }                                                            \
-  }
-
-#define SECOND_LEVEL_CHECKS                                       \
-  {                                                               \
-    int kr, kc;                                                   \
-    unsigned int second;                                          \
-    if (tr != br && tc != bc) {                                   \
-      kr = br - tr;                                               \
-      kc = bc - tc;                                               \
-      CHECK_BETTER(second, tr + kr, tc + 2 * kc);                 \
-      CHECK_BETTER(second, tr + 2 * kr, tc + kc);                 \
-    } else if (tr == br && tc != bc) {                            \
-      kc = bc - tc;                                               \
-      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);              \
-      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);              \
-      switch (whichdir) {                                         \
-        case 0:                                                   \
-        case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \
-        case 2:                                                   \
-        case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \
-      }                                                           \
-    } else if (tr != br && tc == bc) {                            \
-      kr = br - tr;                                               \
-      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);              \
-      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);              \
-      switch (whichdir) {                                         \
-        case 0:                                                   \
-        case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \
-        case 1:                                                   \
-        case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \
-      }                                                           \
-    }                                                             \
-  }
-
-// TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrote of
-// SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
-// later in the same way.
-#define SECOND_LEVEL_CHECKS_BEST(k)                \
-  {                                                \
-    unsigned int second;                           \
-    int br0 = br;                                  \
-    int bc0 = bc;                                  \
-    assert(tr == br || tc == bc);                  \
-    if (tr == br && tc != bc) {                    \
-      kc = bc - tc;                                \
-    } else if (tr != br && tc == bc) {             \
-      kr = br - tr;                                \
-    }                                              \
-    CHECK_BETTER##k(second, br0 + kr, bc0);        \
-    CHECK_BETTER##k(second, br0, bc0 + kc);        \
-    if (br0 != br || bc0 != bc) {                  \
-      CHECK_BETTER##k(second, br0 + kr, bc0 + kc); \
-    }                                              \
-  }
-
-#define SETUP_SUBPEL_SEARCH                                             \
-  const uint8_t *const src_address = x->plane[0].src.buf;               \
-  const int src_stride = x->plane[0].src.stride;                        \
-  const MACROBLOCKD *xd = &x->e_mbd;                                    \
-  unsigned int besterr = INT_MAX;                                       \
-  unsigned int sse;                                                     \
-  unsigned int whichdir;                                                \
-  int thismse;                                                          \
-  MV *bestmv = &x->best_mv.as_mv;                                       \
-  const unsigned int halfiters = iters_per_step;                        \
-  const unsigned int quarteriters = iters_per_step;                     \
-  const unsigned int eighthiters = iters_per_step;                      \
-  const int y_stride = xd->plane[0].pre[0].stride;                      \
-  const int offset = bestmv->row * y_stride + bestmv->col;              \
-  const uint8_t *const y = xd->plane[0].pre[0].buf;                     \
-                                                                        \
-  int br = bestmv->row * 8;                                             \
-  int bc = bestmv->col * 8;                                             \
-  int hstep = 4;                                                        \
-  int minc, maxc, minr, maxr;                                           \
-  int tr = br;                                                          \
-  int tc = bc;                                                          \
-                                                                        \
-  set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, \
-                             ref_mv);                                   \
-                                                                        \
-  bestmv->row *= 8;                                                     \
-  bestmv->col *= 8;
-
-static unsigned int setup_center_error(
-    const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
-    int error_per_bit, const aom_variance_fn_ptr_t *vfp,
-    const uint8_t *const src, const int src_stride, const uint8_t *const y,
-    int y_stride, const uint8_t *second_pred, const uint8_t *mask,
-    int mask_stride, int invert_mask, int w, int h, int offset, int *mvjcost,
-    int *mvcost[2], unsigned int *sse1, int *distortion) {
-  unsigned int besterr;
-  if (second_pred != NULL) {
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
-      uint8_t *comp_pred = CONVERT_TO_BYTEPTR(comp_pred16);
-      if (mask) {
-        aom_highbd_comp_mask_pred(comp_pred, second_pred, w, h, y + offset,
-                                  y_stride, mask, mask_stride, invert_mask);
-      } else {
-        if (xd->jcp_param.use_jnt_comp_avg)
-          aom_highbd_jnt_comp_avg_pred(comp_pred, second_pred, w, h, y + offset,
-                                       y_stride, &xd->jcp_param);
-        else
-          aom_highbd_comp_avg_pred(comp_pred, second_pred, w, h, y + offset,
-                                   y_stride);
-      }
-      besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
-    } else {
-      DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
-      if (mask) {
-        aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
-                           mask, mask_stride, invert_mask);
-      } else {
-        if (xd->jcp_param.use_jnt_comp_avg)
-          aom_jnt_comp_avg_pred(comp_pred, second_pred, w, h, y + offset,
-                                y_stride, &xd->jcp_param);
-        else
-          aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
-      }
-      besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
-    }
-  } else {
-    besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
-  }
-  *distortion = besterr;
-  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
-  return besterr;
-}
-
-static INLINE int divide_and_round(int n, int d) {
-  return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
-}
-
-static INLINE int is_cost_list_wellbehaved(int *cost_list) {
-  return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] &&
-         cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4];
-}
-
-// Returns surface minima estimate at given precision in 1/2^n bits.
-// Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
-// For a given set of costs S0, S1, S2, S3, S4 at points
-// (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
-// the solution for the location of the minima (x0, y0) is given by:
-// x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
-// y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
-// The code below is an integerized version of that.
-static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
-  *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)),
-                         (cost_list[1] - 2 * cost_list[0] + cost_list[3]));
-  *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)),
-                         (cost_list[4] - 2 * cost_list[0] + cost_list[2]));
-}
-
-int av1_find_best_sub_pixel_tree_pruned_evenmore(
-    MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
-    const MV *ref_mv, int allow_hp, int error_per_bit,
-    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
-    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
-    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
-    int mask_stride, int invert_mask, int w, int h,
-    int use_accurate_subpel_search) {
-  SETUP_SUBPEL_SEARCH;
-  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
-                               src_address, src_stride, y, y_stride,
-                               second_pred, mask, mask_stride, invert_mask, w,
-                               h, offset, mvjcost, mvcost, sse1, distortion);
-  (void)halfiters;
-  (void)quarteriters;
-  (void)eighthiters;
-  (void)whichdir;
-  (void)allow_hp;
-  (void)forced_stop;
-  (void)hstep;
-  (void)use_accurate_subpel_search;
-  (void)cm;
-  (void)mi_row;
-  (void)mi_col;
-
-  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
-      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
-      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
-    int ir, ic;
-    unsigned int minpt;
-    get_cost_surf_min(cost_list, &ir, &ic, 2);
-    if (ir != 0 || ic != 0) {
-      CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
-    }
-  } else {
-    FIRST_LEVEL_CHECKS;
-    if (halfiters > 1) {
-      SECOND_LEVEL_CHECKS;
-    }
-
-    tr = br;
-    tc = bc;
-
-    // Each subsequent iteration checks at least one point in common with
-    // the last iteration could be 2 ( if diag selected) 1/4 pel
-    // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
-    if (forced_stop != 2) {
-      hstep >>= 1;
-      FIRST_LEVEL_CHECKS;
-      if (quarteriters > 1) {
-        SECOND_LEVEL_CHECKS;
-      }
-    }
-  }
-
-  tr = br;
-  tc = bc;
-
-  if (allow_hp && forced_stop == 0) {
-    hstep >>= 1;
-    FIRST_LEVEL_CHECKS;
-    if (eighthiters > 1) {
-      SECOND_LEVEL_CHECKS;
-    }
-  }
-
-  bestmv->row = br;
-  bestmv->col = bc;
-
-  return besterr;
-}
-
-int av1_find_best_sub_pixel_tree_pruned_more(
-    MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
-    const MV *ref_mv, int allow_hp, int error_per_bit,
-    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
-    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
-    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
-    int mask_stride, int invert_mask, int w, int h,
-    int use_accurate_subpel_search) {
-  SETUP_SUBPEL_SEARCH;
-  (void)use_accurate_subpel_search;
-  (void)cm;
-  (void)mi_row;
-  (void)mi_col;
-
-  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
-                               src_address, src_stride, y, y_stride,
-                               second_pred, mask, mask_stride, invert_mask, w,
-                               h, offset, mvjcost, mvcost, sse1, distortion);
-  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
-      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
-      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
-    unsigned int minpt;
-    int ir, ic;
-    get_cost_surf_min(cost_list, &ir, &ic, 1);
-    if (ir != 0 || ic != 0) {
-      CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
-    }
-  } else {
-    FIRST_LEVEL_CHECKS;
-    if (halfiters > 1) {
-      SECOND_LEVEL_CHECKS;
-    }
-  }
-
-  // Each subsequent iteration checks at least one point in common with
-  // the last iteration could be 2 ( if diag selected) 1/4 pel
-
-  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
-  if (forced_stop != 2) {
-    tr = br;
-    tc = bc;
-    hstep >>= 1;
-    FIRST_LEVEL_CHECKS;
-    if (quarteriters > 1) {
-      SECOND_LEVEL_CHECKS;
-    }
-  }
-
-  if (allow_hp && forced_stop == 0) {
-    tr = br;
-    tc = bc;
-    hstep >>= 1;
-    FIRST_LEVEL_CHECKS;
-    if (eighthiters > 1) {
-      SECOND_LEVEL_CHECKS;
-    }
-  }
-  // These lines insure static analysis doesn't warn that
-  // tr and tc aren't used after the above point.
-  (void)tr;
-  (void)tc;
-
-  bestmv->row = br;
-  bestmv->col = bc;
-
-  return besterr;
-}
-
-int av1_find_best_sub_pixel_tree_pruned(
-    MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
-    const MV *ref_mv, int allow_hp, int error_per_bit,
-    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
-    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
-    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
-    int mask_stride, int invert_mask, int w, int h,
-    int use_accurate_subpel_search) {
-  SETUP_SUBPEL_SEARCH;
-  (void)use_accurate_subpel_search;
-  (void)cm;
-  (void)mi_row;
-  (void)mi_col;
-
-  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
-                               src_address, src_stride, y, y_stride,
-                               second_pred, mask, mask_stride, invert_mask, w,
-                               h, offset, mvjcost, mvcost, sse1, distortion);
-  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
-      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
-      cost_list[4] != INT_MAX) {
-    unsigned int left, right, up, down, diag;
-    whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
-               (cost_list[2] < cost_list[4] ? 0 : 2);
-    switch (whichdir) {
-      case 0:
-        CHECK_BETTER(left, tr, tc - hstep);
-        CHECK_BETTER(down, tr + hstep, tc);
-        CHECK_BETTER(diag, tr + hstep, tc - hstep);
-        break;
-      case 1:
-        CHECK_BETTER(right, tr, tc + hstep);
-        CHECK_BETTER(down, tr + hstep, tc);
-        CHECK_BETTER(diag, tr + hstep, tc + hstep);
-        break;
-      case 2:
-        CHECK_BETTER(left, tr, tc - hstep);
-        CHECK_BETTER(up, tr - hstep, tc);
-        CHECK_BETTER(diag, tr - hstep, tc - hstep);
-        break;
-      case 3:
-        CHECK_BETTER(right, tr, tc + hstep);
-        CHECK_BETTER(up, tr - hstep, tc);
-        CHECK_BETTER(diag, tr - hstep, tc + hstep);
-        break;
-    }
-  } else {
-    FIRST_LEVEL_CHECKS;
-    if (halfiters > 1) {
-      SECOND_LEVEL_CHECKS;
-    }
-  }
-
-  tr = br;
-  tc = bc;
-
-  // Each subsequent iteration checks at least one point in common with
-  // the last iteration could be 2 ( if diag selected) 1/4 pel
-
-  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
-  if (forced_stop != 2) {
-    hstep >>= 1;
-    FIRST_LEVEL_CHECKS;
-    if (quarteriters > 1) {
-      SECOND_LEVEL_CHECKS;
-    }
-    tr = br;
-    tc = bc;
-  }
-
-  if (allow_hp && forced_stop == 0) {
-    hstep >>= 1;
-    FIRST_LEVEL_CHECKS;
-    if (eighthiters > 1) {
-      SECOND_LEVEL_CHECKS;
-    }
-    tr = br;
-    tc = bc;
-  }
-  // These lines insure static analysis doesn't warn that
-  // tr and tc aren't used after the above point.
-  (void)tr;
-  (void)tc;
-
-  bestmv->row = br;
-  bestmv->col = bc;
-
-  return besterr;
-}
-
-/* clang-format off */
-static const MV search_step_table[12] = {
-  // left, right, up, down
-  { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
-  { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
-  { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
-};
-/* clang-format on */
-
-static int upsampled_pref_error(MACROBLOCKD *xd, const AV1_COMMON *const cm,
-                                int mi_row, int mi_col, const MV *const mv,
-                                const aom_variance_fn_ptr_t *vfp,
-                                const uint8_t *const src, const int src_stride,
-                                const uint8_t *const y, int y_stride,
-                                int subpel_x_q3, int subpel_y_q3,
-                                const uint8_t *second_pred, const uint8_t *mask,
-                                int mask_stride, int invert_mask, int w, int h,
-                                unsigned int *sse, int subpel_search) {
-  unsigned int besterr;
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
-    uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred16);
-    if (second_pred != NULL) {
-      if (mask) {
-        aom_highbd_comp_mask_upsampled_pred(
-            xd, cm, mi_row, mi_col, mv, pred8, second_pred, w, h, subpel_x_q3,
-            subpel_y_q3, y, y_stride, mask, mask_stride, invert_mask, xd->bd,
-            subpel_search);
-      } else {
-        if (xd->jcp_param.use_jnt_comp_avg)
-          aom_highbd_jnt_comp_avg_upsampled_pred(
-              xd, cm, mi_row, mi_col, mv, pred8, second_pred, w, h, subpel_x_q3,
-              subpel_y_q3, y, y_stride, xd->bd, &xd->jcp_param, subpel_search);
-        else
-          aom_highbd_comp_avg_upsampled_pred(
-              xd, cm, mi_row, mi_col, mv, pred8, second_pred, w, h, subpel_x_q3,
-              subpel_y_q3, y, y_stride, xd->bd, subpel_search);
-      }
-    } else {
-      aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred8, w, h,
-                                subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd,
-                                subpel_search);
-    }
-    besterr = vfp->vf(pred8, w, src, src_stride, sse);
-  } else {
-    DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
-    if (second_pred != NULL) {
-      if (mask) {
-        aom_comp_mask_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred,
-                                     second_pred, w, h, subpel_x_q3,
-                                     subpel_y_q3, y, y_stride, mask,
-                                     mask_stride, invert_mask, subpel_search);
-      } else {
-        if (xd->jcp_param.use_jnt_comp_avg)
-          aom_jnt_comp_avg_upsampled_pred(
-              xd, cm, mi_row, mi_col, mv, pred, second_pred, w, h, subpel_x_q3,
-              subpel_y_q3, y, y_stride, &xd->jcp_param, subpel_search);
-        else
-          aom_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred,
-                                      second_pred, w, h, subpel_x_q3,
-                                      subpel_y_q3, y, y_stride, subpel_search);
-      }
-    } else {
-      aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3,
-                         subpel_y_q3, y, y_stride, subpel_search);
-    }
-
-    besterr = vfp->vf(pred, w, src, src_stride, sse);
-  }
-  return besterr;
-}
-
-static unsigned int upsampled_setup_center_error(
-    MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
-    const MV *bestmv, const MV *ref_mv, int error_per_bit,
-    const aom_variance_fn_ptr_t *vfp, const uint8_t *const src,
-    const int src_stride, const uint8_t *const y, int y_stride,
-    const uint8_t *second_pred, const uint8_t *mask, int mask_stride,
-    int invert_mask, int w, int h, int offset, int *mvjcost, int *mvcost[2],
-    unsigned int *sse1, int *distortion, int subpel_search) {
-  unsigned int besterr =
-      upsampled_pref_error(xd, cm, mi_row, mi_col, bestmv, vfp, src, src_stride,
-                           y + offset, y_stride, 0, 0, second_pred, mask,
-                           mask_stride, invert_mask, w, h, sse1, subpel_search);
-  *distortion = besterr;
-  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
-  return besterr;
-}
-
-// when use_accurate_subpel_search == 0
-static INLINE unsigned int estimate_upsampled_pref_error(
-    MACROBLOCKD *xd, const aom_variance_fn_ptr_t *vfp, const uint8_t *const src,
-    const int src_stride, const uint8_t *const pre, int y_stride,
-    int subpel_x_q3, int subpel_y_q3, const uint8_t *second_pred,
-    const uint8_t *mask, int mask_stride, int invert_mask, unsigned int *sse) {
-  if (second_pred == NULL) {
-    return vfp->svf(pre, y_stride, subpel_x_q3, subpel_y_q3, src, src_stride,
-                    sse);
-  } else if (mask) {
-    return vfp->msvf(pre, y_stride, subpel_x_q3, subpel_y_q3, src, src_stride,
-                     second_pred, mask, mask_stride, invert_mask, sse);
-  } else {
-    if (xd->jcp_param.use_jnt_comp_avg)
-      return vfp->jsvaf(pre, y_stride, subpel_x_q3, subpel_y_q3, src,
-                        src_stride, sse, second_pred, &xd->jcp_param);
-    else
-      return vfp->svaf(pre, y_stride, subpel_x_q3, subpel_y_q3, src, src_stride,
-                       sse, second_pred);
-  }
-}
-
-int av1_find_best_sub_pixel_tree(
-    MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
-    const MV *ref_mv, int allow_hp, int error_per_bit,
-    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
-    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
-    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
-    int mask_stride, int invert_mask, int w, int h,
-    int use_accurate_subpel_search) {
-  const uint8_t *const src_address = x->plane[0].src.buf;
-  const int src_stride = x->plane[0].src.stride;
-  MACROBLOCKD *xd = &x->e_mbd;
-  unsigned int besterr = INT_MAX;
-  unsigned int sse;
-  unsigned int thismse;
-  const int y_stride = xd->plane[0].pre[0].stride;
-  MV *bestmv = &x->best_mv.as_mv;
-  const int offset = bestmv->row * y_stride + bestmv->col;
-  const uint8_t *const y = xd->plane[0].pre[0].buf;
-
-  int br = bestmv->row * 8;
-  int bc = bestmv->col * 8;
-  int hstep = 4;
-  int iter, round = 3 - forced_stop;
-  int tr = br;
-  int tc = bc;
-  const MV *search_step = search_step_table;
-  int idx, best_idx = -1;
-  unsigned int cost_array[5];
-  int kr, kc;
-  int minc, maxc, minr, maxr;
-
-  set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, ref_mv);
-
-  if (!allow_hp)
-    if (round == 3) round = 2;
-
-  bestmv->row *= 8;
-  bestmv->col *= 8;
-
-  if (use_accurate_subpel_search)
-    besterr = upsampled_setup_center_error(
-        xd, cm, mi_row, mi_col, bestmv, ref_mv, error_per_bit, vfp, src_address,
-        src_stride, y, y_stride, second_pred, mask, mask_stride, invert_mask, w,
-        h, offset, mvjcost, mvcost, sse1, distortion,
-        use_accurate_subpel_search);
-  else
-    besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
-                                 src_address, src_stride, y, y_stride,
-                                 second_pred, mask, mask_stride, invert_mask, w,
-                                 h, offset, mvjcost, mvcost, sse1, distortion);
-
-  (void)cost_list;  // to silence compiler warning
-
-  for (iter = 0; iter < round; ++iter) {
-    // Check vertical and horizontal sub-pixel positions.
-    for (idx = 0; idx < 4; ++idx) {
-      tr = br + search_step[idx].row;
-      tc = bc + search_step[idx].col;
-      if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
-        MV this_mv = { tr, tc };
-
-        if (use_accurate_subpel_search) {
-          thismse = upsampled_pref_error(
-              xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride,
-              pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), second_pred,
-              mask, mask_stride, invert_mask, w, h, &sse,
-              use_accurate_subpel_search);
-        } else {
-          thismse = estimate_upsampled_pref_error(
-              xd, vfp, src_address, src_stride, pre(y, y_stride, tr, tc),
-              y_stride, sp(tc), sp(tr), second_pred, mask, mask_stride,
-              invert_mask, &sse);
-        }
-
-        cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
-                                                mvcost, error_per_bit);
-
-        if (cost_array[idx] < besterr) {
-          best_idx = idx;
-          besterr = cost_array[idx];
-          *distortion = thismse;
-          *sse1 = sse;
-        }
-      } else {
-        cost_array[idx] = INT_MAX;
-      }
-    }
-
-    // Check diagonal sub-pixel position
-    kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
-    kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
-
-    tc = bc + kc;
-    tr = br + kr;
-    if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
-      MV this_mv = { tr, tc };
-
-      if (use_accurate_subpel_search) {
-        thismse = upsampled_pref_error(
-            xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride,
-            pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), second_pred,
-            mask, mask_stride, invert_mask, w, h, &sse,
-            use_accurate_subpel_search);
-      } else {
-        thismse = estimate_upsampled_pref_error(
-            xd, vfp, src_address, src_stride, pre(y, y_stride, tr, tc),
-            y_stride, sp(tc), sp(tr), second_pred, mask, mask_stride,
-            invert_mask, &sse);
-      }
-
-      cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                                            error_per_bit);
-
-      if (cost_array[4] < besterr) {
-        best_idx = 4;
-        besterr = cost_array[4];
-        *distortion = thismse;
-        *sse1 = sse;
-      }
-    } else {
-      cost_array[idx] = INT_MAX;
-    }
-
-    if (best_idx < 4 && best_idx >= 0) {
-      br += search_step[best_idx].row;
-      bc += search_step[best_idx].col;
-    } else if (best_idx == 4) {
-      br = tr;
-      bc = tc;
-    }
-
-    if (iters_per_step > 1 && best_idx != -1) {
-      if (use_accurate_subpel_search) {
-        SECOND_LEVEL_CHECKS_BEST(1);
-      } else {
-        SECOND_LEVEL_CHECKS_BEST(0);
-      }
-    }
-
-    search_step += 4;
-    hstep >>= 1;
-    best_idx = -1;
-  }
-
-  // These lines insure static analysis doesn't warn that
-  // tr and tc aren't used after the above point.
-  (void)tr;
-  (void)tc;
-
-  bestmv->row = br;
-  bestmv->col = bc;
-
-  return besterr;
-}
-
-#undef PRE
-#undef CHECK_BETTER
-
-unsigned int av1_compute_motion_cost(const AV1_COMP *cpi, MACROBLOCK *const x,
-                                     BLOCK_SIZE bsize, int mi_row, int mi_col,
-                                     const MV *this_mv) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *xd = &x->e_mbd;
-  const uint8_t *const src = x->plane[0].src.buf;
-  const int src_stride = x->plane[0].src.stride;
-  uint8_t *const dst = xd->plane[0].dst.buf;
-  const int dst_stride = xd->plane[0].dst.stride;
-  const aom_variance_fn_ptr_t *vfp = &cpi->fn_ptr[bsize];
-  const int_mv ref_mv = av1_get_ref_mv(x, 0);
-  unsigned int mse;
-  unsigned int sse;
-
-  av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, bsize);
-  mse = vfp->vf(dst, dst_stride, src, src_stride, &sse);
-  mse += mv_err_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost, x->mvcost,
-                     x->errorperbit);
-  return mse;
-}
-
-// Refine MV in a small range
-unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
-                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
-                                  int *pts0, int *pts_inref0,
-                                  int total_samples) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  const MV neighbors[8] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 },
-                            { 0, -2 }, { 2, 0 }, { 0, 2 }, { -2, 0 } };
-  const int_mv ref_mv = av1_get_ref_mv(x, 0);
-  int16_t br = mbmi->mv[0].as_mv.row;
-  int16_t bc = mbmi->mv[0].as_mv.col;
-  int16_t *tr = &mbmi->mv[0].as_mv.row;
-  int16_t *tc = &mbmi->mv[0].as_mv.col;
-  WarpedMotionParams best_wm_params = mbmi->wm_params;
-  int best_num_proj_ref = mbmi->num_proj_ref;
-  unsigned int bestmse;
-  int minc, maxc, minr, maxr;
-  const int start = cm->allow_high_precision_mv ? 0 : 4;
-  int ite;
-
-  set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr,
-                             &ref_mv.as_mv);
-
-  // Calculate the center position's error
-  assert(bc >= minc && bc <= maxc && br >= minr && br <= maxr);
-  bestmse = av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col,
-                                    &mbmi->mv[0].as_mv);
-
-  // MV search
-  for (ite = 0; ite < 2; ++ite) {
-    int best_idx = -1;
-    int idx;
-
-    for (idx = start; idx < start + 4; ++idx) {
-      unsigned int thismse;
-
-      *tr = br + neighbors[idx].row;
-      *tc = bc + neighbors[idx].col;
-
-      if (*tc >= minc && *tc <= maxc && *tr >= minr && *tr <= maxr) {
-        MV this_mv = { *tr, *tc };
-        int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
-
-        memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
-        memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
-        if (total_samples > 1)
-          mbmi->num_proj_ref =
-              selectSamples(&this_mv, pts, pts_inref, total_samples, bsize);
-
-        if (!find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize, *tr,
-                             *tc, &mbmi->wm_params, mi_row, mi_col)) {
-          thismse =
-              av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col, &this_mv);
-
-          if (thismse < bestmse) {
-            best_idx = idx;
-            best_wm_params = mbmi->wm_params;
-            best_num_proj_ref = mbmi->num_proj_ref;
-            bestmse = thismse;
-          }
-        }
-      }
-    }
-
-    if (best_idx == -1) break;
-
-    if (best_idx >= 0) {
-      br += neighbors[best_idx].row;
-      bc += neighbors[best_idx].col;
-    }
-  }
-
-  *tr = br;
-  *tc = bc;
-  mbmi->wm_params = best_wm_params;
-  mbmi->num_proj_ref = best_num_proj_ref;
-  return bestmse;
-}
-
-static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
-                               int range) {
-  return ((row - range) >= mv_limits->row_min) &
-         ((row + range) <= mv_limits->row_max) &
-         ((col - range) >= mv_limits->col_min) &
-         ((col + range) <= mv_limits->col_max);
-}
-
-static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
-  return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) &&
-         (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
-}
-
-#define CHECK_BETTER                                                      \
-  {                                                                       \
-    if (thissad < bestsad) {                                              \
-      if (use_mvcost)                                                     \
-        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
-      if (thissad < bestsad) {                                            \
-        bestsad = thissad;                                                \
-        best_site = i;                                                    \
-      }                                                                   \
-    }                                                                     \
-  }
-
-#define MAX_PATTERN_SCALES 11
-#define MAX_PATTERN_CANDIDATES 8  // max number of canddiates per scale
-#define PATTERN_CANDIDATES_REF 3  // number of refinement candidates
-
-// Calculate and return a sad+mvcost list around an integer best pel.
-static INLINE void calc_int_cost_list(const MACROBLOCK *x,
-                                      const MV *const ref_mv, int sadpb,
-                                      const aom_variance_fn_ptr_t *fn_ptr,
-                                      const MV *best_mv, int *cost_list) {
-  static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
-  const struct buf_2d *const what = &x->plane[0].src;
-  const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
-  const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
-  const int br = best_mv->row;
-  const int bc = best_mv->col;
-  int i;
-  unsigned int sse;
-  const MV this_mv = { br, bc };
-
-  cost_list[0] =
-      fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
-                 in_what->stride, &sse) +
-      mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
-  if (check_bounds(&x->mv_limits, br, bc, 1)) {
-    for (i = 0; i < 4; i++) {
-      const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
-      cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
-                                    get_buf_from_mv(in_what, &neighbor_mv),
-                                    in_what->stride, &sse) +
-                         mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost,
-                                     x->mvcost, x->errorperbit);
-    }
-  } else {
-    for (i = 0; i < 4; i++) {
-      const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
-      if (!is_mv_in(&x->mv_limits, &neighbor_mv))
-        cost_list[i + 1] = INT_MAX;
-      else
-        cost_list[i + 1] =
-            fn_ptr->vf(what->buf, what->stride,
-                       get_buf_from_mv(in_what, &neighbor_mv), in_what->stride,
-                       &sse) +
-            mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
-                        x->errorperbit);
-    }
-  }
-}
-
-static INLINE void calc_int_sad_list(const MACROBLOCK *x,
-                                     const MV *const ref_mv, int sadpb,
-                                     const aom_variance_fn_ptr_t *fn_ptr,
-                                     const MV *best_mv, int *cost_list,
-                                     const int use_mvcost, const int bestsad) {
-  static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
-  const struct buf_2d *const what = &x->plane[0].src;
-  const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
-  const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
-  int i;
-  const int br = best_mv->row;
-  const int bc = best_mv->col;
-
-  if (cost_list[0] == INT_MAX) {
-    cost_list[0] = bestsad;
-    if (check_bounds(&x->mv_limits, br, bc, 1)) {
-      for (i = 0; i < 4; i++) {
-        const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
-        cost_list[i + 1] =
-            fn_ptr->sdf(what->buf, what->stride,
-                        get_buf_from_mv(in_what, &this_mv), in_what->stride);
-      }
-    } else {
-      for (i = 0; i < 4; i++) {
-        const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
-        if (!is_mv_in(&x->mv_limits, &this_mv))
-          cost_list[i + 1] = INT_MAX;
-        else
-          cost_list[i + 1] =
-              fn_ptr->sdf(what->buf, what->stride,
-                          get_buf_from_mv(in_what, &this_mv), in_what->stride);
-      }
-    }
-  } else {
-    if (use_mvcost) {
-      for (i = 0; i < 4; i++) {
-        const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
-        if (cost_list[i + 1] != INT_MAX) {
-          cost_list[i + 1] += mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
-        }
-      }
-    }
-  }
-}
-
-// Generic pattern search function that searches over multiple scales.
-// Each scale can have a different number of candidates and shape of
-// candidates as indicated in the num_candidates and candidates arrays
-// passed into this function
-//
-static int pattern_search(
-    MACROBLOCK *x, MV *start_mv, int search_param, int sad_per_bit,
-    int do_init_search, int *cost_list, const aom_variance_fn_ptr_t *vfp,
-    int use_mvcost, const MV *center_mv,
-    const int num_candidates[MAX_PATTERN_SCALES],
-    const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
-    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
-  };
-  int i, s, t;
-  const struct buf_2d *const what = &x->plane[0].src;
-  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
-  const int last_is_4 = num_candidates[0] == 4;
-  int br, bc;
-  int bestsad = INT_MAX;
-  int thissad;
-  int k = -1;
-  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
-  assert(search_param < MAX_MVSEARCH_STEPS);
-  int best_init_s = search_param_to_steps[search_param];
-  // adjust ref_mv to make sure it is within MV range
-  clamp_mv(start_mv, x->mv_limits.col_min, x->mv_limits.col_max,
-           x->mv_limits.row_min, x->mv_limits.row_max);
-  br = start_mv->row;
-  bc = start_mv->col;
-  if (cost_list != NULL) {
-    cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
-        INT_MAX;
-  }
-
-  // Work out the start point for the search
-  bestsad = vfp->sdf(what->buf, what->stride,
-                     get_buf_from_mv(in_what, start_mv), in_what->stride) +
-            mvsad_err_cost(x, start_mv, &fcenter_mv, sad_per_bit);
-
-  // Search all possible scales upto the search param around the center point
-  // pick the scale of the point that is best as the starting scale of
-  // further steps around it.
-  if (do_init_search) {
-    s = best_init_s;
-    best_init_s = -1;
-    for (t = 0; t <= s; ++t) {
-      int best_site = -1;
-      if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
-        for (i = 0; i < num_candidates[t]; i++) {
-          const MV this_mv = { br + candidates[t][i].row,
-                               bc + candidates[t][i].col };
-          thissad =
-              vfp->sdf(what->buf, what->stride,
-                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
-          CHECK_BETTER
-        }
-      } else {
-        for (i = 0; i < num_candidates[t]; i++) {
-          const MV this_mv = { br + candidates[t][i].row,
-                               bc + candidates[t][i].col };
-          if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
-          thissad =
-              vfp->sdf(what->buf, what->stride,
-                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
-          CHECK_BETTER
-        }
-      }
-      if (best_site == -1) {
-        continue;
-      } else {
-        best_init_s = t;
-        k = best_site;
-      }
-    }
-    if (best_init_s != -1) {
-      br += candidates[best_init_s][k].row;
-      bc += candidates[best_init_s][k].col;
-    }
-  }
-
-  // If the center point is still the best, just skip this and move to
-  // the refinement step.
-  if (best_init_s != -1) {
-    const int last_s = (last_is_4 && cost_list != NULL);
-    int best_site = -1;
-    s = best_init_s;
-
-    for (; s >= last_s; s--) {
-      // No need to search all points the 1st time if initial search was used
-      if (!do_init_search || s != best_init_s) {
-        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
-          for (i = 0; i < num_candidates[s]; i++) {
-            const MV this_mv = { br + candidates[s][i].row,
-                                 bc + candidates[s][i].col };
-            thissad =
-                vfp->sdf(what->buf, what->stride,
-                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
-            CHECK_BETTER
-          }
-        } else {
-          for (i = 0; i < num_candidates[s]; i++) {
-            const MV this_mv = { br + candidates[s][i].row,
-                                 bc + candidates[s][i].col };
-            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
-            thissad =
-                vfp->sdf(what->buf, what->stride,
-                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
-            CHECK_BETTER
-          }
-        }
-
-        if (best_site == -1) {
-          continue;
-        } else {
-          br += candidates[s][best_site].row;
-          bc += candidates[s][best_site].col;
-          k = best_site;
-        }
-      }
-
-      do {
-        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
-        best_site = -1;
-        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
-        next_chkpts_indices[1] = k;
-        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
-
-        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
-          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
-            const MV this_mv = {
-              br + candidates[s][next_chkpts_indices[i]].row,
-              bc + candidates[s][next_chkpts_indices[i]].col
-            };
-            thissad =
-                vfp->sdf(what->buf, what->stride,
-                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
-            CHECK_BETTER
-          }
-        } else {
-          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
-            const MV this_mv = {
-              br + candidates[s][next_chkpts_indices[i]].row,
-              bc + candidates[s][next_chkpts_indices[i]].col
-            };
-            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
-            thissad =
-                vfp->sdf(what->buf, what->stride,
-                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
-            CHECK_BETTER
-          }
-        }
-
-        if (best_site != -1) {
-          k = next_chkpts_indices[best_site];
-          br += candidates[s][k].row;
-          bc += candidates[s][k].col;
-        }
-      } while (best_site != -1);
-    }
-
-    // Note: If we enter the if below, then cost_list must be non-NULL.
-    if (s == 0) {
-      cost_list[0] = bestsad;
-      if (!do_init_search || s != best_init_s) {
-        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
-          for (i = 0; i < num_candidates[s]; i++) {
-            const MV this_mv = { br + candidates[s][i].row,
-                                 bc + candidates[s][i].col };
-            cost_list[i + 1] = thissad =
-                vfp->sdf(what->buf, what->stride,
-                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
-            CHECK_BETTER
-          }
-        } else {
-          for (i = 0; i < num_candidates[s]; i++) {
-            const MV this_mv = { br + candidates[s][i].row,
-                                 bc + candidates[s][i].col };
-            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
-            cost_list[i + 1] = thissad =
-                vfp->sdf(what->buf, what->stride,
-                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
-            CHECK_BETTER
-          }
-        }
-
-        if (best_site != -1) {
-          br += candidates[s][best_site].row;
-          bc += candidates[s][best_site].col;
-          k = best_site;
-        }
-      }
-      while (best_site != -1) {
-        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
-        best_site = -1;
-        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
-        next_chkpts_indices[1] = k;
-        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
-        cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
-        cost_list[((k + 2) % 4) + 1] = cost_list[0];
-        cost_list[0] = bestsad;
-
-        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
-          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
-            const MV this_mv = {
-              br + candidates[s][next_chkpts_indices[i]].row,
-              bc + candidates[s][next_chkpts_indices[i]].col
-            };
-            cost_list[next_chkpts_indices[i] + 1] = thissad =
-                vfp->sdf(what->buf, what->stride,
-                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
-            CHECK_BETTER
-          }
-        } else {
-          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
-            const MV this_mv = {
-              br + candidates[s][next_chkpts_indices[i]].row,
-              bc + candidates[s][next_chkpts_indices[i]].col
-            };
-            if (!is_mv_in(&x->mv_limits, &this_mv)) {
-              cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
-              continue;
-            }
-            cost_list[next_chkpts_indices[i] + 1] = thissad =
-                vfp->sdf(what->buf, what->stride,
-                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
-            CHECK_BETTER
-          }
-        }
-
-        if (best_site != -1) {
-          k = next_chkpts_indices[best_site];
-          br += candidates[s][k].row;
-          bc += candidates[s][k].col;
-        }
-      }
-    }
-  }
-
-  // Returns the one-away integer pel cost/sad around the best as follows:
-  // cost_list[0]: cost/sad at the best integer pel
-  // cost_list[1]: cost/sad at delta {0, -1} (left)   from the best integer pel
-  // cost_list[2]: cost/sad at delta { 1, 0} (bottom) from the best integer pel
-  // cost_list[3]: cost/sad at delta { 0, 1} (right)  from the best integer pel
-  // cost_list[4]: cost/sad at delta {-1, 0} (top)    from the best integer pel
-  if (cost_list) {
-    const MV best_int_mv = { br, bc };
-    if (last_is_4) {
-      calc_int_sad_list(x, center_mv, sad_per_bit, vfp, &best_int_mv, cost_list,
-                        use_mvcost, bestsad);
-    } else {
-      calc_int_cost_list(x, center_mv, sad_per_bit, vfp, &best_int_mv,
-                         cost_list);
-    }
-  }
-  x->best_mv.as_mv.row = br;
-  x->best_mv.as_mv.col = bc;
-  return bestsad;
-}
-
-int av1_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
-                       const MV *center_mv, const aom_variance_fn_ptr_t *vfp,
-                       int use_mvcost) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct buf_2d *const what = &x->plane[0].src;
-  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
-  const MV mv = { best_mv->row * 8, best_mv->col * 8 };
-  unsigned int unused;
-
-  return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
-                 in_what->stride, &unused) +
-         (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
-                                   x->errorperbit)
-                     : 0);
-}
-
-int av1_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
-                          const MV *center_mv, const uint8_t *second_pred,
-                          const aom_variance_fn_ptr_t *vfp, int use_mvcost) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct buf_2d *const what = &x->plane[0].src;
-  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
-  const MV mv = { best_mv->row * 8, best_mv->col * 8 };
-  unsigned int unused;
-
-  if (xd->jcp_param.use_jnt_comp_avg)
-    return vfp->jsvaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
-                      what->buf, what->stride, &unused, second_pred,
-                      &xd->jcp_param) +
-           (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
-                                     x->errorperbit)
-                       : 0);
-  else
-    return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
-                     what->buf, what->stride, &unused, second_pred) +
-           (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
-                                     x->errorperbit)
-                       : 0);
-}
-
-int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv,
-                            const MV *center_mv, const uint8_t *second_pred,
-                            const uint8_t *mask, int mask_stride,
-                            int invert_mask, const aom_variance_fn_ptr_t *vfp,
-                            int use_mvcost) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct buf_2d *const what = &x->plane[0].src;
-  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
-  const MV mv = { best_mv->row * 8, best_mv->col * 8 };
-  unsigned int unused;
-
-  return vfp->msvf(what->buf, what->stride, 0, 0,
-                   get_buf_from_mv(in_what, best_mv), in_what->stride,
-                   second_pred, mask, mask_stride, invert_mask, &unused) +
-         (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
-                                   x->errorperbit)
-                     : 0);
-}
-
-int av1_hex_search(MACROBLOCK *x, MV *start_mv, int search_param,
-                   int sad_per_bit, int do_init_search, int *cost_list,
-                   const aom_variance_fn_ptr_t *vfp, int use_mvcost,
-                   const MV *center_mv) {
-  // First scale has 8-closest points, the rest have 6 points in hex shape
-  // at increasing scales
-  static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6,
-                                                              6, 6, 6, 6, 6 };
-  // Note that the largest candidate step at each scale is 2^scale
-  /* clang-format off */
-  static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
-    { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 },
-      { -1, 0 } },
-    { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } },
-    { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } },
-    { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } },
-    { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } },
-    { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 },
-      { -32, 0 } },
-    { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 },
-      { -64, 0 } },
-    { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 },
-      { -128, 0 } },
-    { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 },
-      { -256, 0 } },
-    { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 },
-      { -512, 0 } },
-    { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 },
-      { -512, 1024 }, { -1024, 0 } },
-  };
-  /* clang-format on */
-  return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
-                        cost_list, vfp, use_mvcost, center_mv,
-                        hex_num_candidates, hex_candidates);
-}
-
-static int bigdia_search(MACROBLOCK *x, MV *start_mv, int search_param,
-                         int sad_per_bit, int do_init_search, int *cost_list,
-                         const aom_variance_fn_ptr_t *vfp, int use_mvcost,
-                         const MV *center_mv) {
-  // First scale has 4-closest points, the rest have 8 points in diamond
-  // shape at increasing scales
-  static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
-    4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-  };
-  // Note that the largest candidate step at each scale is 2^scale
-  /* clang-format off */
-  static const MV
-      bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
-        { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } },
-        { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 },
-          { -1, 1 }, { -2, 0 } },
-        { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 },
-          { -2, 2 }, { -4, 0 } },
-        { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 },
-          { -4, 4 }, { -8, 0 } },
-        { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 },
-          { -8, 8 }, { -16, 0 } },
-        { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 },
-          { 0, 32 }, { -16, 16 }, { -32, 0 } },
-        { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 },
-          { 0, 64 }, { -32, 32 }, { -64, 0 } },
-        { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 },
-          { 0, 128 }, { -64, 64 }, { -128, 0 } },
-        { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 },
-          { 0, 256 }, { -128, 128 }, { -256, 0 } },
-        { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 },
-          { 0, 512 }, { -256, 256 }, { -512, 0 } },
-        { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 },
-          { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } },
-      };
-  /* clang-format on */
-  return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
-                        cost_list, vfp, use_mvcost, center_mv,
-                        bigdia_num_candidates, bigdia_candidates);
-}
-
-static int square_search(MACROBLOCK *x, MV *start_mv, int search_param,
-                         int sad_per_bit, int do_init_search, int *cost_list,
-                         const aom_variance_fn_ptr_t *vfp, int use_mvcost,
-                         const MV *center_mv) {
-  // All scales have 8 closest points in square shape
-  static const int square_num_candidates[MAX_PATTERN_SCALES] = {
-    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-  };
-  // Note that the largest candidate step at each scale is 2^scale
-  /* clang-format off */
-  static const MV
-      square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
-        { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 },
-          { -1, 1 }, { -1, 0 } },
-        { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 },
-          { -2, 2 }, { -2, 0 } },
-        { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 },
-          { -4, 4 }, { -4, 0 } },
-        { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 },
-          { -8, 8 }, { -8, 0 } },
-        { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 },
-          { 0, 16 }, { -16, 16 }, { -16, 0 } },
-        { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 },
-          { 0, 32 }, { -32, 32 }, { -32, 0 } },
-        { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 },
-          { 0, 64 }, { -64, 64 }, { -64, 0 } },
-        { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 },
-          { 0, 128 }, { -128, 128 }, { -128, 0 } },
-        { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 },
-          { 0, 256 }, { -256, 256 }, { -256, 0 } },
-        { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 },
-          { 0, 512 }, { -512, 512 }, { -512, 0 } },
-        { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 },
-          { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } },
-      };
-  /* clang-format on */
-  return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
-                        cost_list, vfp, use_mvcost, center_mv,
-                        square_num_candidates, square_candidates);
-}
-
-static int fast_hex_search(MACROBLOCK *x, MV *ref_mv, int search_param,
-                           int sad_per_bit,
-                           int do_init_search,  // must be zero for fast_hex
-                           int *cost_list, const aom_variance_fn_ptr_t *vfp,
-                           int use_mvcost, const MV *center_mv) {
-  return av1_hex_search(x, ref_mv, AOMMAX(MAX_MVSEARCH_STEPS - 2, search_param),
-                        sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
-                        center_mv);
-}
-
-static int fast_dia_search(MACROBLOCK *x, MV *ref_mv, int search_param,
-                           int sad_per_bit, int do_init_search, int *cost_list,
-                           const aom_variance_fn_ptr_t *vfp, int use_mvcost,
-                           const MV *center_mv) {
-  return bigdia_search(x, ref_mv, AOMMAX(MAX_MVSEARCH_STEPS - 2, search_param),
-                       sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
-                       center_mv);
-}
-
-#undef CHECK_BETTER
-
-// Exhuastive motion search around a given centre position with a given
-// step size.
-static int exhuastive_mesh_search(MACROBLOCK *x, MV *ref_mv, MV *best_mv,
-                                  int range, int step, int sad_per_bit,
-                                  const aom_variance_fn_ptr_t *fn_ptr,
-                                  const MV *center_mv) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct buf_2d *const what = &x->plane[0].src;
-  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
-  MV fcenter_mv = { center_mv->row, center_mv->col };
-  unsigned int best_sad = INT_MAX;
-  int r, c, i;
-  int start_col, end_col, start_row, end_row;
-  int col_step = (step > 1) ? step : 4;
-
-  assert(step >= 1);
-
-  clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
-           x->mv_limits.row_min, x->mv_limits.row_max);
-  *best_mv = fcenter_mv;
-  best_sad =
-      fn_ptr->sdf(what->buf, what->stride,
-                  get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
-      mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
-  start_row = AOMMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
-  start_col = AOMMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
-  end_row = AOMMIN(range, x->mv_limits.row_max - fcenter_mv.row);
-  end_col = AOMMIN(range, x->mv_limits.col_max - fcenter_mv.col);
-
-  for (r = start_row; r <= end_row; r += step) {
-    for (c = start_col; c <= end_col; c += col_step) {
-      // Step > 1 means we are not checking every location in this pass.
-      if (step > 1) {
-        const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
-        unsigned int sad =
-            fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
-                        in_what->stride);
-        if (sad < best_sad) {
-          sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
-          if (sad < best_sad) {
-            best_sad = sad;
-            x->second_best_mv.as_mv = *best_mv;
-            *best_mv = mv;
-          }
-        }
-      } else {
-        // 4 sads in a single call if we are checking every location
-        if (c + 3 <= end_col) {
-          unsigned int sads[4];
-          const uint8_t *addrs[4];
-          for (i = 0; i < 4; ++i) {
-            const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
-            addrs[i] = get_buf_from_mv(in_what, &mv);
-          }
-          fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
-
-          for (i = 0; i < 4; ++i) {
-            if (sads[i] < best_sad) {
-              const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
-              const unsigned int sad =
-                  sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
-              if (sad < best_sad) {
-                best_sad = sad;
-                x->second_best_mv.as_mv = *best_mv;
-                *best_mv = mv;
-              }
-            }
-          }
-        } else {
-          for (i = 0; i < end_col - c; ++i) {
-            const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
-            unsigned int sad =
-                fn_ptr->sdf(what->buf, what->stride,
-                            get_buf_from_mv(in_what, &mv), in_what->stride);
-            if (sad < best_sad) {
-              sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
-              if (sad < best_sad) {
-                best_sad = sad;
-                x->second_best_mv.as_mv = *best_mv;
-                *best_mv = mv;
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-
-  return best_sad;
-}
-
-int av1_diamond_search_sad_c(MACROBLOCK *x, const search_site_config *cfg,
-                             MV *ref_mv, MV *best_mv, int search_param,
-                             int sad_per_bit, int *num00,
-                             const aom_variance_fn_ptr_t *fn_ptr,
-                             const MV *center_mv) {
-  int i, j, step;
-
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  uint8_t *what = x->plane[0].src.buf;
-  const int what_stride = x->plane[0].src.stride;
-  const uint8_t *in_what;
-  const int in_what_stride = xd->plane[0].pre[0].stride;
-  const uint8_t *best_address;
-
-  unsigned int bestsad = INT_MAX;
-  int best_site = 0;
-  int last_site = 0;
-
-  int ref_row;
-  int ref_col;
-
-  // search_param determines the length of the initial step and hence the number
-  // of iterations.
-  // 0 = initial step (MAX_FIRST_STEP) pel
-  // 1 = (MAX_FIRST_STEP/2) pel,
-  // 2 = (MAX_FIRST_STEP/4) pel...
-  const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
-  const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
-
-  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
-  clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
-           x->mv_limits.row_min, x->mv_limits.row_max);
-  ref_row = ref_mv->row;
-  ref_col = ref_mv->col;
-  *num00 = 0;
-  best_mv->row = ref_row;
-  best_mv->col = ref_col;
-
-  // Work out the start point for the search
-  in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
-  best_address = in_what;
-
-  // Check the starting position
-  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
-            mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
-
-  i = 1;
-
-  for (step = 0; step < tot_steps; step++) {
-    int all_in = 1, t;
-
-    // All_in is true if every one of the points we are checking are within
-    // the bounds of the image.
-    all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_limits.row_min);
-    all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_limits.row_max);
-    all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_limits.col_min);
-    all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_limits.col_max);
-
-    // If all the pixels are within the bounds we don't check whether the
-    // search point is valid in this loop,  otherwise we check each point
-    // for validity..
-    if (all_in) {
-      unsigned int sad_array[4];
-
-      for (j = 0; j < cfg->searches_per_step; j += 4) {
-        unsigned char const *block_offset[4];
-
-        for (t = 0; t < 4; t++)
-          block_offset[t] = ss[i + t].offset + best_address;
-
-        fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
-                       sad_array);
-
-        for (t = 0; t < 4; t++, i++) {
-          if (sad_array[t] < bestsad) {
-            const MV this_mv = { best_mv->row + ss[i].mv.row,
-                                 best_mv->col + ss[i].mv.col };
-            sad_array[t] +=
-                mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
-            if (sad_array[t] < bestsad) {
-              bestsad = sad_array[t];
-              best_site = i;
-            }
-          }
-        }
-      }
-    } else {
-      for (j = 0; j < cfg->searches_per_step; j++) {
-        // Trap illegal vectors
-        const MV this_mv = { best_mv->row + ss[i].mv.row,
-                             best_mv->col + ss[i].mv.col };
-
-        if (is_mv_in(&x->mv_limits, &this_mv)) {
-          const uint8_t *const check_here = ss[i].offset + best_address;
-          unsigned int thissad =
-              fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
-
-          if (thissad < bestsad) {
-            thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
-            if (thissad < bestsad) {
-              bestsad = thissad;
-              best_site = i;
-            }
-          }
-        }
-        i++;
-      }
-    }
-    if (best_site != last_site) {
-      x->second_best_mv.as_mv = *best_mv;
-      best_mv->row += ss[best_site].mv.row;
-      best_mv->col += ss[best_site].mv.col;
-      best_address += ss[best_site].offset;
-      last_site = best_site;
-#if defined(NEW_DIAMOND_SEARCH)
-      while (1) {
-        const MV this_mv = { best_mv->row + ss[best_site].mv.row,
-                             best_mv->col + ss[best_site].mv.col };
-        if (is_mv_in(&x->mv_limits, &this_mv)) {
-          const uint8_t *const check_here = ss[best_site].offset + best_address;
-          unsigned int thissad =
-              fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
-          if (thissad < bestsad) {
-            thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
-            if (thissad < bestsad) {
-              bestsad = thissad;
-              best_mv->row += ss[best_site].mv.row;
-              best_mv->col += ss[best_site].mv.col;
-              best_address += ss[best_site].offset;
-              continue;
-            }
-          }
-        }
-        break;
-      }
-#endif
-    } else if (best_address == in_what) {
-      (*num00)++;
-    }
-  }
-  return bestsad;
-}
-
-/* do_refine: If last step (1-away) of n-step search doesn't pick the center
-              point as the best match, we will do a final 1-away diamond
-              refining search  */
-static int full_pixel_diamond(const AV1_COMP *const cpi, MACROBLOCK *x,
-                              MV *mvp_full, int step_param, int sadpb,
-                              int further_steps, int do_refine, int *cost_list,
-                              const aom_variance_fn_ptr_t *fn_ptr,
-                              const MV *ref_mv) {
-  MV temp_mv;
-  int thissme, n, num00 = 0;
-  int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
-                                        step_param, sadpb, &n, fn_ptr, ref_mv);
-  if (bestsme < INT_MAX)
-    bestsme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
-  x->best_mv.as_mv = temp_mv;
-
-  // If there won't be more n-step search, check to see if refining search is
-  // needed.
-  if (n > further_steps) do_refine = 0;
-
-  while (n < further_steps) {
-    ++n;
-
-    if (num00) {
-      num00--;
-    } else {
-      thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
-                                        step_param + n, sadpb, &num00, fn_ptr,
-                                        ref_mv);
-      if (thissme < INT_MAX)
-        thissme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
-
-      // check to see if refining search is needed.
-      if (num00 > further_steps - n) do_refine = 0;
-
-      if (thissme < bestsme) {
-        bestsme = thissme;
-        x->best_mv.as_mv = temp_mv;
-      }
-    }
-  }
-
-  // final 1-away diamond refining search
-  if (do_refine) {
-    const int search_range = 8;
-    MV best_mv = x->best_mv.as_mv;
-    thissme = av1_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr,
-                                      ref_mv);
-    if (thissme < INT_MAX)
-      thissme = av1_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
-    if (thissme < bestsme) {
-      bestsme = thissme;
-      x->best_mv.as_mv = best_mv;
-    }
-  }
-
-  // Return cost list.
-  if (cost_list) {
-    calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, &x->best_mv.as_mv, cost_list);
-  }
-  return bestsme;
-}
-
-#define MIN_RANGE 7
-#define MAX_RANGE 256
-#define MIN_INTERVAL 1
-// Runs an limited range exhaustive mesh search using a pattern set
-// according to the encode speed profile.
-static int full_pixel_exhaustive(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                 const MV *centre_mv_full, int sadpb,
-                                 int *cost_list,
-                                 const aom_variance_fn_ptr_t *fn_ptr,
-                                 const MV *ref_mv, MV *dst_mv) {
-  const SPEED_FEATURES *const sf = &cpi->sf;
-  MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
-  MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
-  int bestsme;
-  int i;
-  int interval = sf->mesh_patterns[0].interval;
-  int range = sf->mesh_patterns[0].range;
-  int baseline_interval_divisor;
-
-  // Keep track of number of exhaustive calls (this frame in this thread).
-  ++(*x->ex_search_count_ptr);
-
-  // Trap illegal values for interval and range for this function.
-  if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
-      (interval > range))
-    return INT_MAX;
-
-  baseline_interval_divisor = range / interval;
-
-  // Check size of proposed first range against magnitude of the centre
-  // value used as a starting point.
-  range = AOMMAX(range, (5 * AOMMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
-  range = AOMMIN(range, MAX_RANGE);
-  interval = AOMMAX(interval, range / baseline_interval_divisor);
-
-  // initial search
-  bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval,
-                                   sadpb, fn_ptr, &temp_mv);
-
-  if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
-    // Progressive searches with range and step size decreasing each time
-    // till we reach a step size of 1. Then break out.
-    for (i = 1; i < MAX_MESH_STEP; ++i) {
-      // First pass with coarser step and longer range
-      bestsme = exhuastive_mesh_search(
-          x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range,
-          sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv);
-
-      if (sf->mesh_patterns[i].interval == 1) break;
-    }
-  }
-
-  if (bestsme < INT_MAX)
-    bestsme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
-  *dst_mv = temp_mv;
-
-  // Return cost list.
-  if (cost_list) {
-    calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
-  }
-  return bestsme;
-}
-
-int av1_refining_search_sad(MACROBLOCK *x, MV *ref_mv, int error_per_bit,
-                            int search_range,
-                            const aom_variance_fn_ptr_t *fn_ptr,
-                            const MV *center_mv) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
-  const struct buf_2d *const what = &x->plane[0].src;
-  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
-  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
-  const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
-  unsigned int best_sad =
-      fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
-      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
-  int i, j;
-
-  for (i = 0; i < search_range; i++) {
-    int best_site = -1;
-    const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) &
-                       ((ref_mv->row + 1) < x->mv_limits.row_max) &
-                       ((ref_mv->col - 1) > x->mv_limits.col_min) &
-                       ((ref_mv->col + 1) < x->mv_limits.col_max);
-
-    if (all_in) {
-      unsigned int sads[4];
-      const uint8_t *const positions[4] = { best_address - in_what->stride,
-                                            best_address - 1, best_address + 1,
-                                            best_address + in_what->stride };
-
-      fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
-
-      for (j = 0; j < 4; ++j) {
-        if (sads[j] < best_sad) {
-          const MV mv = { ref_mv->row + neighbors[j].row,
-                          ref_mv->col + neighbors[j].col };
-          sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
-          if (sads[j] < best_sad) {
-            best_sad = sads[j];
-            best_site = j;
-          }
-        }
-      }
-    } else {
-      for (j = 0; j < 4; ++j) {
-        const MV mv = { ref_mv->row + neighbors[j].row,
-                        ref_mv->col + neighbors[j].col };
-
-        if (is_mv_in(&x->mv_limits, &mv)) {
-          unsigned int sad =
-              fn_ptr->sdf(what->buf, what->stride,
-                          get_buf_from_mv(in_what, &mv), in_what->stride);
-          if (sad < best_sad) {
-            sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
-            if (sad < best_sad) {
-              best_sad = sad;
-              best_site = j;
-            }
-          }
-        }
-      }
-    }
-
-    if (best_site == -1) {
-      break;
-    } else {
-      x->second_best_mv.as_mv = *ref_mv;
-      ref_mv->row += neighbors[best_site].row;
-      ref_mv->col += neighbors[best_site].col;
-      best_address = get_buf_from_mv(in_what, ref_mv);
-    }
-  }
-
-  return best_sad;
-}
-
-// This function is called when we do joint motion search in comp_inter_inter
-// mode, or when searching for one component of an ext-inter compound mode.
-int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range,
-                             const aom_variance_fn_ptr_t *fn_ptr,
-                             const uint8_t *mask, int mask_stride,
-                             int invert_mask, const MV *center_mv,
-                             const uint8_t *second_pred) {
-  static const search_neighbors neighbors[8] = {
-    { { -1, 0 }, -1 * SEARCH_GRID_STRIDE_8P + 0 },
-    { { 0, -1 }, 0 * SEARCH_GRID_STRIDE_8P - 1 },
-    { { 0, 1 }, 0 * SEARCH_GRID_STRIDE_8P + 1 },
-    { { 1, 0 }, 1 * SEARCH_GRID_STRIDE_8P + 0 },
-    { { -1, -1 }, -1 * SEARCH_GRID_STRIDE_8P - 1 },
-    { { 1, -1 }, 1 * SEARCH_GRID_STRIDE_8P - 1 },
-    { { -1, 1 }, -1 * SEARCH_GRID_STRIDE_8P + 1 },
-    { { 1, 1 }, 1 * SEARCH_GRID_STRIDE_8P + 1 }
-  };
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct buf_2d *const what = &x->plane[0].src;
-  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
-  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
-  MV *best_mv = &x->best_mv.as_mv;
-  unsigned int best_sad = INT_MAX;
-  int i, j;
-  uint8_t do_refine_search_grid[SEARCH_GRID_STRIDE_8P * SEARCH_GRID_STRIDE_8P] =
-      { 0 };
-  int grid_center = SEARCH_GRID_CENTER_8P;
-  int grid_coord = grid_center;
-
-  clamp_mv(best_mv, x->mv_limits.col_min, x->mv_limits.col_max,
-           x->mv_limits.row_min, x->mv_limits.row_max);
-  if (mask) {
-    best_sad = fn_ptr->msdf(what->buf, what->stride,
-                            get_buf_from_mv(in_what, best_mv), in_what->stride,
-                            second_pred, mask, mask_stride, invert_mask) +
-               mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
-  } else {
-    if (xd->jcp_param.use_jnt_comp_avg)
-      best_sad = fn_ptr->jsdaf(what->buf, what->stride,
-                               get_buf_from_mv(in_what, best_mv),
-                               in_what->stride, second_pred, &xd->jcp_param) +
-                 mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
-    else
-      best_sad = fn_ptr->sdaf(what->buf, what->stride,
-                              get_buf_from_mv(in_what, best_mv),
-                              in_what->stride, second_pred) +
-                 mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
-  }
-
-  do_refine_search_grid[grid_coord] = 1;
-
-  for (i = 0; i < search_range; ++i) {
-    int best_site = -1;
-
-    for (j = 0; j < 8; ++j) {
-      grid_coord = grid_center + neighbors[j].coord_offset;
-      if (do_refine_search_grid[grid_coord] == 1) {
-        continue;
-      }
-      const MV mv = { best_mv->row + neighbors[j].coord.row,
-                      best_mv->col + neighbors[j].coord.col };
-
-      do_refine_search_grid[grid_coord] = 1;
-      if (is_mv_in(&x->mv_limits, &mv)) {
-        unsigned int sad;
-        if (mask) {
-          sad = fn_ptr->msdf(what->buf, what->stride,
-                             get_buf_from_mv(in_what, &mv), in_what->stride,
-                             second_pred, mask, mask_stride, invert_mask);
-        } else {
-          if (xd->jcp_param.use_jnt_comp_avg)
-            sad = fn_ptr->jsdaf(what->buf, what->stride,
-                                get_buf_from_mv(in_what, &mv), in_what->stride,
-                                second_pred, &xd->jcp_param);
-          else
-            sad = fn_ptr->sdaf(what->buf, what->stride,
-                               get_buf_from_mv(in_what, &mv), in_what->stride,
-                               second_pred);
-        }
-        if (sad < best_sad) {
-          sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
-          if (sad < best_sad) {
-            best_sad = sad;
-            best_site = j;
-          }
-        }
-      }
-    }
-
-    if (best_site == -1) {
-      break;
-    } else {
-      best_mv->row += neighbors[best_site].coord.row;
-      best_mv->col += neighbors[best_site].coord.col;
-      grid_center += neighbors[best_site].coord_offset;
-    }
-  }
-  return best_sad;
-}
-
-#define MIN_EX_SEARCH_LIMIT 128
-static int is_exhaustive_allowed(const AV1_COMP *const cpi, MACROBLOCK *x) {
-  const SPEED_FEATURES *const sf = &cpi->sf;
-  const int max_ex =
-      AOMMAX(MIN_EX_SEARCH_LIMIT,
-             (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
-
-  return sf->allow_exhaustive_searches &&
-         (sf->exhaustive_searches_thresh < INT_MAX) &&
-         (*x->ex_search_count_ptr <= max_ex) && !cpi->rc.is_src_frame_alt_ref;
-}
-
-int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
-                          MV *mvp_full, int step_param, int method,
-                          int run_mesh_search, int error_per_bit,
-                          int *cost_list, const MV *ref_mv, int var_max, int rd,
-                          int x_pos, int y_pos, int intra) {
-  const SPEED_FEATURES *const sf = &cpi->sf;
-  const aom_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
-  int var = 0;
-
-  if (cost_list) {
-    cost_list[0] = INT_MAX;
-    cost_list[1] = INT_MAX;
-    cost_list[2] = INT_MAX;
-    cost_list[3] = INT_MAX;
-    cost_list[4] = INT_MAX;
-  }
-
-  // Keep track of number of searches (this frame in this thread).
-  ++(*x->m_search_count_ptr);
-
-  switch (method) {
-    case FAST_DIAMOND:
-      var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
-                            cost_list, fn_ptr, 1, ref_mv);
-      break;
-    case FAST_HEX:
-      var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
-                            cost_list, fn_ptr, 1, ref_mv);
-      break;
-    case HEX:
-      var = av1_hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
-                           fn_ptr, 1, ref_mv);
-      break;
-    case SQUARE:
-      var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
-                          fn_ptr, 1, ref_mv);
-      break;
-    case BIGDIA:
-      var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
-                          fn_ptr, 1, ref_mv);
-      break;
-    case NSTEP:
-      var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
-                               MAX_MVSEARCH_STEPS - 1 - step_param, 1,
-                               cost_list, fn_ptr, ref_mv);
-
-      // Should we allow a follow on exhaustive search?
-      if (is_exhaustive_allowed(cpi, x)) {
-        int exhuastive_thr = sf->exhaustive_searches_thresh;
-        exhuastive_thr >>=
-            10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]);
-
-        // Threshold variance for an exhaustive full search.
-        if (var > exhuastive_thr) {
-          int var_ex;
-          MV tmp_mv_ex;
-          var_ex =
-              full_pixel_exhaustive(cpi, x, &x->best_mv.as_mv, error_per_bit,
-                                    cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
-
-          if (var_ex < var) {
-            var = var_ex;
-            x->best_mv.as_mv = tmp_mv_ex;
-          }
-        }
-      }
-      break;
-    default: assert(0 && "Invalid search method.");
-  }
-
-  // Should we allow a follow on exhaustive search?
-  if (!run_mesh_search) {
-    if (method == NSTEP) {
-      if (is_exhaustive_allowed(cpi, x)) {
-        int exhuastive_thr = sf->exhaustive_searches_thresh;
-        exhuastive_thr >>=
-            10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]);
-        // Threshold variance for an exhaustive full search.
-        if (var > exhuastive_thr) run_mesh_search = 1;
-      }
-    }
-  }
-
-  if (run_mesh_search) {
-    int var_ex;
-    MV tmp_mv_ex;
-    var_ex = full_pixel_exhaustive(cpi, x, &x->best_mv.as_mv, error_per_bit,
-                                   cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
-    if (var_ex < var) {
-      var = var_ex;
-      x->best_mv.as_mv = tmp_mv_ex;
-    }
-  }
-
-  if (method != NSTEP && rd && var < var_max)
-    var = av1_get_mvpred_var(x, &x->best_mv.as_mv, ref_mv, fn_ptr, 1);
-
-  do {
-    if (!intra || !av1_use_hash_me(&cpi->common)) break;
-
-    // already single ME
-    // get block size and original buffer of current block
-    const int block_height = block_size_high[bsize];
-    const int block_width = block_size_wide[bsize];
-    if (block_height == block_width && x_pos >= 0 && y_pos >= 0) {
-      if (block_width == 4 || block_width == 8 || block_width == 16 ||
-          block_width == 32 || block_width == 64 || block_width == 128) {
-        uint8_t *what = x->plane[0].src.buf;
-        const int what_stride = x->plane[0].src.stride;
-        uint32_t hash_value1, hash_value2;
-        MV best_hash_mv;
-        int best_hash_cost = INT_MAX;
-
-        // for the hashMap
-        hash_table *ref_frame_hash =
-            intra
-                ? &cpi->common.cur_frame->hash_table
-                : av1_get_ref_frame_hash_map(cpi, x->e_mbd.mi[0]->ref_frame[0]);
-
-        av1_get_block_hash_value(
-            what, what_stride, block_width, &hash_value1, &hash_value2,
-            x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, x);
-
-        const int count = av1_hash_table_count(ref_frame_hash, hash_value1);
-        // for intra, at lest one matching can be found, itself.
-        if (count <= (intra ? 1 : 0)) {
-          break;
-        }
-
-        Iterator iterator =
-            av1_hash_get_first_iterator(ref_frame_hash, hash_value1);
-        for (int i = 0; i < count; i++, iterator_increment(&iterator)) {
-          block_hash ref_block_hash = *(block_hash *)(iterator_get(&iterator));
-          if (hash_value2 == ref_block_hash.hash_value2) {
-            // For intra, make sure the prediction is from valid area.
-            if (intra) {
-              const int mi_col = x_pos / MI_SIZE;
-              const int mi_row = y_pos / MI_SIZE;
-              const MV dv = { 8 * (ref_block_hash.y - y_pos),
-                              8 * (ref_block_hash.x - x_pos) };
-              if (!av1_is_dv_valid(dv, &cpi->common, &x->e_mbd, mi_row, mi_col,
-                                   bsize, cpi->common.seq_params.mib_size_log2))
-                continue;
-            }
-            MV hash_mv;
-            hash_mv.col = ref_block_hash.x - x_pos;
-            hash_mv.row = ref_block_hash.y - y_pos;
-            if (!is_mv_in(&x->mv_limits, &hash_mv)) continue;
-            const int refCost =
-                av1_get_mvpred_var(x, &hash_mv, ref_mv, fn_ptr, 1);
-            if (refCost < best_hash_cost) {
-              best_hash_cost = refCost;
-              best_hash_mv = hash_mv;
-            }
-          }
-        }
-        if (best_hash_cost < var) {
-          x->second_best_mv = x->best_mv;
-          x->best_mv.as_mv = best_hash_mv;
-          var = best_hash_cost;
-        }
-      }
-    }
-  } while (0);
-
-  return var;
-}
-
-/* returns subpixel variance error function */
-#define DIST(r, c) \
-  vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, mask, &sse)
-
-/* checks if (r, c) has better score than previous best */
-#define MVC(r, c)                                                              \
-  (unsigned int)(mvcost                                                        \
-                     ? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +              \
-                         mvcost[0][((r)-rr)] + (int64_t)mvcost[1][((c)-rc)]) * \
-                            error_per_bit +                                    \
-                        4096) >>                                               \
-                           13                                                  \
-                     : 0)
-
-#define CHECK_BETTER(v, r, c)                             \
-  if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
-    thismse = (DIST(r, c));                               \
-    if ((v = MVC(r, c) + thismse) < besterr) {            \
-      besterr = v;                                        \
-      br = r;                                             \
-      bc = c;                                             \
-      *distortion = thismse;                              \
-      *sse1 = sse;                                        \
-    }                                                     \
-  } else {                                                \
-    v = INT_MAX;                                          \
-  }
-
-#undef CHECK_BETTER0
-#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
-
-#undef CHECK_BETTER1
-#define CHECK_BETTER1(v, r, c)                                                \
-  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {                     \
-    MV this_mv = { r, c };                                                    \
-    thismse = upsampled_obmc_pref_error(xd, cm, mi_row, mi_col, &this_mv,     \
-                                        mask, vfp, z, pre(y, y_stride, r, c), \
-                                        y_stride, sp(c), sp(r), w, h, &sse,   \
-                                        use_accurate_subpel_search);          \
-    if ((v = MVC(r, c) + thismse) < besterr) {                                \
-      besterr = v;                                                            \
-      br = r;                                                                 \
-      bc = c;                                                                 \
-      *distortion = thismse;                                                  \
-      *sse1 = sse;                                                            \
-    }                                                                         \
-  } else {                                                                    \
-    v = INT_MAX;                                                              \
-  }
-
-static unsigned int setup_obmc_center_error(
-    const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit,
-    const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc,
-    const uint8_t *const y, int y_stride, int offset, int *mvjcost,
-    int *mvcost[2], unsigned int *sse1, int *distortion) {
-  unsigned int besterr;
-  besterr = vfp->ovf(y + offset, y_stride, wsrc, mask, sse1);
-  *distortion = besterr;
-  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
-  return besterr;
-}
-
-static int upsampled_obmc_pref_error(
-    MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
-    const MV *const mv, const int32_t *mask, const aom_variance_fn_ptr_t *vfp,
-    const int32_t *const wsrc, const uint8_t *const y, int y_stride,
-    int subpel_x_q3, int subpel_y_q3, int w, int h, unsigned int *sse,
-    int subpel_search) {
-  unsigned int besterr;
-
-  DECLARE_ALIGNED(16, uint8_t, pred[2 * MAX_SB_SQUARE]);
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred);
-    aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred8, w, h,
-                              subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd,
-                              subpel_search);
-    besterr = vfp->ovf(pred8, w, wsrc, mask, sse);
-  } else {
-    aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3,
-                       subpel_y_q3, y, y_stride, subpel_search);
-
-    besterr = vfp->ovf(pred, w, wsrc, mask, sse);
-  }
-  return besterr;
-}
-
-static unsigned int upsampled_setup_obmc_center_error(
-    MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col,
-    const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit,
-    const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc,
-    const uint8_t *const y, int y_stride, int w, int h, int offset,
-    int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion,
-    int subpel_search) {
-  unsigned int besterr = upsampled_obmc_pref_error(
-      xd, cm, mi_row, mi_col, bestmv, mask, vfp, wsrc, y + offset, y_stride, 0,
-      0, w, h, sse1, subpel_search);
-  *distortion = besterr;
-  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
-  return besterr;
-}
-
-int av1_find_best_obmc_sub_pixel_tree_up(
-    MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
-    MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit,
-    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
-    int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
-    int is_second, int use_accurate_subpel_search) {
-  const int32_t *wsrc = x->wsrc_buf;
-  const int32_t *mask = x->mask_buf;
-  const int *const z = wsrc;
-  const int *const src_address = z;
-  MACROBLOCKD *xd = &x->e_mbd;
-  struct macroblockd_plane *const pd = &xd->plane[0];
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  unsigned int besterr = INT_MAX;
-  unsigned int sse;
-  unsigned int thismse;
-
-  int rr = ref_mv->row;
-  int rc = ref_mv->col;
-  int br = bestmv->row * 8;
-  int bc = bestmv->col * 8;
-  int hstep = 4;
-  int iter;
-  int round = 3 - forced_stop;
-  int tr = br;
-  int tc = bc;
-  const MV *search_step = search_step_table;
-  int idx, best_idx = -1;
-  unsigned int cost_array[5];
-  int kr, kc;
-  const int w = block_size_wide[mbmi->sb_type];
-  const int h = block_size_high[mbmi->sb_type];
-  int offset;
-  int y_stride;
-  const uint8_t *y;
-
-  int minc, maxc, minr, maxr;
-
-  set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, ref_mv);
-
-  y = pd->pre[is_second].buf;
-  y_stride = pd->pre[is_second].stride;
-  offset = bestmv->row * y_stride + bestmv->col;
-
-  if (!allow_hp)
-    if (round == 3) round = 2;
-
-  bestmv->row *= 8;
-  bestmv->col *= 8;
-  // use_accurate_subpel_search can be 0 or 1 or 2
-  if (use_accurate_subpel_search)
-    besterr = upsampled_setup_obmc_center_error(
-        xd, cm, mi_row, mi_col, mask, bestmv, ref_mv, error_per_bit, vfp, z, y,
-        y_stride, w, h, offset, mvjcost, mvcost, sse1, distortion,
-        use_accurate_subpel_search);
-  else
-    besterr = setup_obmc_center_error(mask, bestmv, ref_mv, error_per_bit, vfp,
-                                      z, y, y_stride, offset, mvjcost, mvcost,
-                                      sse1, distortion);
-
-  for (iter = 0; iter < round; ++iter) {
-    // Check vertical and horizontal sub-pixel positions.
-    for (idx = 0; idx < 4; ++idx) {
-      tr = br + search_step[idx].row;
-      tc = bc + search_step[idx].col;
-      if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
-        MV this_mv = { tr, tc };
-        if (use_accurate_subpel_search) {
-          thismse = upsampled_obmc_pref_error(
-              xd, cm, mi_row, mi_col, &this_mv, mask, vfp, src_address,
-              pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse,
-              use_accurate_subpel_search);
-        } else {
-          thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc),
-                              sp(tr), src_address, mask, &sse);
-        }
-
-        cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
-                                                mvcost, error_per_bit);
-        if (cost_array[idx] < besterr) {
-          best_idx = idx;
-          besterr = cost_array[idx];
-          *distortion = thismse;
-          *sse1 = sse;
-        }
-      } else {
-        cost_array[idx] = INT_MAX;
-      }
-    }
-
-    // Check diagonal sub-pixel position
-    kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
-    kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
-
-    tc = bc + kc;
-    tr = br + kr;
-    if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
-      MV this_mv = { tr, tc };
-
-      if (use_accurate_subpel_search) {
-        thismse = upsampled_obmc_pref_error(
-            xd, cm, mi_row, mi_col, &this_mv, mask, vfp, src_address,
-            pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse,
-            use_accurate_subpel_search);
-      } else {
-        thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr),
-                            src_address, mask, &sse);
-      }
-
-      cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
-                                            error_per_bit);
-
-      if (cost_array[4] < besterr) {
-        best_idx = 4;
-        besterr = cost_array[4];
-        *distortion = thismse;
-        *sse1 = sse;
-      }
-    } else {
-      cost_array[idx] = INT_MAX;
-    }
-
-    if (best_idx < 4 && best_idx >= 0) {
-      br += search_step[best_idx].row;
-      bc += search_step[best_idx].col;
-    } else if (best_idx == 4) {
-      br = tr;
-      bc = tc;
-    }
-
-    if (iters_per_step > 1 && best_idx != -1) {
-      if (use_accurate_subpel_search) {
-        SECOND_LEVEL_CHECKS_BEST(1);
-      } else {
-        SECOND_LEVEL_CHECKS_BEST(0);
-      }
-    }
-
-    tr = br;
-    tc = bc;
-
-    search_step += 4;
-    hstep >>= 1;
-    best_idx = -1;
-  }
-
-  // These lines insure static analysis doesn't warn that
-  // tr and tc aren't used after the above point.
-  (void)tr;
-  (void)tc;
-
-  bestmv->row = br;
-  bestmv->col = bc;
-
-  return besterr;
-}
-
-#undef DIST
-#undef MVC
-#undef CHECK_BETTER
-
-static int get_obmc_mvpred_var(const MACROBLOCK *x, const int32_t *wsrc,
-                               const int32_t *mask, const MV *best_mv,
-                               const MV *center_mv,
-                               const aom_variance_fn_ptr_t *vfp, int use_mvcost,
-                               int is_second) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
-  const MV mv = { best_mv->row * 8, best_mv->col * 8 };
-  unsigned int unused;
-
-  return vfp->ovf(get_buf_from_mv(in_what, best_mv), in_what->stride, wsrc,
-                  mask, &unused) +
-         (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
-                                   x->errorperbit)
-                     : 0);
-}
-
-int obmc_refining_search_sad(const MACROBLOCK *x, const int32_t *wsrc,
-                             const int32_t *mask, MV *ref_mv, int error_per_bit,
-                             int search_range,
-                             const aom_variance_fn_ptr_t *fn_ptr,
-                             const MV *center_mv, int is_second) {
-  const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
-  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
-  unsigned int best_sad = fn_ptr->osdf(get_buf_from_mv(in_what, ref_mv),
-                                       in_what->stride, wsrc, mask) +
-                          mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
-  int i, j;
-
-  for (i = 0; i < search_range; i++) {
-    int best_site = -1;
-
-    for (j = 0; j < 4; j++) {
-      const MV mv = { ref_mv->row + neighbors[j].row,
-                      ref_mv->col + neighbors[j].col };
-      if (is_mv_in(&x->mv_limits, &mv)) {
-        unsigned int sad = fn_ptr->osdf(get_buf_from_mv(in_what, &mv),
-                                        in_what->stride, wsrc, mask);
-        if (sad < best_sad) {
-          sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
-          if (sad < best_sad) {
-            best_sad = sad;
-            best_site = j;
-          }
-        }
-      }
-    }
-
-    if (best_site == -1) {
-      break;
-    } else {
-      ref_mv->row += neighbors[best_site].row;
-      ref_mv->col += neighbors[best_site].col;
-    }
-  }
-  return best_sad;
-}
-
-int obmc_diamond_search_sad(const MACROBLOCK *x, const search_site_config *cfg,
-                            const int32_t *wsrc, const int32_t *mask,
-                            MV *ref_mv, MV *best_mv, int search_param,
-                            int sad_per_bit, int *num00,
-                            const aom_variance_fn_ptr_t *fn_ptr,
-                            const MV *center_mv, int is_second) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
-  // search_param determines the length of the initial step and hence the number
-  // of iterations
-  // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
-  // (MAX_FIRST_STEP/4) pel... etc.
-  const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step];
-  const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
-  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
-  const uint8_t *best_address, *in_what_ref;
-  int best_sad = INT_MAX;
-  int best_site = 0;
-  int last_site = 0;
-  int i, j, step;
-
-  clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
-           x->mv_limits.row_min, x->mv_limits.row_max);
-  in_what_ref = in_what->buf + ref_mv->row * in_what->stride + ref_mv->col;
-  best_address = in_what_ref;
-  *num00 = 0;
-  *best_mv = *ref_mv;
-
-  // Check the starting position
-  best_sad = fn_ptr->osdf(best_address, in_what->stride, wsrc, mask) +
-             mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
-
-  i = 1;
-
-  for (step = 0; step < tot_steps; step++) {
-    for (j = 0; j < cfg->searches_per_step; j++) {
-      const MV mv = { best_mv->row + ss[i].mv.row,
-                      best_mv->col + ss[i].mv.col };
-      if (is_mv_in(&x->mv_limits, &mv)) {
-        int sad = fn_ptr->osdf(best_address + ss[i].offset, in_what->stride,
-                               wsrc, mask);
-        if (sad < best_sad) {
-          sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
-          if (sad < best_sad) {
-            best_sad = sad;
-            best_site = i;
-          }
-        }
-      }
-
-      i++;
-    }
-
-    if (best_site != last_site) {
-      best_mv->row += ss[best_site].mv.row;
-      best_mv->col += ss[best_site].mv.col;
-      best_address += ss[best_site].offset;
-      last_site = best_site;
-#if defined(NEW_DIAMOND_SEARCH)
-      while (1) {
-        const MV this_mv = { best_mv->row + ss[best_site].mv.row,
-                             best_mv->col + ss[best_site].mv.col };
-        if (is_mv_in(&x->mv_limits, &this_mv)) {
-          int sad = fn_ptr->osdf(best_address + ss[best_site].offset,
-                                 in_what->stride, wsrc, mask);
-          if (sad < best_sad) {
-            sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
-            if (sad < best_sad) {
-              best_sad = sad;
-              best_mv->row += ss[best_site].mv.row;
-              best_mv->col += ss[best_site].mv.col;
-              best_address += ss[best_site].offset;
-              continue;
-            }
-          }
-        }
-        break;
-      }
-#endif
-    } else if (best_address == in_what_ref) {
-      (*num00)++;
-    }
-  }
-  return best_sad;
-}
-
-static int obmc_full_pixel_diamond(const AV1_COMP *cpi, MACROBLOCK *x,
-                                   MV *mvp_full, int step_param, int sadpb,
-                                   int further_steps, int do_refine,
-                                   const aom_variance_fn_ptr_t *fn_ptr,
-                                   const MV *ref_mv, MV *dst_mv,
-                                   int is_second) {
-  const int32_t *wsrc = x->wsrc_buf;
-  const int32_t *mask = x->mask_buf;
-  MV temp_mv;
-  int thissme, n, num00 = 0;
-  int bestsme =
-      obmc_diamond_search_sad(x, &cpi->ss_cfg, wsrc, mask, mvp_full, &temp_mv,
-                              step_param, sadpb, &n, fn_ptr, ref_mv, is_second);
-  if (bestsme < INT_MAX)
-    bestsme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr, 1,
-                                  is_second);
-  *dst_mv = temp_mv;
-
-  // If there won't be more n-step search, check to see if refining search is
-  // needed.
-  if (n > further_steps) do_refine = 0;
-
-  while (n < further_steps) {
-    ++n;
-
-    if (num00) {
-      num00--;
-    } else {
-      thissme = obmc_diamond_search_sad(x, &cpi->ss_cfg, wsrc, mask, mvp_full,
-                                        &temp_mv, step_param + n, sadpb, &num00,
-                                        fn_ptr, ref_mv, is_second);
-      if (thissme < INT_MAX)
-        thissme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr,
-                                      1, is_second);
-
-      // check to see if refining search is needed.
-      if (num00 > further_steps - n) do_refine = 0;
-
-      if (thissme < bestsme) {
-        bestsme = thissme;
-        *dst_mv = temp_mv;
-      }
-    }
-  }
-
-  // final 1-away diamond refining search
-  if (do_refine) {
-    const int search_range = 8;
-    MV best_mv = *dst_mv;
-    thissme = obmc_refining_search_sad(x, wsrc, mask, &best_mv, sadpb,
-                                       search_range, fn_ptr, ref_mv, is_second);
-    if (thissme < INT_MAX)
-      thissme = get_obmc_mvpred_var(x, wsrc, mask, &best_mv, ref_mv, fn_ptr, 1,
-                                    is_second);
-    if (thissme < bestsme) {
-      bestsme = thissme;
-      *dst_mv = best_mv;
-    }
-  }
-  return bestsme;
-}
-
-int av1_obmc_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, MV *mvp_full,
-                               int step_param, int sadpb, int further_steps,
-                               int do_refine,
-                               const aom_variance_fn_ptr_t *fn_ptr,
-                               const MV *ref_mv, MV *dst_mv, int is_second) {
-  if (cpi->sf.obmc_full_pixel_search_level == 0) {
-    return obmc_full_pixel_diamond(cpi, x, mvp_full, step_param, sadpb,
-                                   further_steps, do_refine, fn_ptr, ref_mv,
-                                   dst_mv, is_second);
-  } else {
-    const int32_t *wsrc = x->wsrc_buf;
-    const int32_t *mask = x->mask_buf;
-    const int search_range = 8;
-    *dst_mv = *mvp_full;
-    clamp_mv(dst_mv, x->mv_limits.col_min, x->mv_limits.col_max,
-             x->mv_limits.row_min, x->mv_limits.row_max);
-    int thissme = obmc_refining_search_sad(
-        x, wsrc, mask, dst_mv, sadpb, search_range, fn_ptr, ref_mv, is_second);
-    if (thissme < INT_MAX)
-      thissme = get_obmc_mvpred_var(x, wsrc, mask, dst_mv, ref_mv, fn_ptr, 1,
-                                    is_second);
-    return thissme;
-  }
-}
-
-// Note(yunqingwang): The following 2 functions are only used in the motion
-// vector unit test, which return extreme motion vectors allowed by the MV
-// limits.
-#define COMMON_MV_TEST              \
-  SETUP_SUBPEL_SEARCH;              \
-                                    \
-  (void)error_per_bit;              \
-  (void)vfp;                        \
-  (void)src_address;                \
-  (void)src_stride;                 \
-  (void)y;                          \
-  (void)y_stride;                   \
-  (void)second_pred;                \
-  (void)w;                          \
-  (void)h;                          \
-  (void)use_accurate_subpel_search; \
-  (void)offset;                     \
-  (void)mvjcost;                    \
-  (void)mvcost;                     \
-  (void)sse1;                       \
-  (void)distortion;                 \
-                                    \
-  (void)halfiters;                  \
-  (void)quarteriters;               \
-  (void)eighthiters;                \
-  (void)whichdir;                   \
-  (void)forced_stop;                \
-  (void)hstep;                      \
-                                    \
-  (void)tr;                         \
-  (void)tc;                         \
-  (void)sse;                        \
-  (void)thismse;                    \
-  (void)cost_list;
-// Return the maximum MV.
-int av1_return_max_sub_pixel_mv(MACROBLOCK *x, const AV1_COMMON *const cm,
-                                int mi_row, int mi_col, const MV *ref_mv,
-                                int allow_hp, int error_per_bit,
-                                const aom_variance_fn_ptr_t *vfp,
-                                int forced_stop, int iters_per_step,
-                                int *cost_list, int *mvjcost, int *mvcost[2],
-                                int *distortion, unsigned int *sse1,
-                                const uint8_t *second_pred, const uint8_t *mask,
-                                int mask_stride, int invert_mask, int w, int h,
-                                int use_accurate_subpel_search) {
-  COMMON_MV_TEST;
-  (void)mask;
-  (void)mask_stride;
-  (void)invert_mask;
-  (void)minr;
-  (void)minc;
-
-  (void)cm;
-  (void)mi_row;
-  (void)mi_col;
-
-  bestmv->row = maxr;
-  bestmv->col = maxc;
-  besterr = 0;
-  // In the sub-pel motion search, if hp is not used, then the last bit of mv
-  // has to be 0.
-  lower_mv_precision(bestmv, allow_hp, 0);
-  return besterr;
-}
-// Return the minimum MV.
-int av1_return_min_sub_pixel_mv(MACROBLOCK *x, const AV1_COMMON *const cm,
-                                int mi_row, int mi_col, const MV *ref_mv,
-                                int allow_hp, int error_per_bit,
-                                const aom_variance_fn_ptr_t *vfp,
-                                int forced_stop, int iters_per_step,
-                                int *cost_list, int *mvjcost, int *mvcost[2],
-                                int *distortion, unsigned int *sse1,
-                                const uint8_t *second_pred, const uint8_t *mask,
-                                int mask_stride, int invert_mask, int w, int h,
-                                int use_accurate_subpel_search) {
-  COMMON_MV_TEST;
-  (void)maxr;
-  (void)maxc;
-  (void)mask;
-  (void)mask_stride;
-  (void)invert_mask;
-
-  (void)cm;
-  (void)mi_row;
-  (void)mi_col;
-
-  bestmv->row = minr;
-  bestmv->col = minc;
-  besterr = 0;
-  // In the sub-pel motion search, if hp is not used, then the last bit of mv
-  // has to be 0.
-  lower_mv_precision(bestmv, allow_hp, 0);
-  return besterr;
-}
diff --git a/third_party/aom/av1/encoder/mcomp.h b/third_party/aom/av1/encoder/mcomp.h
deleted file mode 100644
index a975218b0..000000000
--- a/third_party/aom/av1/encoder/mcomp.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_MCOMP_H_
-#define AOM_AV1_ENCODER_MCOMP_H_
-
-#include "av1/encoder/block.h"
-#include "aom_dsp/variance.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// The maximum number of steps in a step search given the largest
-// allowed initial step
-#define MAX_MVSEARCH_STEPS 11
-// Max full pel mv specified in the unit of full pixel
-// Enable the use of motion vector in range [-1023, 1023].
-#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS - 1)) - 1)
-// Maximum size of the first step in full pel units
-#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS - 1))
-// Allowed motion vector pixel distance outside image border
-// for Block_16x16
-#define BORDER_MV_PIXELS_B16 (16 + AOM_INTERP_EXTEND)
-
-#define SEARCH_RANGE_8P 3
-#define SEARCH_GRID_STRIDE_8P (2 * SEARCH_RANGE_8P + 1)
-#define SEARCH_GRID_CENTER_8P \
-  (SEARCH_RANGE_8P * SEARCH_GRID_STRIDE_8P + SEARCH_RANGE_8P)
-
-// motion search site
-typedef struct search_site {
-  MV mv;
-  int offset;
-} search_site;
-
-typedef struct search_site_config {
-  search_site ss[8 * MAX_MVSEARCH_STEPS + 1];
-  int ss_count;
-  int searches_per_step;
-} search_site_config;
-
-typedef struct {
-  MV coord;
-  int coord_offset;
-} search_neighbors;
-
-void av1_init_dsmotion_compensation(search_site_config *cfg, int stride);
-void av1_init3smotion_compensation(search_site_config *cfg, int stride);
-
-void av1_set_mv_search_range(MvLimits *mv_limits, const MV *mv);
-
-int av1_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
-                    int *mvcost[2], int weight);
-
-// Utility to compute variance + MV rate cost for a given MV
-int av1_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
-                       const MV *center_mv, const aom_variance_fn_ptr_t *vfp,
-                       int use_mvcost);
-int av1_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
-                          const MV *center_mv, const uint8_t *second_pred,
-                          const aom_variance_fn_ptr_t *vfp, int use_mvcost);
-int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv,
-                            const MV *center_mv, const uint8_t *second_pred,
-                            const uint8_t *mask, int mask_stride,
-                            int invert_mask, const aom_variance_fn_ptr_t *vfp,
-                            int use_mvcost);
-
-struct AV1_COMP;
-struct SPEED_FEATURES;
-
-int av1_init_search_range(int size);
-
-int av1_refining_search_sad(struct macroblock *x, MV *ref_mv, int sad_per_bit,
-                            int distance, const aom_variance_fn_ptr_t *fn_ptr,
-                            const MV *center_mv);
-
-// Runs sequence of diamond searches in smaller steps for RD.
-int av1_full_pixel_diamond(const struct AV1_COMP *cpi, MACROBLOCK *x,
-                           MV *mvp_full, int step_param, int sadpb,
-                           int further_steps, int do_refine, int *cost_list,
-                           const aom_variance_fn_ptr_t *fn_ptr,
-                           const MV *ref_mv, MV *dst_mv);
-
-int av1_hex_search(MACROBLOCK *x, MV *start_mv, int search_param,
-                   int sad_per_bit, int do_init_search, int *cost_list,
-                   const aom_variance_fn_ptr_t *vfp, int use_mvcost,
-                   const MV *center_mv);
-
-typedef int(fractional_mv_step_fp)(
-    MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
-    const MV *ref_mv, int allow_hp, int error_per_bit,
-    const aom_variance_fn_ptr_t *vfp,
-    int forced_stop,  // 0 - full, 1 - qtr only, 2 - half only
-    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
-    int *distortion, unsigned int *sse1, const uint8_t *second_pred,
-    const uint8_t *mask, int mask_stride, int invert_mask, int w, int h,
-    int use_accurate_subpel_search);
-
-extern fractional_mv_step_fp av1_find_best_sub_pixel_tree;
-extern fractional_mv_step_fp av1_find_best_sub_pixel_tree_pruned;
-extern fractional_mv_step_fp av1_find_best_sub_pixel_tree_pruned_more;
-extern fractional_mv_step_fp av1_find_best_sub_pixel_tree_pruned_evenmore;
-extern fractional_mv_step_fp av1_return_max_sub_pixel_mv;
-extern fractional_mv_step_fp av1_return_min_sub_pixel_mv;
-
-typedef int (*av1_full_search_fn_t)(const MACROBLOCK *x, const MV *ref_mv,
-                                    int sad_per_bit, int distance,
-                                    const aom_variance_fn_ptr_t *fn_ptr,
-                                    const MV *center_mv, MV *best_mv);
-
-typedef int (*av1_diamond_search_fn_t)(
-    MACROBLOCK *x, const search_site_config *cfg, MV *ref_mv, MV *best_mv,
-    int search_param, int sad_per_bit, int *num00,
-    const aom_variance_fn_ptr_t *fn_ptr, const MV *center_mv);
-
-int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range,
-                             const aom_variance_fn_ptr_t *fn_ptr,
-                             const uint8_t *mask, int mask_stride,
-                             int invert_mask, const MV *center_mv,
-                             const uint8_t *second_pred);
-
-int av1_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x,
-                          BLOCK_SIZE bsize, MV *mvp_full, int step_param,
-                          int method, int run_mesh_search, int error_per_bit,
-                          int *cost_list, const MV *ref_mv, int var_max, int rd,
-                          int x_pos, int y_pos, int intra);
-
-int av1_obmc_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x,
-                               MV *mvp_full, int step_param, int sadpb,
-                               int further_steps, int do_refine,
-                               const aom_variance_fn_ptr_t *fn_ptr,
-                               const MV *ref_mv, MV *dst_mv, int is_second);
-int av1_find_best_obmc_sub_pixel_tree_up(
-    MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
-    MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit,
-    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
-    int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
-    int is_second, int use_accurate_subpel_search);
-
-unsigned int av1_compute_motion_cost(const struct AV1_COMP *cpi,
-                                     MACROBLOCK *const x, BLOCK_SIZE bsize,
-                                     int mi_row, int mi_col, const MV *this_mv);
-unsigned int av1_refine_warped_mv(const struct AV1_COMP *cpi,
-                                  MACROBLOCK *const x, BLOCK_SIZE bsize,
-                                  int mi_row, int mi_col, int *pts0,
-                                  int *pts_inref0, int total_samples);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_MCOMP_H_
diff --git a/third_party/aom/av1/encoder/mips/msa/error_msa.c b/third_party/aom/av1/encoder/mips/msa/error_msa.c
deleted file mode 100644
index 2e86dee43..000000000
--- a/third_party/aom/av1/encoder/mips/msa/error_msa.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/mips/macros_msa.h"
-
-#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize)                                     \
-  static int64_t block_error_##BSize##size_msa(                              \
-      const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \
-    int64_t err = 0;                                                         \
-    uint32_t loop_cnt;                                                       \
-    v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h;                             \
-    v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w;                              \
-    v2i64 sq_coeff_r, sq_coeff_l;                                            \
-    v2i64 err0, err_dup0, err1, err_dup1;                                    \
-                                                                             \
-    coeff = LD_SH(coeff_ptr);                                                \
-    dq_coeff = LD_SH(dq_coeff_ptr);                                          \
-    UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w);                                \
-    ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h);                      \
-    HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l);                       \
-    DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, sq_coeff_r,      \
-                sq_coeff_l);                                                 \
-    DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1);                 \
-                                                                             \
-    coeff = LD_SH(coeff_ptr + 8);                                            \
-    dq_coeff = LD_SH(dq_coeff_ptr + 8);                                      \
-    UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w);                                \
-    ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h);                      \
-    HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l);                       \
-    DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l);              \
-    DPADD_SD2_SD(diff_r, diff_l, err0, err1);                                \
-                                                                             \
-    coeff_ptr += 16;                                                         \
-    dq_coeff_ptr += 16;                                                      \
-                                                                             \
-    for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) {                       \
-      coeff = LD_SH(coeff_ptr);                                              \
-      dq_coeff = LD_SH(dq_coeff_ptr);                                        \
-      UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w);                              \
-      ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h);                    \
-      HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l);                     \
-      DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l);            \
-      DPADD_SD2_SD(diff_r, diff_l, err0, err1);                              \
-                                                                             \
-      coeff = LD_SH(coeff_ptr + 8);                                          \
-      dq_coeff = LD_SH(dq_coeff_ptr + 8);                                    \
-      UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w);                              \
-      ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h);                    \
-      HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l);                     \
-      DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l);            \
-      DPADD_SD2_SD(diff_r, diff_l, err0, err1);                              \
-                                                                             \
-      coeff_ptr += 16;                                                       \
-      dq_coeff_ptr += 16;                                                    \
-    }                                                                        \
-                                                                             \
-    err_dup0 = __msa_splati_d(sq_coeff_r, 1);                                \
-    err_dup1 = __msa_splati_d(sq_coeff_l, 1);                                \
-    sq_coeff_r += err_dup0;                                                  \
-    sq_coeff_l += err_dup1;                                                  \
-    *ssz = __msa_copy_s_d(sq_coeff_r, 0);                                    \
-    *ssz += __msa_copy_s_d(sq_coeff_l, 0);                                   \
-                                                                             \
-    err_dup0 = __msa_splati_d(err0, 1);                                      \
-    err_dup1 = __msa_splati_d(err1, 1);                                      \
-    err0 += err_dup0;                                                        \
-    err1 += err_dup1;                                                        \
-    err = __msa_copy_s_d(err0, 0);                                           \
-    err += __msa_copy_s_d(err1, 0);                                          \
-                                                                             \
-    return err;                                                              \
-  }
-
-/* clang-format off */
-BLOCK_ERROR_BLOCKSIZE_MSA(16)
-BLOCK_ERROR_BLOCKSIZE_MSA(64)
-BLOCK_ERROR_BLOCKSIZE_MSA(256)
-BLOCK_ERROR_BLOCKSIZE_MSA(1024)
-/* clang-format on */
-
-int64_t av1_block_error_msa(const tran_low_t *coeff_ptr,
-                            const tran_low_t *dq_coeff_ptr, intptr_t blk_size,
-                            int64_t *ssz) {
-  int64_t err;
-  const int16_t *coeff = (const int16_t *)coeff_ptr;
-  const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr;
-
-  switch (blk_size) {
-    case 16: err = block_error_16size_msa(coeff, dq_coeff, ssz); break;
-    case 64: err = block_error_64size_msa(coeff, dq_coeff, ssz); break;
-    case 256: err = block_error_256size_msa(coeff, dq_coeff, ssz); break;
-    case 1024: err = block_error_1024size_msa(coeff, dq_coeff, ssz); break;
-    default:
-      err = av1_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
-      break;
-  }
-
-  return err;
-}
diff --git a/third_party/aom/av1/encoder/mips/msa/fdct4x4_msa.c b/third_party/aom/av1/encoder/mips/msa/fdct4x4_msa.c
deleted file mode 100644
index 085c08bfb..000000000
--- a/third_party/aom/av1/encoder/mips/msa/fdct4x4_msa.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "av1/common/enums.h"
-
-void av1_fwht4x4_msa(const int16_t *input, int16_t *output,
-                     int32_t src_stride) {
-  v8i16 in0, in1, in2, in3, in4;
-
-  LD_SH4(input, src_stride, in0, in1, in2, in3);
-
-  in0 += in1;
-  in3 -= in2;
-  in4 = (in0 - in3) >> 1;
-  SUB2(in4, in1, in4, in2, in1, in2);
-  in0 -= in2;
-  in3 += in1;
-
-  TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1);
-
-  in0 += in2;
-  in1 -= in3;
-  in4 = (in0 - in1) >> 1;
-  SUB2(in4, in2, in4, in3, in2, in3);
-  in0 -= in3;
-  in1 += in2;
-
-  SLLI_4V(in0, in1, in2, in3, 2);
-
-  TRANSPOSE4x4_SH_SH(in0, in3, in1, in2, in0, in3, in1, in2);
-
-  ST4x2_UB(in0, output, 4);
-  ST4x2_UB(in3, output + 4, 4);
-  ST4x2_UB(in1, output + 8, 4);
-  ST4x2_UB(in2, output + 12, 4);
-}
diff --git a/third_party/aom/av1/encoder/mips/msa/temporal_filter_msa.c b/third_party/aom/av1/encoder/mips/msa/temporal_filter_msa.c
deleted file mode 100644
index 531ae090a..000000000
--- a/third_party/aom/av1/encoder/mips/msa/temporal_filter_msa.c
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/mips/macros_msa.h"
-
-static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, uint32_t stride,
-                                            uint8_t *frm2_ptr, int32_t filt_sth,
-                                            int32_t filt_wgt, uint32_t *acc,
-                                            uint16_t *cnt) {
-  uint32_t row;
-  uint64_t f0, f1, f2, f3;
-  v16i8 frm2, frm1 = { 0 };
-  v16i8 frm4, frm3 = { 0 };
-  v16u8 frm_r, frm_l;
-  v8i16 frm2_r, frm2_l;
-  v8i16 diff0, diff1, mod0_h, mod1_h;
-  v4i32 cnst3, cnst16, filt_wt, strength;
-  v4i32 mod0_w, mod1_w, mod2_w, mod3_w;
-  v4i32 diff0_r, diff0_l, diff1_r, diff1_l;
-  v4i32 frm2_rr, frm2_rl, frm2_lr, frm2_ll;
-  v4i32 acc0, acc1, acc2, acc3;
-  v8i16 cnt0, cnt1;
-
-  filt_wt = __msa_fill_w(filt_wgt);
-  strength = __msa_fill_w(filt_sth);
-  cnst3 = __msa_ldi_w(3);
-  cnst16 = __msa_ldi_w(16);
-
-  for (row = 2; row--;) {
-    LD4(frm1_ptr, stride, f0, f1, f2, f3);
-    frm1_ptr += (4 * stride);
-
-    LD_SB2(frm2_ptr, 16, frm2, frm4);
-    frm2_ptr += 32;
-
-    LD_SW2(acc, 4, acc0, acc1);
-    LD_SW2(acc + 8, 4, acc2, acc3);
-    LD_SH2(cnt, 8, cnt0, cnt1);
-
-    INSERT_D2_SB(f0, f1, frm1);
-    INSERT_D2_SB(f2, f3, frm3);
-    ILVRL_B2_UB(frm1, frm2, frm_r, frm_l);
-    HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
-    UNPCK_SH_SW(diff0, diff0_r, diff0_l);
-    UNPCK_SH_SW(diff1, diff1_r, diff1_l);
-    MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
-         mod0_w, mod1_w, mod2_w, mod3_w);
-    MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
-         mod1_w, mod2_w, mod3_w);
-    SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
-
-    diff0_r = (mod0_w < cnst16);
-    diff0_l = (mod1_w < cnst16);
-    diff1_r = (mod2_w < cnst16);
-    diff1_l = (mod3_w < cnst16);
-
-    SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
-         mod1_w, mod2_w, mod3_w);
-
-    mod0_w = diff0_r & mod0_w;
-    mod1_w = diff0_l & mod1_w;
-    mod2_w = diff1_r & mod2_w;
-    mod3_w = diff1_l & mod3_w;
-
-    MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
-         mod0_w, mod1_w, mod2_w, mod3_w);
-    PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
-    ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
-    ST_SH2(mod0_h, mod1_h, cnt, 8);
-    cnt += 16;
-
-    UNPCK_UB_SH(frm2, frm2_r, frm2_l);
-    UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
-    UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
-    MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
-         mod0_w, mod1_w, mod2_w, mod3_w);
-    ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
-         mod2_w, mod3_w);
-
-    ST_SW2(mod0_w, mod1_w, acc, 4);
-    acc += 8;
-    ST_SW2(mod2_w, mod3_w, acc, 4);
-    acc += 8;
-
-    LD_SW2(acc, 4, acc0, acc1);
-    LD_SW2(acc + 8, 4, acc2, acc3);
-    LD_SH2(cnt, 8, cnt0, cnt1);
-
-    ILVRL_B2_UB(frm3, frm4, frm_r, frm_l);
-    HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
-    UNPCK_SH_SW(diff0, diff0_r, diff0_l);
-    UNPCK_SH_SW(diff1, diff1_r, diff1_l);
-    MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
-         mod0_w, mod1_w, mod2_w, mod3_w);
-    MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
-         mod1_w, mod2_w, mod3_w);
-    SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
-
-    diff0_r = (mod0_w < cnst16);
-    diff0_l = (mod1_w < cnst16);
-    diff1_r = (mod2_w < cnst16);
-    diff1_l = (mod3_w < cnst16);
-
-    SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
-         mod1_w, mod2_w, mod3_w);
-
-    mod0_w = diff0_r & mod0_w;
-    mod1_w = diff0_l & mod1_w;
-    mod2_w = diff1_r & mod2_w;
-    mod3_w = diff1_l & mod3_w;
-
-    MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
-         mod0_w, mod1_w, mod2_w, mod3_w);
-    PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
-    ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
-    ST_SH2(mod0_h, mod1_h, cnt, 8);
-    cnt += 16;
-    UNPCK_UB_SH(frm4, frm2_r, frm2_l);
-    UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
-    UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
-    MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
-         mod0_w, mod1_w, mod2_w, mod3_w);
-    ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
-         mod2_w, mod3_w);
-
-    ST_SW2(mod0_w, mod1_w, acc, 4);
-    acc += 8;
-    ST_SW2(mod2_w, mod3_w, acc, 4);
-    acc += 8;
-  }
-}
-
-static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, uint32_t stride,
-                                             uint8_t *frm2_ptr,
-                                             int32_t filt_sth, int32_t filt_wgt,
-                                             uint32_t *acc, uint16_t *cnt) {
-  uint32_t row;
-  v16i8 frm1, frm2, frm3, frm4;
-  v16u8 frm_r, frm_l;
-  v16i8 zero = { 0 };
-  v8u16 frm2_r, frm2_l;
-  v8i16 diff0, diff1, mod0_h, mod1_h;
-  v4i32 cnst3, cnst16, filt_wt, strength;
-  v4i32 mod0_w, mod1_w, mod2_w, mod3_w;
-  v4i32 diff0_r, diff0_l, diff1_r, diff1_l;
-  v4i32 frm2_rr, frm2_rl, frm2_lr, frm2_ll;
-  v4i32 acc0, acc1, acc2, acc3;
-  v8i16 cnt0, cnt1;
-
-  filt_wt = __msa_fill_w(filt_wgt);
-  strength = __msa_fill_w(filt_sth);
-  cnst3 = __msa_ldi_w(3);
-  cnst16 = __msa_ldi_w(16);
-
-  for (row = 8; row--;) {
-    LD_SB2(frm1_ptr, stride, frm1, frm3);
-    frm1_ptr += stride;
-
-    LD_SB2(frm2_ptr, 16, frm2, frm4);
-    frm2_ptr += 16;
-
-    LD_SW2(acc, 4, acc0, acc1);
-    LD_SW2(acc, 4, acc2, acc3);
-    LD_SH2(cnt, 8, cnt0, cnt1);
-
-    ILVRL_B2_UB(frm1, frm2, frm_r, frm_l);
-    HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
-    UNPCK_SH_SW(diff0, diff0_r, diff0_l);
-    UNPCK_SH_SW(diff1, diff1_r, diff1_l);
-    MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
-         mod0_w, mod1_w, mod2_w, mod3_w);
-    MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
-         mod1_w, mod2_w, mod3_w);
-    SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
-
-    diff0_r = (mod0_w < cnst16);
-    diff0_l = (mod1_w < cnst16);
-    diff1_r = (mod2_w < cnst16);
-    diff1_l = (mod3_w < cnst16);
-
-    SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
-         mod1_w, mod2_w, mod3_w);
-
-    mod0_w = diff0_r & mod0_w;
-    mod1_w = diff0_l & mod1_w;
-    mod2_w = diff1_r & mod2_w;
-    mod3_w = diff1_l & mod3_w;
-
-    MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
-         mod0_w, mod1_w, mod2_w, mod3_w);
-    PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
-    ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
-    ST_SH2(mod0_h, mod1_h, cnt, 8);
-    cnt += 16;
-
-    ILVRL_B2_UH(zero, frm2, frm2_r, frm2_l);
-    UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
-    UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
-    MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
-         mod0_w, mod1_w, mod2_w, mod3_w);
-    ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
-         mod2_w, mod3_w);
-
-    ST_SW2(mod0_w, mod1_w, acc, 4);
-    acc += 8;
-    ST_SW2(mod2_w, mod3_w, acc, 4);
-    acc += 8;
-
-    LD_SW2(acc, 4, acc0, acc1);
-    LD_SW2(acc + 8, 4, acc2, acc3);
-    LD_SH2(cnt, 8, cnt0, cnt1);
-
-    ILVRL_B2_UB(frm3, frm4, frm_r, frm_l);
-    HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
-    UNPCK_SH_SW(diff0, diff0_r, diff0_l);
-    UNPCK_SH_SW(diff1, diff1_r, diff1_l);
-    MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
-         mod0_w, mod1_w, mod2_w, mod3_w);
-    MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
-         mod1_w, mod2_w, mod3_w);
-    SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
-
-    diff0_r = (mod0_w < cnst16);
-    diff0_l = (mod1_w < cnst16);
-    diff1_r = (mod2_w < cnst16);
-    diff1_l = (mod3_w < cnst16);
-
-    SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
-         mod1_w, mod2_w, mod3_w);
-
-    mod0_w = diff0_r & mod0_w;
-    mod1_w = diff0_l & mod1_w;
-    mod2_w = diff1_r & mod2_w;
-    mod3_w = diff1_l & mod3_w;
-
-    MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
-         mod0_w, mod1_w, mod2_w, mod3_w);
-    PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
-    ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
-    ST_SH2(mod0_h, mod1_h, cnt, 8);
-    cnt += 16;
-
-    ILVRL_B2_UH(zero, frm4, frm2_r, frm2_l);
-    UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
-    UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
-    MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
-         mod0_w, mod1_w, mod2_w, mod3_w);
-    ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
-         mod2_w, mod3_w);
-    ST_SW2(mod0_w, mod1_w, acc, 4);
-    acc += 8;
-    ST_SW2(mod2_w, mod3_w, acc, 4);
-    acc += 8;
-
-    frm1_ptr += stride;
-    frm2_ptr += 16;
-  }
-}
-
-void av1_temporal_filter_apply_msa(uint8_t *frame1_ptr, uint32_t stride,
-                                   uint8_t *frame2_ptr, uint32_t blk_w,
-                                   uint32_t blk_h, int32_t strength,
-                                   int32_t filt_wgt, uint32_t *accu,
-                                   uint16_t *cnt) {
-  if (8 == (blk_w * blk_h)) {
-    temporal_filter_apply_8size_msa(frame1_ptr, stride, frame2_ptr, strength,
-                                    filt_wgt, accu, cnt);
-  } else if (16 == (blk_w * blk_h)) {
-    temporal_filter_apply_16size_msa(frame1_ptr, stride, frame2_ptr, strength,
-                                     filt_wgt, accu, cnt);
-  } else {
-    av1_temporal_filter_apply_c(frame1_ptr, stride, frame2_ptr, blk_w, blk_h,
-                                strength, filt_wgt, accu, cnt);
-  }
-}
diff --git a/third_party/aom/av1/encoder/ml.c b/third_party/aom/av1/encoder/ml.c
deleted file mode 100644
index d21def43a..000000000
--- a/third_party/aom/av1/encoder/ml.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <math.h>
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "av1/encoder/ml.h"
-
-void av1_nn_predict(const float *features, const NN_CONFIG *nn_config,
-                    float *output) {
-  int num_input_nodes = nn_config->num_inputs;
-  int buf_index = 0;
-  float buf[2][NN_MAX_NODES_PER_LAYER];
-  const float *input_nodes = features;
-
-  // Propagate hidden layers.
-  const int num_layers = nn_config->num_hidden_layers;
-  assert(num_layers <= NN_MAX_HIDDEN_LAYERS);
-  for (int layer = 0; layer < num_layers; ++layer) {
-    const float *weights = nn_config->weights[layer];
-    const float *bias = nn_config->bias[layer];
-    float *output_nodes = buf[buf_index];
-    const int num_output_nodes = nn_config->num_hidden_nodes[layer];
-    assert(num_output_nodes < NN_MAX_NODES_PER_LAYER);
-    for (int node = 0; node < num_output_nodes; ++node) {
-      float val = 0.0f;
-      for (int i = 0; i < num_input_nodes; ++i)
-        val += weights[i] * input_nodes[i];
-      val += bias[node];
-      // ReLU as activation function.
-      val = val > 0.0f ? val : 0.0f;  // Could use AOMMAX().
-      output_nodes[node] = val;
-      weights += num_input_nodes;
-    }
-    num_input_nodes = num_output_nodes;
-    input_nodes = output_nodes;
-    buf_index = 1 - buf_index;
-  }
-
-  // Final output layer.
-  const float *weights = nn_config->weights[num_layers];
-  for (int node = 0; node < nn_config->num_outputs; ++node) {
-    const float *bias = nn_config->bias[num_layers];
-    float val = 0.0f;
-    for (int i = 0; i < num_input_nodes; ++i)
-      val += weights[i] * input_nodes[i];
-    output[node] = val + bias[node];
-    weights += num_input_nodes;
-  }
-}
-
-void av1_nn_softmax(const float *input, float *output, int n) {
-  // Softmax function is invariant to adding the same constant
-  // to all input values, so we subtract the maximum input to avoid
-  // possible overflow.
-  float max_inp = input[0];
-  for (int i = 1; i < n; i++) max_inp = AOMMAX(max_inp, input[i]);
-  float sum_out = 0.0f;
-  for (int i = 0; i < n; i++) {
-    output[i] = (float)exp(input[i] - max_inp);
-    sum_out += output[i];
-  }
-  for (int i = 0; i < n; i++) output[i] /= sum_out;
-}
diff --git a/third_party/aom/av1/encoder/ml.h b/third_party/aom/av1/encoder/ml.h
deleted file mode 100644
index cb8ef2871..000000000
--- a/third_party/aom/av1/encoder/ml.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_ML_H_
-#define AOM_AV1_ENCODER_ML_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define NN_MAX_HIDDEN_LAYERS 10
-#define NN_MAX_NODES_PER_LAYER 128
-
-typedef struct {
-  int num_inputs;         // Number of input nodes, i.e. features.
-  int num_outputs;        // Number of output nodes.
-  int num_hidden_layers;  // Number of hidden layers, maximum 10.
-  // Number of nodes for each hidden layer.
-  int num_hidden_nodes[NN_MAX_HIDDEN_LAYERS];
-  // Weight parameters, indexed by layer.
-  const float *weights[NN_MAX_HIDDEN_LAYERS + 1];
-  // Bias parameters, indexed by layer.
-  const float *bias[NN_MAX_HIDDEN_LAYERS + 1];
-} NN_CONFIG;
-
-// Calculate prediction based on the given input features and neural net config.
-// Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden
-// layer.
-void av1_nn_predict(const float *features, const NN_CONFIG *nn_config,
-                    float *output);
-
-// Applies the softmax normalization function to the input
-// to get a valid probability distribution in the output:
-// output[i] = exp(input[i]) / sum_{k \in [0,n)}(exp(input[k]))
-void av1_nn_softmax(const float *input, float *output, int n);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_ML_H_
diff --git a/third_party/aom/av1/encoder/palette.c b/third_party/aom/av1/encoder/palette.c
deleted file mode 100644
index e61cd02ce..000000000
--- a/third_party/aom/av1/encoder/palette.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdlib.h>
-
-#include "av1/encoder/cost.h"
-#include "av1/encoder/palette.h"
-#include "av1/encoder/random.h"
-
-#define AV1_K_MEANS_DIM 1
-#include "av1/encoder/k_means_template.h"
-#undef AV1_K_MEANS_DIM
-#define AV1_K_MEANS_DIM 2
-#include "av1/encoder/k_means_template.h"
-#undef AV1_K_MEANS_DIM
-
-static int int_comparer(const void *a, const void *b) {
-  return (*(int *)a - *(int *)b);
-}
-
-int av1_remove_duplicates(int *centroids, int num_centroids) {
-  int num_unique;  // number of unique centroids
-  int i;
-  qsort(centroids, num_centroids, sizeof(*centroids), int_comparer);
-  // Remove duplicates.
-  num_unique = 1;
-  for (i = 1; i < num_centroids; ++i) {
-    if (centroids[i] != centroids[i - 1]) {  // found a new unique centroid
-      centroids[num_unique++] = centroids[i];
-    }
-  }
-  return num_unique;
-}
-
-static int delta_encode_cost(const int *colors, int num, int bit_depth,
-                             int min_val) {
-  if (num <= 0) return 0;
-  int bits_cost = bit_depth;
-  if (num == 1) return bits_cost;
-  bits_cost += 2;
-  int max_delta = 0;
-  int deltas[PALETTE_MAX_SIZE];
-  const int min_bits = bit_depth - 3;
-  for (int i = 1; i < num; ++i) {
-    const int delta = colors[i] - colors[i - 1];
-    deltas[i - 1] = delta;
-    assert(delta >= min_val);
-    if (delta > max_delta) max_delta = delta;
-  }
-  int bits_per_delta = AOMMAX(av1_ceil_log2(max_delta + 1 - min_val), min_bits);
-  assert(bits_per_delta <= bit_depth);
-  int range = (1 << bit_depth) - colors[0] - min_val;
-  for (int i = 0; i < num - 1; ++i) {
-    bits_cost += bits_per_delta;
-    range -= deltas[i];
-    bits_per_delta = AOMMIN(bits_per_delta, av1_ceil_log2(range));
-  }
-  return bits_cost;
-}
-
-int av1_index_color_cache(const uint16_t *color_cache, int n_cache,
-                          const uint16_t *colors, int n_colors,
-                          uint8_t *cache_color_found, int *out_cache_colors) {
-  if (n_cache <= 0) {
-    for (int i = 0; i < n_colors; ++i) out_cache_colors[i] = colors[i];
-    return n_colors;
-  }
-  memset(cache_color_found, 0, n_cache * sizeof(*cache_color_found));
-  int n_in_cache = 0;
-  int in_cache_flags[PALETTE_MAX_SIZE];
-  memset(in_cache_flags, 0, sizeof(in_cache_flags));
-  for (int i = 0; i < n_cache && n_in_cache < n_colors; ++i) {
-    for (int j = 0; j < n_colors; ++j) {
-      if (colors[j] == color_cache[i]) {
-        in_cache_flags[j] = 1;
-        cache_color_found[i] = 1;
-        ++n_in_cache;
-        break;
-      }
-    }
-  }
-  int j = 0;
-  for (int i = 0; i < n_colors; ++i)
-    if (!in_cache_flags[i]) out_cache_colors[j++] = colors[i];
-  assert(j == n_colors - n_in_cache);
-  return j;
-}
-
-int av1_get_palette_delta_bits_v(const PALETTE_MODE_INFO *const pmi,
-                                 int bit_depth, int *zero_count,
-                                 int *min_bits) {
-  const int n = pmi->palette_size[1];
-  const int max_val = 1 << bit_depth;
-  int max_d = 0;
-  *min_bits = bit_depth - 4;
-  *zero_count = 0;
-  for (int i = 1; i < n; ++i) {
-    const int delta = pmi->palette_colors[2 * PALETTE_MAX_SIZE + i] -
-                      pmi->palette_colors[2 * PALETTE_MAX_SIZE + i - 1];
-    const int v = abs(delta);
-    const int d = AOMMIN(v, max_val - v);
-    if (d > max_d) max_d = d;
-    if (d == 0) ++(*zero_count);
-  }
-  return AOMMAX(av1_ceil_log2(max_d + 1), *min_bits);
-}
-
-int av1_palette_color_cost_y(const PALETTE_MODE_INFO *const pmi,
-                             uint16_t *color_cache, int n_cache,
-                             int bit_depth) {
-  const int n = pmi->palette_size[0];
-  int out_cache_colors[PALETTE_MAX_SIZE];
-  uint8_t cache_color_found[2 * PALETTE_MAX_SIZE];
-  const int n_out_cache =
-      av1_index_color_cache(color_cache, n_cache, pmi->palette_colors, n,
-                            cache_color_found, out_cache_colors);
-  const int total_bits =
-      n_cache + delta_encode_cost(out_cache_colors, n_out_cache, bit_depth, 1);
-  return av1_cost_literal(total_bits);
-}
-
-int av1_palette_color_cost_uv(const PALETTE_MODE_INFO *const pmi,
-                              uint16_t *color_cache, int n_cache,
-                              int bit_depth) {
-  const int n = pmi->palette_size[1];
-  int total_bits = 0;
-  // U channel palette color cost.
-  int out_cache_colors[PALETTE_MAX_SIZE];
-  uint8_t cache_color_found[2 * PALETTE_MAX_SIZE];
-  const int n_out_cache = av1_index_color_cache(
-      color_cache, n_cache, pmi->palette_colors + PALETTE_MAX_SIZE, n,
-      cache_color_found, out_cache_colors);
-  total_bits +=
-      n_cache + delta_encode_cost(out_cache_colors, n_out_cache, bit_depth, 0);
-
-  // V channel palette color cost.
-  int zero_count = 0, min_bits_v = 0;
-  const int bits_v =
-      av1_get_palette_delta_bits_v(pmi, bit_depth, &zero_count, &min_bits_v);
-  const int bits_using_delta =
-      2 + bit_depth + (bits_v + 1) * (n - 1) - zero_count;
-  const int bits_using_raw = bit_depth * n;
-  total_bits += 1 + AOMMIN(bits_using_delta, bits_using_raw);
-  return av1_cost_literal(total_bits);
-}
diff --git a/third_party/aom/av1/encoder/palette.h b/third_party/aom/av1/encoder/palette.h
deleted file mode 100644
index 8b88c4755..000000000
--- a/third_party/aom/av1/encoder/palette.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_PALETTE_H_
-#define AOM_AV1_ENCODER_PALETTE_H_
-
-#include "av1/common/blockd.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define AV1_K_MEANS_RENAME(func, dim) func##_dim##dim
-
-void AV1_K_MEANS_RENAME(av1_calc_indices, 1)(const int *data,
-                                             const int *centroids,
-                                             uint8_t *indices, int n, int k);
-void AV1_K_MEANS_RENAME(av1_calc_indices, 2)(const int *data,
-                                             const int *centroids,
-                                             uint8_t *indices, int n, int k);
-void AV1_K_MEANS_RENAME(av1_k_means, 1)(const int *data, int *centroids,
-                                        uint8_t *indices, int n, int k,
-                                        int max_itr);
-void AV1_K_MEANS_RENAME(av1_k_means, 2)(const int *data, int *centroids,
-                                        uint8_t *indices, int n, int k,
-                                        int max_itr);
-
-// Given 'n' 'data' points and 'k' 'centroids' each of dimension 'dim',
-// calculate the centroid 'indices' for the data points.
-static INLINE void av1_calc_indices(const int *data, const int *centroids,
-                                    uint8_t *indices, int n, int k, int dim) {
-  if (dim == 1) {
-    AV1_K_MEANS_RENAME(av1_calc_indices, 1)(data, centroids, indices, n, k);
-  } else if (dim == 2) {
-    AV1_K_MEANS_RENAME(av1_calc_indices, 2)(data, centroids, indices, n, k);
-  } else {
-    assert(0 && "Untemplated k means dimension");
-  }
-}
-
-// Given 'n' 'data' points and an initial guess of 'k' 'centroids' each of
-// dimension 'dim', runs up to 'max_itr' iterations of k-means algorithm to get
-// updated 'centroids' and the centroid 'indices' for elements in 'data'.
-// Note: the output centroids are rounded off to nearest integers.
-static INLINE void av1_k_means(const int *data, int *centroids,
-                               uint8_t *indices, int n, int k, int dim,
-                               int max_itr) {
-  if (dim == 1) {
-    AV1_K_MEANS_RENAME(av1_k_means, 1)(data, centroids, indices, n, k, max_itr);
-  } else if (dim == 2) {
-    AV1_K_MEANS_RENAME(av1_k_means, 2)(data, centroids, indices, n, k, max_itr);
-  } else {
-    assert(0 && "Untemplated k means dimension");
-  }
-}
-
-// Given a list of centroids, returns the unique number of centroids 'k', and
-// puts these unique centroids in first 'k' indices of 'centroids' array.
-// Ideally, the centroids should be rounded to integers before calling this
-// method.
-int av1_remove_duplicates(int *centroids, int num_centroids);
-
-// Given a color cache and a set of base colors, find if each cache color is
-// present in the base colors, record the binary results in "cache_color_found".
-// Record the colors that are not in the color cache in "out_cache_colors".
-int av1_index_color_cache(const uint16_t *color_cache, int n_cache,
-                          const uint16_t *colors, int n_colors,
-                          uint8_t *cache_color_found, int *out_cache_colors);
-
-// Return the number of bits used to transmit each v palette color delta;
-// assign zero_count with the number of deltas being 0.
-int av1_get_palette_delta_bits_v(const PALETTE_MODE_INFO *const pmi,
-                                 int bit_depth, int *zero_count, int *min_bits);
-
-// Return the rate cost for transmitting luma palette color values.
-int av1_palette_color_cost_y(const PALETTE_MODE_INFO *const pmi,
-                             uint16_t *color_cache, int n_cache, int bit_depth);
-
-// Return the rate cost for transmitting chroma palette color values.
-int av1_palette_color_cost_uv(const PALETTE_MODE_INFO *const pmi,
-                              uint16_t *color_cache, int n_cache,
-                              int bit_depth);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_PALETTE_H_
diff --git a/third_party/aom/av1/encoder/partition_model_weights.h b/third_party/aom/av1/encoder/partition_model_weights.h
deleted file mode 100644
index 437ea43f9..000000000
--- a/third_party/aom/av1/encoder/partition_model_weights.h
+++ /dev/null
@@ -1,2448 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_PARTITION_MODEL_WEIGHTS_H_
-#define AOM_AV1_ENCODER_PARTITION_MODEL_WEIGHTS_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "av1/encoder/ml.h"
-
-#define FEATURE_SIZE 10
-#define LABEL_SIZE 16
-// nn model for ab partition pruning, 128x128.
-static const float av1_ab_partition_nn_weights_128_layer0[FEATURE_SIZE * 64] = {
-  -0.715251f, -0.015767f, -0.667353f, -0.345255f, 0.177887f,  -0.469759f,
-  0.426152f,  0.489798f,  0.469865f,  0.773821f,  0.088517f,  0.074585f,
-  0.838754f,  0.048449f,  -0.007584f, 0.638968f,  0.233305f,  -0.319236f,
-  -0.257124f, -0.170869f, 0.137180f,  0.114852f,  -0.721241f, -0.947962f,
-  -0.411298f, 0.494306f,  -0.060435f, -0.648421f, -0.126624f, 0.072686f,
-  -0.143904f, -0.115839f, -0.175527f, -0.117728f, 0.040686f,  -0.189925f,
-  0.134361f,  -0.258070f, -0.177558f, 0.158049f,  0.168668f,  -0.062919f,
-  0.341986f,  0.038100f,  -0.435577f, -0.321255f, 0.203213f,  0.213061f,
-  0.533304f,  0.359296f,  -0.079558f, 0.004637f,  0.663904f,  0.043779f,
-  0.383018f,  1.136559f,  -0.084155f, 0.333057f,  -0.199011f, 0.152059f,
-  -0.078419f, -0.167752f, -0.093651f, 0.083171f,  -0.190143f, 0.086195f,
-  -0.280632f, -0.160663f, -0.017298f, 0.122628f,  -0.138116f, 0.062927f,
-  0.222462f,  0.626979f,  0.426928f,  0.117170f,  -0.240457f, 0.053750f,
-  0.038017f,  0.007359f,  -0.017595f, 0.101407f,  0.332891f,  0.074933f,
-  0.306498f,  0.219380f,  -0.151638f, -0.247976f, 0.343405f,  0.121256f,
-  0.049173f,  0.171474f,  -0.139608f, -1.016599f, -0.345553f, -0.901138f,
-  0.243401f,  0.059928f,  -0.089396f, -0.195565f, 0.364705f,  -0.020400f,
-  -1.383672f, 0.413018f,  0.536950f,  -0.020904f, -1.335306f, -0.732290f,
-  0.102885f,  0.315290f,  -0.208521f, -0.081811f, 0.182300f,  0.125712f,
-  -0.593833f, -0.220639f, -0.314155f, 0.188327f,  0.118503f,  0.524427f,
-  -1.083859f, -1.130640f, 0.390352f,  -0.045591f, 0.113160f,  -0.009149f,
-  -0.096183f, 0.115829f,  0.377752f,  0.318396f,  -0.591983f, 0.004797f,
-  -0.497377f, -0.342248f, 0.079546f,  -0.025249f, -0.295972f, 0.615501f,
-  -0.464372f, 0.418315f,  -0.173556f, 0.105217f,  0.298073f,  0.082478f,
-  0.033223f,  0.977341f,  -0.372982f, -0.052337f, 0.154124f,  0.396787f,
-  0.536654f,  -0.139061f, -0.223702f, 0.229666f,  -0.846766f, 0.107723f,
-  0.563839f,  -0.483141f, 0.304813f,  -0.765283f, 0.070964f,  0.151101f,
-  0.275188f,  0.490303f,  1.175892f,  0.085377f,  -0.191200f, 0.544532f,
-  -0.365075f, 0.167546f,  0.052183f,  -0.220529f, -0.212227f, -0.144988f,
-  -0.273356f, -0.062023f, 0.103993f,  -0.238493f, -0.161204f, -0.054611f,
-  -0.166672f, 0.128327f,  0.461751f,  -0.545822f, 0.739798f,  0.594386f,
-  -0.163192f, -0.332501f, 0.363834f,  -0.065043f, 0.474812f,  -0.138811f,
-  0.170924f,  -0.778142f, -0.316474f, -0.508065f, -0.039986f, -0.478001f,
-  0.340591f,  0.041783f,  0.055419f,  0.015155f,  -0.981830f, -1.355237f,
-  0.347516f,  1.155327f,  0.081319f,  0.274163f,  -0.327230f, -0.113478f,
-  0.556552f,  -0.055986f, 0.217318f,  -0.445351f, 0.325759f,  0.526547f,
-  -0.657434f, -0.572214f, -0.037087f, 0.081384f,  0.064518f,  0.014892f,
-  0.215279f,  1.834504f,  -0.242107f, 0.079810f,  0.129558f,  0.079588f,
-  -0.035189f, -0.221745f, -0.163414f, 0.043978f,  -1.028662f, -0.623609f,
-  1.130336f,  0.664661f,  -0.063975f, -0.415863f, 0.018581f,  0.157758f,
-  0.200570f,  0.063420f,  0.901039f,  -0.746286f, 0.196230f,  -0.290592f,
-  0.042373f,  -0.502500f, 0.183638f,  0.103394f,  -0.298858f, 0.145436f,
-  0.196916f,  0.108319f,  -0.448572f, -0.881385f, 0.302497f,  0.121679f,
-  -0.021327f, 0.025150f,  0.481306f,  -0.359634f, 0.350257f,  -0.228647f,
-  -0.669860f, 0.260025f,  -0.034182f, 0.619247f,  -0.158826f, -0.405864f,
-  0.674112f,  -0.027885f, -0.325274f, -0.241492f, 0.036024f,  -0.437685f,
-  -0.091458f, -0.109295f, -0.350676f, 0.044706f,  0.297059f,  0.016290f,
-  1.121203f,  1.289062f,  -1.299476f, -1.129221f, 0.103752f,  0.131302f,
-  -0.263265f, 0.222155f,  -0.229908f, 0.013922f,  -0.226001f, -0.248383f,
-  -0.004415f, -0.020958f, 0.055634f,  0.086200f,  0.114556f,  -0.184061f,
-  -0.096210f, -0.146466f, -0.249618f, -0.195998f, 0.088758f,  0.023781f,
-  -0.264460f, 0.157026f,  -0.235228f, -0.102564f, 0.043463f,  -0.187823f,
-  -0.257500f, -0.199049f, -0.242210f, 0.030448f,  0.221604f,  0.151804f,
-  -0.100404f, -0.073931f, 0.144749f,  -0.001572f, -1.438079f, -0.233716f,
-  0.733422f,  1.727080f,  -0.036397f, 0.027551f,  0.425321f,  0.085703f,
-  0.031186f,  0.032333f,  -0.675130f, 1.437733f,  -0.202392f, -0.525003f,
-  0.087048f,  0.328194f,  -0.079989f, -0.391088f, -0.238732f, -0.120660f,
-  -0.139600f, 0.154665f,  0.026202f,  -0.233501f, -0.009046f, -0.149187f,
-  -0.199646f, 0.115375f,  0.209762f,  -0.014875f, 0.124038f,  -0.119985f,
-  1.079625f,  -0.461513f, 0.614114f,  0.021003f,  0.439449f,  -0.824834f,
-  -0.299701f, 0.193817f,  -0.870551f, -1.262313f, -0.079517f, 0.341570f,
-  0.305310f,  -0.089721f, -0.317314f, -0.075631f, 0.127172f,  -0.208635f,
-  1.191922f,  0.163141f,  0.564285f,  0.286352f,  0.480865f,  0.173094f,
-  -0.094034f, -0.071339f, -0.328992f, -0.006382f, 0.314705f,  0.090258f,
-  -0.016099f, 0.193230f,  0.188061f,  0.398144f,  0.722781f,  0.769949f,
-  0.025442f,  -0.162016f, 0.070192f,  -0.056946f, -0.100957f, -0.219934f,
-  -0.203492f, -0.015454f, -0.013272f, -0.098008f, 0.051707f,  -0.017493f,
-  0.527446f,  0.083605f,  0.588318f,  0.878215f,  0.028747f,  -0.146479f,
-  -0.345170f, -0.136059f, -0.152005f, -0.203634f, 0.232702f,  -0.101340f,
-  -0.027733f, -0.282611f, 0.265366f,  0.082362f,  -0.265420f, -0.131124f,
-  0.166303f,  0.040194f,  -0.100710f, 0.579151f,  -0.530136f, 0.163422f,
-  -0.998821f, -1.565311f, -1.774785f, -2.493372f, 0.116970f,  -0.090302f,
-  1.723272f,  0.552370f,  -0.295954f, -0.439095f, -0.266730f, 0.027936f,
-  0.539616f,  -0.234902f, -0.167601f, -0.149877f, -0.242983f, 0.122353f,
-  -0.121620f, -0.205517f, -0.180144f, -0.264208f, 0.151500f,  -0.159378f,
-  0.029145f,  -0.050892f, -0.223407f, -0.246239f, 0.043152f,  -0.018460f,
-  0.169972f,  -0.187769f, -0.034670f, -0.238330f, 0.288070f,  -0.093243f,
-  -0.437105f, -0.573376f, 0.660073f,  0.285727f,  0.408470f,  0.158475f,
-  0.032699f,  0.056280f,  -0.237176f, -0.083003f, 0.105598f,  -0.169522f,
-  -0.260420f, -0.121100f, -0.173983f, -0.195693f, -0.232028f, 0.224940f,
-  0.029124f,  0.009580f,  -0.252034f, 0.103087f,  1.156561f,  0.603848f,
-  -0.562805f, -1.652742f, -0.568288f, -1.829395f, 0.046169f,  0.076095f,
-  1.490819f,  0.415893f,  -0.277788f, -0.115787f, 0.093750f,  0.270726f,
-  -0.395983f, -0.353742f, 0.034605f,  0.005342f,  0.184537f,  0.086445f,
-  0.156417f,  1.476367f,  0.122587f,  0.002145f,  0.431057f,  -0.381184f,
-  -1.646457f, -0.014009f, -0.671224f, 0.193726f,  -0.019247f, -0.031267f,
-  -0.046208f, 0.298733f,  0.064734f,  0.616984f,  0.039381f,  0.182722f,
-  -0.116670f, 0.233093f,  -1.214374f, -0.817970f, -0.064394f, -0.584783f,
-  0.077697f,  -0.266720f, 0.130875f,  -0.235295f, -0.265754f, -0.159999f,
-  -0.250114f, -0.183017f, 0.194403f,  -0.105808f, -0.169215f, -0.240866f,
-  -0.026662f, -0.045123f, -0.036175f, -0.167471f, -0.192908f, -0.232602f,
-  -0.267036f, -0.112500f, -0.257944f, -0.111909f, -0.802226f, -0.008800f,
-  0.881460f,  -0.678603f, 0.008666f,  -0.252053f, -0.341035f, -0.175290f,
-  0.183012f,  0.385991f,  0.079888f,  -0.014039f, -0.148653f, 0.671778f,
-  -0.130219f, 1.086467f,  0.129267f,  -0.040400f, -0.201221f, -0.077005f,
-  0.015890f,  0.000781f,  0.137764f,  1.389546f,  0.172152f,  0.047279f,
-  -0.042783f, 0.127740f,  0.141467f,  -0.335738f, -1.396392f, 0.031496f,
-  0.357385f,  0.343602f,  -0.714553f, 0.311014f,  0.132845f,  0.061149f,
-  0.006796f,  0.568106f,  -0.255949f, 0.104134f,  -0.993447f, 0.298135f,
-  -0.406590f, -0.049228f, -0.578570f, -0.188561f, -0.107046f, 0.374095f,
-  0.068481f,  0.036240f,  -0.495801f, 0.180574f,  -0.766129f, 0.886967f,
-  -0.568868f, -0.936062f, -0.418886f, -0.058735f, -0.511964f, -0.438596f,
-  0.019016f,  -0.015837f, 0.600197f,  0.429773f,  0.315026f,  0.319667f,
-  0.214617f,  -0.017316f, 0.270257f,  -0.040524f, 0.695803f,  -0.015223f,
-  -1.554965f, 0.356997f,  -1.472428f, 0.024637f,  -0.562958f, 0.870351f,
-  0.193635f,  0.036063f,  0.328638f,  0.200274f,  -1.634707f, 0.110534f,
-  0.420104f,  -0.072042f, -0.006404f, 0.171680f,
-};
-
-static const float av1_ab_partition_nn_bias_128_layer0[64] = {
-  0.643147f,  -1.348826f, 0.431627f,  0.000000f,  0.102717f,  -0.772628f,
-  -0.034351f, -0.761977f, -0.638397f, 0.541969f,  -0.391311f, 0.563076f,
-  0.148553f,  0.267217f,  -0.788092f, 0.544573f,  -0.546280f, 0.000000f,
-  -0.446945f, 0.127732f,  0.270624f,  -0.219435f, -1.220203f, 0.324584f,
-  0.110885f,  0.276547f,  0.179726f,  -0.375160f, 0.026401f,  -0.032595f,
-  0.000000f,  -0.047932f, -0.648602f, -0.512637f, -0.031661f, -0.236761f,
-  0.476453f,  -0.028021f, -0.013673f, -0.015578f, -0.920077f, 0.000000f,
-  0.915351f,  -0.209962f, 0.000000f,  -0.025731f, 0.218288f,  0.000000f,
-  0.047726f,  -0.813077f, -1.263281f, 0.239087f,  0.278614f,  -0.030753f,
-  0.000000f,  0.346744f,  -0.948543f, -1.174211f, 0.216377f,  0.498913f,
-  0.853918f,  0.002504f,  -0.190403f, 0.452050f,
-};
-
-static const float av1_ab_partition_nn_weights_128_layer1[64 * LABEL_SIZE] = {
-  0.179769f,  1.499417f,  -0.445135f, -0.142278f, -0.337661f, 0.682064f,
-  -0.203213f, 0.302171f,  0.226877f,  -0.422169f, 1.687586f,  0.783773f,
-  0.220995f,  0.253482f,  0.370435f,  -1.342775f, 0.337229f,  -0.271473f,
-  0.291796f,  1.362227f,  -1.751397f, -0.086178f, 0.725496f,  -0.118597f,
-  0.227963f,  -0.501577f, 0.223849f,  -0.122421f, -0.123437f, -0.051045f,
-  -0.020115f, 0.212711f,  0.246025f,  0.088120f,  -0.168995f, 1.740190f,
-  -0.195098f, 0.680339f,  -0.589572f, -0.075244f, 0.878766f,  0.064092f,
-  -3.548527f, 0.001660f,  0.107926f,  -0.169501f, -0.455212f, 0.123045f,
-  -1.836998f, 0.330365f,  1.301475f,  0.454761f,  -0.576552f, -0.190761f,
-  0.208459f,  0.618483f,  1.383364f,  0.970718f,  0.390174f,  0.406252f,
-  -0.564519f, -0.312062f, 1.345712f,  -0.151873f, 0.109290f,  0.408847f,
-  0.391243f,  0.152024f,  0.181764f,  -0.036263f, -0.160466f, 0.153595f,
-  0.049163f,  -0.753012f, -1.804062f, 0.347475f,  -2.746580f, 0.575618f,
-  0.261799f,  0.210505f,  -0.302054f, -0.109872f, 0.199506f,  -1.182971f,
-  0.723668f,  0.177758f,  -0.338202f, 0.254396f,  -0.220023f, 0.043504f,
-  0.669866f,  -0.040816f, -0.402730f, 0.017990f,  0.215523f,  -0.216816f,
-  0.454826f,  -0.726067f, -0.018750f, -0.928679f, 0.154315f,  -0.465641f,
-  0.144566f,  -0.030064f, -0.054667f, -0.154055f, 0.625384f,  1.323795f,
-  -0.159496f, 0.097072f,  -0.463197f, -0.057938f, 0.750290f,  -0.233061f,
-  0.412631f,  -0.535223f, -0.151423f, -0.154583f, 0.024721f,  -0.494448f,
-  0.230594f,  -0.980138f, -0.653968f, 0.126079f,  0.051814f,  -0.053219f,
-  -0.421708f, -0.228853f, 0.237885f,  0.888157f,  0.059655f,  0.241295f,
-  0.210443f,  0.228238f,  0.119127f,  -0.051989f, -0.355408f, 0.182215f,
-  0.244277f,  -0.104577f, -0.558035f, -0.023270f, 0.054571f,  0.700646f,
-  -0.223006f, 0.115523f,  0.023391f,  0.437264f,  0.709477f,  -0.531212f,
-  -0.094731f, 0.328161f,  -0.105418f, -0.133511f, 0.497168f,  -0.030948f,
-  -0.407132f, -0.043943f, 0.155505f,  0.251945f,  0.205010f,  0.167160f,
-  0.083654f,  -0.636810f, 0.401315f,  -0.398414f, 0.290046f,  0.206846f,
-  0.042218f,  0.168150f,  0.843181f,  -0.671242f, -0.202392f, -0.073301f,
-  0.142895f,  0.237466f,  0.212145f,  -0.091828f, 0.187038f,  -0.720841f,
-  -0.616069f, -0.238021f, 0.065365f,  0.434119f,  0.179023f,  -0.040107f,
-  -0.430734f, -0.297368f, 0.575954f,  0.382619f,  -0.709787f, -0.320810f,
-  0.242342f,  -0.047614f, 0.705216f,  0.098077f,  0.357179f,  0.046017f,
-  0.115074f,  -0.412305f, -0.272304f, 0.048096f,  -0.803811f, 0.275000f,
-  0.642198f,  0.180286f,  -0.087178f, -0.112707f, -0.394443f, 0.201989f,
-  0.241759f,  -1.038870f, 0.728124f,  0.800559f,  -1.296268f, 0.198612f,
-  -0.053478f, 0.414344f,  -0.510529f, 0.124179f,  -2.219115f, -0.074583f,
-  -0.143055f, 0.001697f,  0.810811f,  -0.657140f, 0.186818f,  -0.936414f,
-  0.539578f,  -0.308244f, -0.126624f, -0.204767f, 0.091145f,  -0.049340f,
-  0.252014f,  0.394582f,  0.018764f,  -0.060377f, -0.019133f, 0.064083f,
-  0.069211f,  -0.526693f, 0.209850f,  -0.481466f, -0.468302f, -0.100407f,
-  0.241018f,  -1.037781f, 0.038539f,  -2.113840f, -0.974895f, 0.163187f,
-  0.425132f,  -0.772546f, -1.261254f, -0.217488f, -0.971748f, -0.805640f,
-  -0.745175f, -0.177077f, 0.217658f,  0.381431f,  -0.052338f, 0.087176f,
-  -0.165972f, 0.085937f,  0.472564f,  -0.796627f, -2.453307f, 0.569664f,
-  -0.233010f, -0.192134f, 0.064339f,  -0.111411f, -0.262469f, -0.410022f,
-  0.519993f,  -0.684620f, 0.393460f,  -0.277753f, -0.153624f, 0.528984f,
-  -0.415558f, -0.445863f, 0.588512f,  -0.142439f, -0.132127f, 0.199776f,
-  -0.579284f, 0.119488f,  -0.033590f, -0.503846f, -0.674979f, 0.335125f,
-  0.020519f,  0.233973f,  -0.297998f, -0.051511f, 0.518626f,  -0.412782f,
-  -0.074045f, 0.130523f,  0.465751f,  -0.117795f, 2.535813f,  0.352108f,
-  -0.499228f, 0.379784f,  0.056699f,  0.173142f,  -0.076519f, -0.026666f,
-  0.017834f,  0.492333f,  0.093364f,  0.037867f,  -0.165420f, -0.356429f,
-  -0.562334f, 0.057656f,  -0.307544f, 0.085857f,  -0.559851f, 0.107230f,
-  -0.398633f, 0.152618f,  -0.216835f, -0.024539f, 0.026044f,  -0.249519f,
-  -0.563594f, -0.746025f, 0.025265f,  -0.298888f, -0.185243f, 0.058794f,
-  0.233696f,  -0.115223f, 0.144617f,  -0.864390f, 0.619944f,  -0.023980f,
-  0.019481f,  0.225252f,  0.416552f,  -0.115993f, 0.935387f,  0.744386f,
-  0.053353f,  -0.052582f, -0.065650f, 0.228488f,  -0.032042f, -0.371252f,
-  -0.003638f, -0.736984f, -0.203776f, 0.030922f,  -0.065577f, -0.031643f,
-  -0.049253f, -0.054640f, 0.787134f,  0.545414f,  -0.140297f, -0.124274f,
-  -0.110011f, -0.029552f, 0.657005f,  0.214973f,  -0.374300f, 0.251642f,
-  0.276591f,  0.030566f,  -0.145470f, 0.350579f,  -0.356436f, -0.052694f,
-  -0.063966f, -0.751008f, -1.042392f, 0.328892f,  -0.425058f, -0.421571f,
-  -0.571889f, -1.141472f, -0.125216f, 0.212713f,  -0.485170f, -0.088791f,
-  0.124589f,  0.023237f,  0.077635f,  0.020901f,  -0.271402f, -0.321424f,
-  -0.513946f, -0.867872f, -0.284593f, 0.106276f,  0.220192f,  -0.143532f,
-  -0.014648f, 0.073402f,  0.327256f,  -0.139803f, 0.168763f,  0.048199f,
-  -0.122526f, 0.111713f,  -0.134257f, 0.810364f,  -0.085222f, -0.259221f,
-  -0.239349f, 0.044448f,  0.205031f,  0.413113f,  -0.107720f, -0.018816f,
-  -0.247741f, -0.004963f, 0.041170f,  -0.158019f, 0.134839f,  0.129502f,
-  0.800488f,  -1.041584f, -0.129336f, 0.170834f,  0.566586f,  -0.230443f,
-  0.437937f,  -0.149922f, -0.046665f, -0.094646f, 0.200070f,  0.072943f,
-  -0.076943f, -0.084971f, -0.515843f, -0.146720f, 0.472869f,  -0.444731f,
-  -0.100877f, 0.545196f,  -1.786626f, -0.482946f, 0.500509f,  -0.843257f,
-  0.200374f,  0.045103f,  -0.575718f, -0.164335f, -0.232522f, -0.021825f,
-  -0.139490f, 0.356058f,  -0.352075f, 0.061751f,  -0.200616f, -1.180921f,
-  -0.181355f, -0.137459f, 0.247574f,  0.181541f,  0.184314f,  -0.961482f,
-  0.493615f,  0.910261f,  -2.279238f, 0.648631f,  -0.055526f, -0.037137f,
-  0.038643f,  0.136609f,  -0.819373f, -0.040840f, -0.265989f, 0.006877f,
-  0.454651f,  -0.595323f, -0.099500f, -0.263717f, 0.150456f,  0.245077f,
-  -0.268666f, 0.162232f,  -0.516451f, -0.024501f, 0.188046f,  -0.002262f,
-  0.261319f,  0.004173f,  0.746982f,  0.174761f,  0.470447f,  -0.159558f,
-  -0.385240f, 0.023084f,  -0.133520f, -0.220607f, -0.018731f, -0.373558f,
-  -0.707763f, -1.850150f, -0.807404f, -0.168063f, -0.071435f, -0.160740f,
-  -0.478789f, -1.070674f, -0.489740f, -0.255796f, 0.100486f,  -0.153361f,
-  0.334394f,  -0.569472f, -0.198118f, 0.255922f,  0.104717f,  -0.065179f,
-  0.111879f,  -0.447237f, 1.373623f,  -0.190191f, -0.063311f, 0.337529f,
-  -0.138800f, 0.057009f,  -0.137006f, 0.641378f,  0.883147f,  -0.679655f,
-  0.267717f,  -0.351602f, -0.135225f, 0.229398f,  -0.513225f, -1.120345f,
-  0.528786f,  -0.051081f, 0.086653f,  0.140141f,  -0.563969f, 0.333402f,
-  -0.174745f, 0.321093f,  -0.438641f, -0.005131f, 0.247415f,  0.110120f,
-  -0.076308f, -0.083244f, 0.838944f,  -0.113043f, -0.013258f, -0.175028f,
-  -0.179941f, 0.272676f,  -0.047946f, -0.088076f, -0.450031f, 0.053929f,
-  -0.083549f, -0.089952f, -0.186253f, 0.257483f,  0.011019f,  0.586435f,
-  0.060580f,  -0.052078f, 0.090277f,  -0.780869f, 0.969811f,  -0.025349f,
-  -0.281917f, 0.014857f,  0.231863f,  -0.228601f, -0.003861f, 0.226550f,
-  0.141825f,  -0.102171f, -0.010387f, 0.220378f,  -2.561975f, -0.497071f,
-  -0.315117f, 0.371981f,  0.138247f,  0.625031f,  -0.308133f, -0.217876f,
-  0.005615f,  -0.860179f, 0.747491f,  0.006356f,  -0.057024f, -0.483189f,
-  0.055592f,  -0.316834f, 0.069858f,  0.218788f,  -0.200044f, 0.227588f,
-  0.215496f,  -0.055324f, -0.393147f, -0.394062f, -0.253264f, -0.075619f,
-  -0.152512f, -0.332995f, 0.129053f,  0.178668f,  -0.302694f, 0.030678f,
-  0.925896f,  0.964375f,  0.169021f,  -0.218657f, -0.627204f, 0.206437f,
-  -0.521336f, 0.176206f,  0.142733f,  0.139248f,  0.411682f,  0.181544f,
-  0.224850f,  -0.935547f, -0.558208f, 0.348096f,  0.342129f,  -0.389340f,
-  -0.236308f, -0.132099f, 0.073642f,  0.089391f,  -0.306901f, -0.397842f,
-  0.444282f,  0.074623f,  -0.051075f, -0.106617f, -0.184037f, -0.239046f,
-  -0.138761f, 0.120794f,  -0.647577f, -0.336471f, 0.527899f,  -0.164234f,
-  -0.028354f, 1.083678f,  -0.251534f, -0.145903f, -0.182783f, 0.070976f,
-  -0.199590f, -0.400306f, -0.029763f, -0.548042f, -0.266270f, -0.118084f,
-  -1.152632f, 0.383685f,  -0.105895f, -0.096829f, 0.118382f,  0.047447f,
-  -0.019051f, 0.310180f,  -0.162793f, -0.029574f, 0.058054f,  -0.636017f,
-  0.490639f,  0.158347f,  -0.385701f, -0.147057f, 1.285825f,  -1.276083f,
-  -0.021795f, -0.101600f, 0.163254f,  0.267160f,  -2.317864f, -0.098598f,
-  -0.296337f, -0.309017f, 0.164127f,  -0.270012f, -0.071187f, -0.262270f,
-  0.075415f,  -0.368328f, 0.186728f,  -0.158031f, 0.481663f,  0.515950f,
-  -0.162551f, 0.497981f,  0.262196f,  0.168479f,  0.726066f,  -0.243856f,
-  -0.058998f, 0.140168f,  0.053242f,  -0.624623f, -0.249480f, 0.055197f,
-  -1.376804f, 0.417571f,  0.203784f,  0.174370f,  -0.155531f, -0.029400f,
-  -0.491473f, 0.079811f,  -0.080123f, 1.345900f,  0.637077f,  0.434862f,
-  -1.787438f, 0.005756f,  -0.362706f, 0.179458f,  -0.288263f, 0.516788f,
-  -0.921248f, 0.043794f,  -0.137729f, -0.196171f, -0.046295f, -0.793781f,
-  -0.156532f, -0.132566f, 0.517989f,  -0.154321f, -0.054174f, -0.077900f,
-  -0.373316f, -0.117718f, 0.188986f,  -0.476188f, -0.245312f, 0.181439f,
-  -0.161024f, -0.229059f, -3.079907f, -0.225452f, -0.594355f, -0.558027f,
-  -0.135429f, 0.125766f,  -0.081314f, -0.350894f, -0.163165f, -1.936507f,
-  -0.205966f, 0.031472f,  0.744446f,  -0.006680f, -0.837551f, 0.605862f,
-  -0.854929f, -1.543750f, -0.307704f, -0.240517f, 0.178240f,  -0.183586f,
-  -0.010307f, 0.099373f,  -0.228278f, 0.175236f,  -0.000133f, 0.104491f,
-  -1.540545f, -0.570971f, -0.252885f, 0.483036f,  0.052531f,  0.260214f,
-  -0.515016f, -0.602081f, -0.485690f, -0.730710f, 0.163719f,  -1.775975f,
-  -0.298634f, 0.323626f,  -0.373579f, -0.872977f, 0.619574f,  0.026862f,
-  -0.122531f, -0.084698f, -2.436297f, 0.483996f,  -0.203640f, -0.302157f,
-  -0.150666f, -0.238320f, 0.089250f,  0.236485f,  -0.668654f, -0.122863f,
-  0.491152f,  -0.226444f, -0.181248f, 0.120158f,  0.294027f,  0.250056f,
-  0.307601f,  0.357875f,  -1.746455f, -0.175670f, 0.385447f,  -0.108808f,
-  -0.090235f, -0.642504f, -0.486004f, -0.055160f, -0.068692f, 0.009736f,
-  0.607555f,  -0.489426f, 0.150624f,  0.598114f,  -0.128816f, -0.445793f,
-  -0.066524f, -0.254380f, 0.227106f,  -0.406495f, -0.121632f, -0.275960f,
-  -0.136494f, 0.339457f,  -1.318132f, -0.417572f, -2.614077f, 0.324603f,
-  -0.001211f, 0.375192f,  -0.473448f, -0.162510f, 0.099329f,  -0.277965f,
-  0.101221f,  -0.060263f, 0.121867f,  -1.042140f, 0.440851f,  0.078898f,
-  -0.209007f, -0.243699f, 0.715197f,  -0.093997f, 0.086022f,  -0.178203f,
-  -2.275496f, -0.098413f, 0.199352f,  -0.526791f, -0.162086f, -0.197806f,
-  -0.231657f, -0.269202f, -0.794294f, -0.223461f, 0.503584f,  0.416236f,
-  0.064082f,  0.197655f,  0.340871f,  -0.186645f, -0.291498f, 0.433938f,
-  -1.110063f, 0.003751f,  0.392738f,  0.069360f,  0.102088f,  -0.302128f,
-  -1.518457f, 0.106939f,  0.404527f,  -0.306868f, -0.286928f, 0.729276f,
-  -0.531710f, 0.745048f,  -0.168837f, -1.953886f, -0.258828f, -0.190252f,
-  0.241877f,  -0.916744f, -0.030326f, -0.070541f, -0.271037f, 0.211303f,
-  -0.489957f, 0.100850f,  0.323999f,  -0.802837f, -0.462408f, -0.079350f,
-  -0.029374f, 0.131213f,  -0.825032f, 0.040202f,  0.351821f,  0.002869f,
-  -0.132516f, -0.471264f, -0.297002f, 0.263913f,  0.033478f,  0.146161f,
-  0.533229f,  -0.228608f, -0.200639f, -0.170955f, -0.915037f, 0.724491f,
-  0.005151f,  0.018584f,  -0.029771f, -0.396038f, -0.159236f, 0.038691f,
-  -1.197056f, 0.146302f,  0.226840f,  -0.852126f, 0.031214f,  0.108880f,
-  0.562000f,  -0.134633f, -0.713343f, -0.342252f, -1.764521f, -0.114653f,
-  0.515073f,  -0.080515f, -0.121155f, -0.865139f, -0.833694f, -0.368553f,
-  0.347673f,  0.623379f,  0.722067f,  -0.492458f, -0.513263f, 0.585167f,
-  0.721518f,  -0.693499f, 0.343725f,  -0.273861f, -0.040230f, -0.785664f,
-  -0.157500f, -0.308445f, 0.054062f,  0.600131f,  -0.860887f, 0.434470f,
-  -0.191382f, -0.306150f, -0.243965f, 0.705444f,  0.007789f,  -0.146154f,
-  -0.054499f, -0.073500f, -1.067364f, 0.404936f,  -2.864590f, 0.182323f,
-  0.326126f,  0.102405f,  -0.135800f, 1.128095f,  -0.012267f, -0.023996f,
-  -0.264834f, -0.108967f, -1.176746f, -0.926666f, 0.082999f,  -0.498361f,
-  0.083560f,  -0.210074f, 0.019225f,  -0.201614f, -0.904760f, 0.181421f,
-  0.586384f,  -0.177706f, 0.065471f,  0.168552f,  0.054705f,  0.045241f,
-  0.048057f,  -0.410957f, -2.188854f, -0.169812f, 0.015521f,  0.176856f,
-  -0.179331f, -0.352640f, -0.491735f, -1.743206f, 0.044227f,  0.010454f,
-  0.823643f,  -0.119781f, -0.098359f, 0.093119f,
-};
-
-static const float av1_ab_partition_nn_bias_128_layer1[LABEL_SIZE] = {
-  -0.433195f, -0.120488f, -0.116721f, 0.112134f,  0.118170f, -0.259769f,
-  -0.077530f, 0.394044f,  0.279167f,  -0.317988f, 0.189538f, 0.314776f,
-  0.325655f,  -0.107123f, 0.591049f,  0.358744f,
-};
-
-static const NN_CONFIG av1_ab_partition_nnconfig_128 = {
-  FEATURE_SIZE,  // num_inputs
-  LABEL_SIZE,    // num_outputs
-  1,             // num_hidden_layers
-  {
-      64,  // num_hidden_nodes
-  },
-  {
-      av1_ab_partition_nn_weights_128_layer0,
-      av1_ab_partition_nn_weights_128_layer1,
-  },
-  {
-      av1_ab_partition_nn_bias_128_layer0,
-      av1_ab_partition_nn_bias_128_layer1,
-  },
-};
-
-// nn model for ab partition pruning, 64x64.
-static const float av1_ab_partition_nn_weights_64_layer0[FEATURE_SIZE * 64] = {
-  -0.495347f, -0.049498f, -0.026804f, 0.030474f,  -0.289308f, -0.264193f,
-  -0.141121f, -0.072562f, -0.391665f, -0.051491f, -0.234761f, 0.027155f,
-  -0.038217f, 0.014872f,  -0.289728f, -0.233577f, -0.415875f, -0.343615f,
-  -0.442543f, -0.482492f, 0.073510f,  0.007503f,  2.162329f,  -0.362849f,
-  2.145915f,  -0.883135f, 0.185636f,  -0.062859f, -0.465574f, -0.486205f,
-  -0.056710f, -0.330642f, -0.321860f, 0.042321f,  -0.348965f, 0.003542f,
-  -0.291365f, -0.078164f, -0.345093f, -0.220272f, -0.471270f, -0.763853f,
-  0.246622f,  0.199651f,  -0.663420f, -0.154152f, -1.220383f, 0.047138f,
-  0.816811f,  0.083247f,  -0.218839f, 0.038143f,  -0.063436f, 0.015517f,
-  -0.307320f, -0.166956f, -0.169499f, -0.399005f, -0.234638f, -0.162266f,
-  0.050425f,  -0.221723f, -0.256942f, -0.287285f, 0.144011f,  -0.033245f,
-  0.083649f,  0.119428f,  -0.056706f, -0.117805f, 0.021866f,  -0.257300f,
-  -0.201378f, -0.217484f, -0.413780f, -0.145793f, 0.082792f,  -0.347247f,
-  0.042539f,  -0.302697f, 1.652316f,  0.000701f,  -0.482843f, -0.160332f,
-  -0.450099f, 0.212399f,  -4.715360f, -5.336774f, -5.375758f, -6.048339f,
-  0.085956f,  -0.037767f, 1.052409f,  -0.931924f, -2.221907f, 0.268946f,
-  0.015512f,  1.237094f,  -1.092185f, 0.418247f,  -0.082143f, -0.076914f,
-  -0.060749f, -0.325440f, -0.296960f, -0.066815f, -0.158477f, -0.373945f,
-  -0.122322f, -0.113495f, -0.097978f, -0.192816f, -0.270418f, 0.035840f,
-  -0.015458f, -0.121071f, -0.279582f, -0.067683f, 0.097855f,  0.019839f,
-  0.451127f,  0.004376f,  1.410392f,  3.255835f,  -0.344815f, 0.145202f,
-  0.204132f,  0.171948f,  -0.527736f, -0.110353f, 0.901448f,  0.003238f,
-  -3.822090f, 0.235462f,  1.024823f,  -0.821244f, 0.876056f,  2.553762f,
-  -3.478597f, -2.076582f, -0.265515f, -0.055923f, -0.156980f, -0.164097f,
-  -0.246040f, 0.039430f,  -0.071769f, -0.118847f, -0.304053f, -0.281541f,
-  -0.226021f, -0.263091f, -0.127359f, -0.249410f, -0.051023f, 0.083911f,
-  0.084721f,  0.168089f,  -0.272169f, -0.204998f, -0.008303f, -0.173998f,
-  0.079376f,  -0.197426f, -0.199052f, -0.118794f, -0.063753f, -0.094769f,
-  0.066176f,  -0.175832f, -0.238752f, -0.287960f, -0.134307f, -0.185953f,
-  -0.385845f, 0.119769f,  -0.006567f, -0.382126f, -0.214221f, 0.038449f,
-  -0.253484f, -0.282766f, -0.020249f, -0.193929f, 0.016281f,  -0.114423f,
-  -0.145940f, -0.281621f, -0.007588f, -0.131470f, -0.189012f, -0.185699f,
-  -0.279011f, -0.008132f, 0.208463f,  0.020569f,  -0.206803f, -0.213408f,
-  -0.206131f, -0.290245f, 0.069701f,  -0.000371f, -0.307572f, -0.451785f,
-  -0.300838f, -0.453186f, -0.301691f, 0.046327f,  -0.312668f, 0.058272f,
-  -0.303131f, -0.376252f, 0.108384f,  -0.086623f, -0.100630f, -0.027330f,
-  -0.003969f, 0.089502f,  -0.200722f, -0.107889f, 0.061843f,  -0.008478f,
-  -0.265057f, -0.271132f, -0.073562f, 0.129337f,  -0.283698f, -0.353414f,
-  0.076420f,  -0.244280f, -0.119537f, -0.105366f, -0.184692f, -0.038817f,
-  -0.478507f, -0.118808f, -0.472979f, -0.305884f, -0.462813f, -0.189581f,
-  -0.011932f, -0.585700f, 0.253212f,  -1.061900f, -0.205116f, -0.336407f,
-  -0.762199f, 0.577737f,  0.230832f,  0.434440f,  -0.096713f, 0.038552f,
-  -0.147800f, -0.213553f, 0.041740f,  -0.281907f, -0.026154f, -0.082356f,
-  -0.331871f, -0.408247f, -0.129022f, -0.037550f, -0.310233f, -0.320883f,
-  -0.391963f, -0.467392f, 0.027453f,  -0.394761f, -0.045544f, 0.076052f,
-  0.483985f,  0.067093f,  0.141361f,  0.576772f,  0.859718f,  2.566515f,
-  -0.025476f, 0.769738f,  -0.680235f, -1.683309f, -2.394131f, -0.000714f,
-  -0.615021f, -0.195856f, -0.434035f, -0.295010f, -0.668659f, -0.245959f,
-  0.551148f,  1.777227f,  -0.461630f, 0.043093f,  0.012293f,  -0.255841f,
-  -0.097070f, -0.371156f, -0.146323f, -0.015508f, -0.103873f, -0.087476f,
-  -0.297266f, -0.128699f, -0.149555f, 0.016534f,  -0.375498f, -0.346759f,
-  -0.455156f, -0.147509f, -0.427076f, -0.354431f, -0.158025f, -0.164604f,
-  -0.237038f, -0.010314f, -0.092884f, -0.397084f, -0.217980f, -0.127184f,
-  -0.048421f, -0.144133f, 0.889073f,  0.012606f,  3.007608f,  -0.602584f,
-  -1.849480f, -0.373159f, -1.890695f, -3.609938f, 0.811923f,  -1.867208f,
-  -0.244326f, -0.018012f, -0.211192f, -0.220196f, 0.169363f,  0.119141f,
-  -0.230715f, 0.083247f,  0.020367f,  -0.128629f, -0.217455f, -0.159640f,
-  1.815952f,  -0.369238f, -1.186447f, -0.658753f, -0.511026f, -0.096934f,
-  0.662971f,  0.486475f,  0.159746f,  -0.018932f, 3.692397f,  1.384353f,
-  -0.401984f, -0.248380f, -0.140861f, 0.215248f,  -0.023711f, 0.059679f,
-  -0.072260f, 0.004271f,  0.039545f,  -0.347971f, -0.081851f, -0.474896f,
-  -0.181572f, 0.066736f,  -0.157822f, -0.163760f, -0.171113f, -0.089935f,
-  -0.338281f, -0.421444f, -0.306687f, -0.085283f, -0.377953f, -0.138750f,
-  -0.102701f, -0.312336f, 0.149831f,  0.007229f,  -0.155700f, -0.173611f,
-  4.074261f,  1.342306f,  -1.272712f, 1.570899f,  -0.545093f, -0.317605f,
-  -0.189440f, -0.133910f, -0.273190f, -0.108020f, -0.166107f, 0.021413f,
-  -0.239130f, -0.067211f, 0.041957f,  -0.039234f, -1.003587f, -0.094412f,
-  0.532512f,  -0.870538f, -1.118023f, -1.160983f, -0.736307f, -0.418752f,
-  0.419466f,  0.492122f,  -0.004368f, -0.022096f, -1.115132f, 0.150886f,
-  2.396852f,  2.660000f,  -0.376537f, 0.468628f,  0.149413f,  -0.074898f,
-  -0.067154f, 0.021245f,  0.127857f,  0.294189f,  0.508056f,  0.390232f,
-  -3.899177f, -3.414681f, -3.929195f, -4.160545f, -0.274323f, -0.052583f,
-  -0.003545f, -0.433084f, -0.404891f, -0.145051f, -0.312367f, 0.004579f,
-  -0.398724f, -0.372068f, -0.234279f, 0.017799f,  -0.424760f, -0.646717f,
-  -0.047568f, 2.924664f,  -0.644165f, 0.359349f,  -0.294800f, 0.591746f,
-  -0.404710f, -0.092358f, -0.250729f, 0.030829f,  -0.147149f, -0.476023f,
-  -0.071803f, -0.482516f, -0.293117f, -0.215923f, -0.373122f, -0.085315f,
-  -0.377052f, -0.449899f, -0.056452f, 0.138081f,  -0.085350f, -0.308391f,
-  0.106661f,  0.176234f,  0.258869f,  -0.230172f, -0.233029f, -0.241208f,
-  -0.067509f, -0.223172f, -0.118353f, -0.302478f, -0.579632f, -0.561326f,
-  -0.158114f, -0.223167f, -0.026689f, 0.051863f,  0.212834f,  -0.304714f,
-  -0.169071f, -0.193695f, -0.075682f, -0.170860f, -0.241008f, -0.044648f,
-  0.280815f,  -0.002585f, -0.283552f, -0.037701f, -0.681169f, -0.274535f,
-  -0.380595f, 0.109504f,  -0.111141f, -0.437685f, -0.094459f, 0.144206f,
-  -0.106139f, -0.211832f, -0.054742f, -0.172813f, -0.295905f, -0.071907f,
-  -0.418429f, -0.183240f, 0.031319f,  -0.095785f, -0.315447f, 0.069404f,
-  -0.422910f, -0.029867f, -0.357321f, -0.199976f, -0.337707f, -0.070188f,
-  -0.178198f, 0.177208f,  0.134688f,  -0.081933f, -0.229452f, -0.208872f,
-  0.026287f,  -0.364040f, -0.063696f, -0.227443f, -0.234401f, -0.205699f,
-  -0.267238f, -0.494125f, -0.056255f, 0.053715f,  -0.487754f, 0.014818f,
-  0.087383f,  -0.077556f, -0.168085f, -0.436851f, -0.276286f, -0.137845f,
-  -0.107606f, -0.103653f, -0.233766f, -0.419083f, 0.169185f,  0.010186f,
-  -0.001587f, 0.086735f,  -2.465718f, 1.482185f,  1.621193f,  -2.081680f,
-  1.386553f,  -3.204335f, -0.267111f, -0.004508f, 0.164712f,  0.274147f,
-  1.724306f,  -2.273659f, 0.749574f,  -0.891905f, 0.105965f,  -0.030428f,
-  -0.416018f, -0.300762f, 0.122911f,  -0.316908f, -0.292504f, 0.138666f,
-  -0.161327f, -0.042143f, -0.249128f, 0.149210f,  -0.088987f, -0.654101f,
-  -1.501843f, 0.216777f,  0.955914f,  0.524158f,  -1.642561f, -1.643626f,
-  0.864797f,  -0.425451f, -2.115764f, -0.012502f, 0.065172f,  1.297270f,
-  0.018845f,  1.167276f,  -0.470970f, -0.244995f, 0.374782f,  -1.811056f,
-  -0.055430f, -0.024102f, -0.376519f, -0.339640f, -0.119177f, -0.277995f,
-  -0.290095f, -0.081362f, -0.144139f, -0.118037f, -0.180357f, -0.217559f,
-  -0.370683f, 0.172816f,  -0.265069f, 0.194321f,  -0.273478f, 0.037442f,
-  -0.235552f, -0.078625f, -0.447541f, 0.016836f,  -0.271123f, -0.171481f,
-  -0.321477f, -0.184826f, -0.442981f, -0.227273f, -0.370666f, -0.237232f,
-  -0.257493f, -0.225714f, -0.153716f, -0.283487f, -0.155399f, 0.067697f,
-  0.230343f,  -0.034318f, -0.022687f, -0.047090f,
-};
-
-static const float av1_ab_partition_nn_bias_64_layer0[64] = {
-  -0.212182f, -0.233725f, -0.758846f, -0.158162f, 0.614743f,  -0.150944f,
-  -0.075727f, -0.208414f, 1.054996f,  0.713758f,  -0.300051f, -0.151482f,
-  -2.443570f, 0.430590f,  -0.129001f, -0.160733f, -0.230547f, -0.143228f,
-  -0.140577f, -0.086812f, -0.212298f, -0.159557f, -0.055647f, -0.211423f,
-  0.578161f,  -0.220318f, -0.210107f, -3.111584f, 0.604419f,  -0.232622f,
-  -0.209924f, -0.130794f, -0.084097f, -0.036005f, 0.294594f,  -2.535531f,
-  -0.209783f, -0.211189f, -2.766337f, 0.000000f,  0.450177f,  -1.754884f,
-  3.262664f,  -0.209691f, -0.614886f, -0.211257f, -0.109096f, -0.190492f,
-  -0.109007f, -0.026910f, -0.136035f, -0.212321f, -0.139320f, -0.212233f,
-  -0.305430f, 0.739171f,  0.991277f,  -0.088150f, 0.086313f,  -0.023379f,
-  -0.125366f, -0.063576f, -0.212169f, -0.047463f,
-};
-
-static const float av1_ab_partition_nn_weights_64_layer1[64 * LABEL_SIZE] = {
-  -0.036800f, 0.528721f,  0.490767f,   0.144409f,  1.103640f,  0.361910f,
-  -0.180069f, 0.068033f,  -14.868382f, 0.359013f,  0.322567f,  -0.199212f,
-  0.906164f,  -0.488254f, 0.149653f,   -0.216394f, -0.099347f, 0.004936f,
-  -0.111391f, 0.074848f,  -0.041709f,  0.147627f,  -0.018905f, 0.096116f,
-  0.184817f,  -0.016241f, 0.115739f,   2.376754f,  0.637097f,  0.052954f,
-  0.136428f,  0.225267f,  -0.181873f,  -0.142876f, 0.684048f,  0.658791f,
-  0.105795f,  0.241705f,  1.381114f,   -0.209379f, 1.145949f,  0.795293f,
-  -9.361877f, 0.198302f,  0.539600f,   0.092317f,  -0.081695f, 0.200777f,
-  0.102334f,  0.081583f,  0.060948f,   -0.025110f, 0.160951f,  -0.020170f,
-  0.234006f,  -0.029369f, 0.375036f,   0.270209f,  -0.556529f, 1.402949f,
-  0.101777f,  -0.027331f, 0.004502f,   -0.153166f, -0.116651f, 0.151573f,
-  -0.022187f, 0.144044f,  -0.108719f,  -0.129942f, -0.270321f, 0.227363f,
-  1.892330f,  -0.661052f, -0.219398f,  -0.229417f, -0.856438f, -1.196988f,
-  -0.081774f, 0.078847f,  -0.207057f,  -0.048947f, 0.152073f,  -0.243056f,
-  -0.233329f, -0.288689f, -0.158333f,  -0.141177f, -0.715436f, 0.016947f,
-  -0.093752f, 0.204984f,  -1.209782f,  0.155683f,  0.092239f,  0.146495f,
-  0.813146f,  -0.027757f, 0.330982f,   2.173948f,  -0.028867f, -0.141815f,
-  0.292708f,  -0.204794f, 0.014496f,   1.032799f,  1.312155f,  0.107020f,
-  0.824752f,  -0.013945f, 0.184829f,   -0.041633f, 0.215300f,  -0.476088f,
-  -0.053213f, 0.126862f,  -0.020777f,  0.082893f,  -0.223727f, -0.923063f,
-  0.466529f,  0.082140f,  -0.845758f,  -1.140791f, -0.262033f, 0.138491f,
-  0.151717f,  -0.182479f, -0.131128f,  0.055411f,  0.106771f,  0.125552f,
-  0.297184f,  -0.257403f, -0.059884f,  -0.274903f, 2.694357f,  -0.108244f,
-  0.025377f,  0.043092f,  -0.558317f,  3.517159f,  -0.270833f, -0.240676f,
-  0.205100f,  -0.057068f, -0.140445f,  -0.193449f, -0.030061f, -0.286762f,
-  -0.467523f, -0.012647f, 0.190564f,   0.022394f,  -0.101479f, 0.339684f,
-  -0.902743f, -0.169578f, -0.178029f,  -0.041836f, -3.952108f, -0.028298f,
-  -0.221137f, -0.733895f, -0.223895f,  0.039012f,  0.687867f,  0.021423f,
-  0.113063f,  0.676087f,  -0.961000f,  -0.064847f, 0.712856f,  -0.192765f,
-  -0.001132f, 0.016689f,  -0.236020f,  -0.766186f, -0.175729f, 0.012879f,
-  -0.251064f, -0.105523f, -0.039212f,  -0.347584f, 0.304352f,  -0.034174f,
-  -0.364258f, -0.685252f, -0.266115f,  -0.247345f, -0.155905f, 0.152283f,
-  -0.156315f, 0.174082f,  -0.757654f,  0.102303f,  -2.192316f, -0.245815f,
-  0.119882f,  -0.086542f, 1.987246f,   -1.353163f, -0.374813f, -0.233504f,
-  -1.980895f, 0.692093f,  -0.168351f,  0.172700f,  -0.009052f, -0.015734f,
-  0.106679f,  -0.060472f, -0.256813f,  -0.074874f, -0.207488f, -0.329515f,
-  -0.418268f, -0.017940f, -0.036081f,  0.064719f,  -1.488016f, 0.020591f,
-  -0.176325f, -0.141074f, 0.944494f,   0.150237f,  -0.249805f, -0.277280f,
-  0.012686f,  0.132483f,  0.116123f,   0.013737f,  -0.116091f, 0.750340f,
-  3.251343f,  -0.188864f, 1.096992f,   0.058467f,  -0.041433f, -0.037937f,
-  -0.133294f, -0.137908f, -0.171132f,  0.106362f,  0.069383f,  -0.052662f,
-  -0.177883f, -0.408049f, 0.680221f,   -0.117035f, -0.904240f, -1.395228f,
-  0.154527f,  0.134427f,  0.022767f,   -0.158886f, -0.230316f, 0.161096f,
-  0.362213f,  -0.235060f, -0.941620f,  0.055912f,  -0.049458f, -0.166632f,
-  0.481418f,  0.930146f,  0.041108f,   0.033674f,  1.372066f,  -1.847709f,
-  0.003324f,  0.259534f,  0.177014f,   -0.202761f, -0.262017f, -0.190852f,
-  -0.102839f, 0.028338f,  0.187193f,   -0.041684f, 0.123973f,  -0.198576f,
-  -0.110369f, -1.431400f, 0.208369f,   -0.302370f, -0.248549f, 0.062985f,
-  0.673409f,  0.036662f,  -0.711340f,  -0.120584f, -0.189789f, 0.098812f,
-  2.947819f,  0.216567f,  -0.414472f,  -0.181742f, 1.873779f,  -0.222726f,
-  -0.782870f, 0.007889f,  0.015062f,   -0.554328f, 0.182928f,  -0.191430f,
-  0.123636f,  -0.215460f, -0.225245f,  0.251516f,  -0.013025f, -1.359595f,
-  -0.750602f, 0.342667f,  -0.141899f,  -0.687493f, -0.072639f, 0.048018f,
-  -0.242107f, -0.031917f, -0.287472f,  -0.046088f, 0.832197f,  -0.016576f,
-  -1.553349f, -0.216341f, 0.023077f,   -0.410867f, 4.243743f,  -0.514878f,
-  -0.066007f, -0.160696f, -0.262678f,  -0.648790f, -0.430586f, 0.199940f,
-  -0.202496f, -0.222241f, -0.016406f,  -0.121473f, 0.000828f,  -0.081584f,
-  -0.152641f, -0.190166f, 0.644400f,   0.040196f,  -0.302104f, -1.143654f,
-  -0.160327f, -0.320780f, -0.187006f,  0.037311f,  0.440618f,  -0.070733f,
-  -0.117785f, 1.527539f,  -0.419310f,  0.001300f,  1.389956f,  -0.036366f,
-  -0.269203f, 0.612265f,  2.721897f,   -0.086836f, -0.446999f, 0.012525f,
-  -0.078317f, -0.287052f, -0.111188f,  -0.085181f, -0.164667f, -0.010466f,
-  -0.569722f, -0.018888f, -0.101663f,  -1.147130f, -0.465204f, 0.114524f,
-  -2.192402f, -0.221325f, 0.375748f,   0.206284f,  -0.261548f, -0.246257f,
-  -0.143004f, -0.069981f, -0.057306f,  -0.116481f, -0.435903f, -0.314970f,
-  0.013210f,  -0.010175f, 4.630571f,   -0.473226f, -0.197199f, -0.028204f,
-  0.122907f,  2.475548f,  0.025011f,   -0.092603f, -0.127561f, -0.151330f,
-  -0.077295f, 0.245016f,  -0.045005f,  0.183396f,  -0.330556f, -0.384887f,
-  0.356374f,  -0.016618f, -0.463353f,  -1.291546f, -0.071986f, -0.311599f,
-  0.072385f,  -0.430786f, -2.094788f,  0.202733f,  -0.910109f, -1.336543f,
-  -0.086800f, -0.096413f, 1.544383f,   0.031860f,  -0.796211f, 0.762786f,
-  3.250022f,  -0.441798f, -0.698537f,  0.062839f,  0.033525f,  -0.362996f,
-  0.027022f,  -1.131264f, -0.228926f,  0.053885f,  -0.338628f, 0.155037f,
-  -0.046844f, -0.888172f, -0.241767f,  0.084965f,  -0.617743f, -0.049896f,
-  -0.036894f, -0.304783f, -0.002639f,  0.137957f,  0.052121f,  -0.131161f,
-  -0.117200f, -0.253380f, -0.205561f,  -0.302450f, -0.047397f, -0.330518f,
-  3.613420f,  -1.525951f, -0.026738f,  0.209150f,  -2.103534f, 2.019689f,
-  -0.366199f, -0.095260f, 0.027417f,   -0.242512f, 0.162579f,  0.052113f,
-  -0.293851f, -0.068138f, -0.005799f,  -0.344696f, -0.114824f, -0.431107f,
-  -0.120058f, -1.139926f, -1.048379f,  0.036446f,  -0.323020f, -0.432945f,
-  0.454151f,  -0.140058f, 0.050649f,   -0.094900f, -0.017278f, -0.238719f,
-  1.193153f,  0.120447f,  -0.496061f,  0.917431f,  2.936126f,  -0.115521f,
-  -0.347397f, -0.435325f, -0.004383f,  -0.211864f, 0.162383f,  -1.040726f,
-  0.089537f,  -0.128579f, -0.133505f,  0.107129f,  -0.435657f, -0.180388f,
-  0.043650f,  0.018709f,  -0.773242f,  -0.687192f, -0.120633f, -0.063626f,
-  0.029912f,  0.113972f,  -0.403502f,  -0.127640f, -0.269625f, 0.129794f,
-  -0.188539f, 0.041641f,  0.029769f,   -0.198374f, 1.401407f,  0.353887f,
-  -0.219925f, 0.260515f,  1.157034f,   -2.992044f, -0.097618f, -0.064417f,
-  -0.203626f, -0.008217f, -0.112339f,  -0.227407f, -0.155118f, 0.247705f,
-  -0.012304f, -0.248447f, -0.913463f,  -0.064788f, -0.214619f, -0.251761f,
-  -0.386861f, -0.040574f, -0.163219f,  -0.100700f, 1.488274f,  -0.071684f,
-  -0.033626f, -0.006497f, -0.246945f,  -0.145221f, -3.747390f, 0.149609f,
-  -0.263326f, -0.297385f, -1.039896f,  -0.083174f, -0.025473f, -0.235586f,
-  -0.001087f, 0.254286f,  0.265106f,   0.007325f,  0.199239f,  0.134103f,
-  -0.578211f, -0.259801f, -0.062373f,  2.368348f,  0.560556f,  -0.252260f,
-  0.889997f,  -0.447872f, -0.059218f,  -0.095315f, -0.061667f, 0.183580f,
-  -0.157479f, 0.055387f,  -0.831734f,  0.007606f,  -1.104906f, 0.301180f,
-  -0.117115f, 0.212959f,  4.727223f,   -0.243833f, -0.397495f, -0.025021f,
-  -0.367587f, -2.082058f, -0.217699f,  0.148111f,  0.252430f,  0.111088f,
-  -0.260692f, 0.095124f,  -0.407774f,  -0.322169f, 0.002927f,  0.126169f,
-  -1.272325f, -0.279772f, -0.373680f,  -0.485177f, -0.605458f, 0.021225f,
-  -0.092031f, -0.226585f, 1.895162f,   0.037866f,  -0.275475f, 1.614360f,
-  -0.014972f, -0.277679f, -3.449082f,  -0.092060f, -0.747873f, 0.020716f,
-  2.776178f,  -0.049963f, 0.183999f,   -0.295259f, -0.028868f, 0.221895f,
-  0.001265f,  0.336823f,  0.219372f,   0.112824f,  0.408132f,  -0.017940f,
-  -0.311666f, 1.489606f,  -0.058093f,  -0.305659f, -0.491933f, -0.143847f,
-  0.166115f,  0.042867f,  -0.123447f,  -0.087099f, -0.305395f, -0.365079f,
-  -0.755801f, -0.160649f, 0.736260f,   -0.008611f, 0.095836f,  -0.017345f,
-  5.697515f,  -0.498971f, -0.125280f,  0.199907f,  0.300053f,  0.605026f,
-  -0.228225f, -0.259523f, 0.016384f,   0.146973f,  0.210258f,  0.226766f,
-  -0.075178f, -0.050924f, 0.188496f,   -0.415266f, -0.484880f, -0.236384f,
-  0.071931f,  -0.331863f, -0.601243f,  -0.232479f, -0.285272f, 0.123789f,
-  -1.341333f, 0.037082f,  -0.315202f,  -1.587215f, -0.271576f, 0.003216f,
-  -4.437186f, -0.256205f, -0.576589f,  -0.114147f, 2.153916f,  -0.369618f,
-  0.271415f,  0.145036f,  -0.158731f,  -0.240938f, -0.187369f, 0.036325f,
-  0.254771f,  0.211488f,  -0.240297f,  0.098417f,  -0.415011f, 2.334793f,
-  -0.127252f, 0.020069f,  -0.168755f,  -0.448922f, -0.219207f, 0.016232f,
-  -0.221935f, -0.269500f, -0.100636f,  0.102545f,  -0.809376f, -0.054979f,
-  0.360713f,  -0.326541f, 0.112933f,   0.138073f,  4.229404f,  -0.763801f,
-  -0.305429f, 0.199955f,  -1.787713f,  0.272866f,  0.109895f,  0.138466f,
-  -0.250259f, -0.167162f, -0.212588f,  -0.217589f, -0.067125f, -0.077490f,
-  -0.208970f, -0.006863f, -0.671146f,  -0.298320f, -0.165509f, 0.044597f,
-  -1.408624f, -0.213957f, -0.220947f,  0.129718f,  1.316777f,  -0.098928f,
-  -0.008121f, -0.558293f, -0.297290f,  -0.218873f, -4.346638f, -0.228174f,
-  -0.204710f, -0.388864f, 2.697919f,   0.025260f,  0.857020f,  0.009921f,
-  0.036915f,  -0.320275f, -0.087937f,  0.022636f,  0.236667f,  0.135496f,
-  -0.059616f, -0.192955f, 0.009470f,   2.139589f,  -0.200449f, 0.129818f,
-  1.017444f,  -0.608299f, 0.257914f,   -0.134306f, -0.033327f, 0.002855f,
-  -0.338598f, 0.015559f,  0.117362f,   -0.166760f, 0.086903f,  -0.167666f,
-  0.193523f,  0.033852f,  -1.147686f,  0.489468f,  -0.006969f, 0.125630f,
-  1.557907f,  -1.604449f, -0.071114f,  0.096178f,  0.007065f,  0.200013f,
-  0.213393f,  0.168466f,  -0.100568f,  -0.117861f, -0.161542f, -0.072561f,
-  -1.069871f, -0.470138f, -0.352578f,  -1.503513f, -0.001394f, -0.380109f,
-  0.065089f,  -0.281668f, 0.988953f,   -0.002778f, -0.659026f, -0.470692f,
-  -0.407292f, 0.011710f,  -1.362085f,  0.184738f,  -0.135786f, -1.374241f,
-  4.487930f,  -0.067274f, -0.956404f,  -0.233995f, 0.224527f,  -0.454556f,
-  0.037900f,  -0.281658f, 0.208224f,   -0.254753f, 0.045740f,  0.051444f,
-  -0.388281f, 0.257112f,  -0.485030f,  -0.082659f, 0.148103f,  -1.007456f,
-  -0.022295f, 0.036984f,  -0.369401f,  -0.076943f, -0.007636f, -0.293022f,
-  0.470466f,  0.199012f,  -2.158182f,  0.036577f,  -0.014725f, -0.229516f,
-  2.236929f,  0.030945f,  -0.400045f,  0.109348f,  0.214691f,  -0.891516f,
-  -0.251379f, -0.217358f, 0.013733f,   0.205573f,  -0.151725f, -0.191782f,
-  -0.339630f, -0.163905f, -0.119191f,  -0.032516f, 0.503015f,  0.025772f,
-  0.029094f,  -1.146153f, 0.216723f,   -0.330023f, 0.064695f,  -0.262521f,
-  0.425612f,  -0.093080f, -0.489648f,  1.051293f,  -0.092332f, 0.095557f,
-  -0.874132f, 0.218483f,  -0.127648f,  -1.605802f, 2.763617f,  -0.186734f,
-  -1.243166f, -0.193514f, -0.173748f,  0.337822f,  0.183873f,  -0.251594f,
-  -0.211582f, 0.144081f,  0.029620f,   -0.024853f, -0.385140f, 0.467341f,
-  -0.928316f, -0.195442f, 0.917783f,   0.357084f,  0.174445f,  -0.073659f,
-  -0.012811f, -0.115420f, -0.181147f,  -0.364449f, -0.567395f, -0.012969f,
-  -1.680714f, 0.065323f,  0.198063f,   -0.244201f, 1.428545f,  -0.432539f,
-  -0.208931f, -0.091205f, 0.957125f,   0.813519f,  -0.262677f, 0.246852f,
-  0.015536f,  0.055026f,  0.067054f,   0.262103f,  -0.358115f, -0.095206f,
-  -0.267522f, -0.402710f, -0.680397f,  -0.123627f, -0.385590f, -1.504680f,
-  -0.169513f, -0.215338f, 0.043633f,   -0.079052f, -0.464410f, 0.122894f,
-  -0.278231f, -2.456445f, -0.159917f,  -0.015597f, -0.735449f, -0.078854f,
-  -0.400290f, -1.153870f, 3.657228f,   -0.287093f, -1.174355f, -0.102001f,
-  -0.288281f, 0.185209f,  -0.145228f,  -0.200449f, -0.099914f, -0.138354f,
-  0.254428f,  -0.161751f, -0.118206f,  0.296043f,  -0.482613f, 0.080932f,
-  1.097605f,  -0.010190f, 0.232439f,   0.447617f,  -0.133508f, 0.115763f,
-  -0.388589f, 0.174695f,  -0.236014f,  0.006284f,  -1.374129f, 0.092015f,
-  -0.241419f, -0.231667f, 2.763950f,   -0.922932f, -0.061605f, 0.208740f,
-  -1.597190f, 1.353325f,  -0.198528f,  0.250498f,  -0.013950f, -0.203861f,
-  -0.254563f, 0.081931f,  -0.413369f,  0.011844f,  0.080961f,  -0.231161f,
-  -1.234909f, -0.440843f, -0.174980f,  -0.315283f, -0.337474f, -0.123243f,
-  -0.310001f, -0.271028f, 0.364179f,   0.022845f,  -0.535517f, -0.772936f,
-  -0.188435f, 0.039667f,  -0.807463f,  0.266550f,  -0.288857f, -1.630789f,
-  1.280155f,  0.065712f,  -0.279960f,  -0.300056f, 0.258440f,  -0.073781f,
-  0.213878f,  0.042196f,  0.021360f,   0.211698f,  -0.003751f, -0.192673f,
-  -0.137008f, 0.247878f,  -0.470604f,  0.073164f,  1.523241f,  0.734755f,
-  -0.114126f, -0.193834f, -0.025759f,  0.263183f,
-};
-
-static const float av1_ab_partition_nn_bias_64_layer1[LABEL_SIZE] = {
-  -0.343508f, -0.706936f, -0.160676f, -0.877101f, -0.517567f, -0.253254f,
-  -0.148074f, 0.923430f,  -0.364770f, 0.203550f,  0.401216f,  0.938246f,
-  -0.872737f, 0.718723f,  0.703398f,  2.560015f,
-};
-
-static const NN_CONFIG av1_ab_partition_nnconfig_64 = {
-  FEATURE_SIZE,  // num_inputs
-  LABEL_SIZE,    // num_outputs
-  1,             // num_hidden_layers
-  {
-      64,  // num_hidden_nodes
-  },
-  {
-      av1_ab_partition_nn_weights_64_layer0,
-      av1_ab_partition_nn_weights_64_layer1,
-  },
-  {
-      av1_ab_partition_nn_bias_64_layer0,
-      av1_ab_partition_nn_bias_64_layer1,
-  },
-};
-
-// nn model for ab partition pruning, 32x32.
-static const float av1_ab_partition_nn_weights_32_layer0[FEATURE_SIZE * 64] = {
-  -0.323723f, -0.214013f, -0.007772f, -0.458851f, -0.125542f, -0.123860f,
-  -0.410973f, -0.209389f, -0.087580f, -0.272881f, -0.168500f, -1.130845f,
-  0.344916f,  -0.475017f, -0.362262f, -0.195662f, -0.566124f, 0.782163f,
-  0.411575f,  -0.013378f, -0.318650f, -0.124678f, -0.612909f, -0.315788f,
-  -0.263990f, -0.508783f, -0.048938f, -0.416407f, -0.402648f, -0.156644f,
-  0.225887f,  -0.000493f, 2.682241f,  0.871204f,  0.059014f,  0.803542f,
-  -1.407028f, -1.154669f, 1.388148f,  -0.293348f, -0.003669f, -0.009607f,
-  1.330030f,  -0.337841f, 2.118617f,  1.033059f,  -0.084788f, 0.212904f,
-  0.082405f,  -0.070579f, -0.494005f, -0.173392f, 0.039546f,  -0.463865f,
-  0.077163f,  -0.434066f, 0.030835f,  -0.427139f, -0.560520f, -0.031606f,
-  -0.368541f, -0.027458f, 0.370574f,  0.461418f,  1.087682f,  -0.572137f,
-  -1.509596f, -0.765697f, -0.499383f, -0.277998f, -0.106492f, -0.129564f,
-  -0.169133f, -0.269834f, -0.114270f, -0.275431f, 0.016339f,  -0.156744f,
-  -0.267922f, 0.171216f,  0.110556f,  0.002954f,  -0.200327f, -0.187663f,
-  3.691601f,  1.234152f,  0.186315f,  -0.125370f, -0.211235f, -0.554432f,
-  -0.131072f, -0.124982f, -0.130339f, -0.235350f, 0.018903f,  0.012896f,
-  -0.159372f, -0.269571f, -0.025709f, -0.221251f, 0.061919f,  0.016307f,
-  0.384673f,  -0.134525f, -1.599126f, -0.416459f, -0.743052f, 0.670249f,
-  -0.169709f, 0.421681f,  -0.033360f, -0.072817f, 0.003647f,  -0.110632f,
-  -0.158651f, -0.095136f, 0.223759f,  0.165767f,  -0.269129f, -0.196075f,
-  -0.023183f, -0.293420f, 0.014875f,  0.018688f,  -0.153407f, -0.172009f,
-  -0.259947f, -0.124015f, 0.173653f,  -0.089103f, -0.021001f, -0.334230f,
-  0.027177f,  0.103371f,  -0.183860f, -0.204051f, -0.023721f, -0.192297f,
-  -0.143771f, -0.247106f, 0.218116f,  -0.013240f, 2.831783f,  1.483928f,
-  -0.877025f, -0.313462f, -0.411320f, -0.447825f, 0.605977f,  0.234684f,
-  -0.119150f, -0.075182f, -0.330463f, 0.071503f,  -0.254924f, -0.360071f,
-  -0.037022f, 0.063261f,  -0.148759f, -0.238254f, -0.462018f, -0.027166f,
-  0.065318f,  -0.235743f, -0.257194f, -0.094784f, 0.022423f,  0.055925f,
-  0.086672f,  -0.021010f, 0.009965f,  -0.001648f, -0.104917f, -0.387443f,
-  -0.102673f, -0.281706f, 0.145923f,  -0.233391f, -0.378365f, -0.145584f,
-  -0.077751f, -0.121166f, 1.134565f,  -0.097500f, -0.749202f, -0.544566f,
-  -1.361374f, -0.102494f, 1.089275f,  0.375299f,  -0.105091f, 0.037641f,
-  -0.054248f, -0.282691f, -0.377797f, -0.066427f, -0.253815f, -0.329677f,
-  -0.339326f, -0.128217f, -0.282905f, 0.014937f,  1.067185f,  -0.171764f,
-  0.484458f,  0.396706f,  -0.557055f, -0.891596f, -0.257839f, -0.720879f,
-  -0.218449f, -0.004755f, 1.572857f,  0.006229f,  1.962895f,  -0.029746f,
-  -4.137691f, -2.185991f, -2.763477f, -0.520437f, -0.208708f, 0.006444f,
-  -1.263078f, -0.304560f, 1.072374f,  2.556429f,  0.312850f,  0.257488f,
-  -0.634264f, 0.156769f,  -0.188943f, 0.040295f,  -0.389915f, 0.085250f,
-  -0.248525f, 0.045667f,  -0.776115f, -0.274680f, -0.448145f, -0.566161f,
-  -1.285316f, 0.079060f,  0.389124f,  -0.510401f, -0.015299f, -0.664661f,
-  0.099901f,  -0.470694f, -0.051593f, -1.076381f, -0.442104f, -0.197867f,
-  -0.330011f, -0.448523f, -0.301018f, -0.442093f, -0.491953f, -0.582091f,
-  -0.064569f, -0.156516f, 0.543522f,  -0.005924f, 0.161432f,  0.974793f,
-  0.273712f,  1.104850f,  -0.290312f, 0.313417f,  -0.125370f, 0.136234f,
-  -0.191227f, -0.165054f, 0.011872f,  -0.298871f, 0.095740f,  0.142760f,
-  -0.215771f, -0.031437f, 0.101041f,  -0.085620f, 0.435387f,  0.002786f,
-  1.971375f,  0.018392f,  -1.771940f, -0.401433f, 0.808263f,  -3.350013f,
-  2.296952f,  -1.024403f, -0.041645f, -0.034799f, -0.024078f, -0.347301f,
-  -0.276088f, -0.455907f, 0.266021f,  0.087348f,  -0.146566f, 0.040492f,
-  -0.539866f, -0.206851f, -0.387874f, -0.125508f, -0.496676f, -0.373845f,
-  -0.472356f, -0.357082f, -0.081254f, -0.456466f, 0.554713f,  0.002185f,
-  -4.225019f, 0.344025f,  0.728796f,  -0.262936f, 1.383924f,  1.577300f,
-  -2.653320f, -2.516156f, -0.301604f, -0.204105f, -0.138252f, -0.587536f,
-  -0.097889f, -0.352414f, -0.288276f, -0.184340f, -0.122741f, -0.243376f,
-  0.031970f,  -0.373402f, -0.396079f, 0.045566f,  0.072595f,  -0.222681f,
-  -0.243802f, -0.340129f, -0.258494f, -0.192041f, -0.386112f, -0.240940f,
-  -0.047268f, -0.555802f, -0.032514f, -0.241341f, -0.167463f, -0.478308f,
-  -0.205936f, -0.316275f, 0.103729f,  -0.197893f, -0.128029f, -0.218796f,
-  -0.167362f, -0.111814f, -0.126062f, -0.394260f, -0.025357f, -0.402697f,
-  -0.587395f, -0.400385f, -0.259664f, -0.415588f, -0.338503f, -0.399166f,
-  -0.270504f, 0.234505f,  0.272144f,  0.266938f,  -0.392395f, -0.011717f,
-  -0.384221f, -0.473446f, -0.038420f, -0.241101f, -0.234402f, -0.275567f,
-  -0.410454f, -0.377599f, -0.179099f, -0.138432f, -0.248083f, -0.543026f,
-  -0.428043f, -0.239895f, -0.333193f, -0.103346f, -0.039038f, -0.171109f,
-  -0.119432f, -0.222351f, 0.000450f,  0.208724f,  -0.510526f, -0.144656f,
-  -0.316721f, -0.344846f, -0.244794f, -0.129134f, -0.045634f, -0.400183f,
-  0.043714f,  -0.235414f, 0.115594f,  -0.195616f, -0.106693f, -0.124242f,
-  0.083990f,  0.049110f,  -0.196130f, -0.059860f, -0.464235f, -0.516443f,
-  -0.101521f, -0.422379f, -0.413955f, -0.042991f, -0.345263f, -0.129264f,
-  -0.106911f, -0.140156f, -0.457841f, -0.199848f, -0.218954f, -0.329850f,
-  -0.364097f, -0.335262f, -0.312254f, -0.299331f, -0.052710f, -0.251019f,
-  -0.023459f, -0.222538f, 0.028849f,  -0.088038f, -0.301550f, -0.273566f,
-  0.067295f,  -0.174608f, -0.445784f, -0.158366f, -0.567275f, -0.557652f,
-  -0.353503f, -0.302092f, -0.302049f, -0.551793f, -0.034535f, -0.225190f,
-  -0.210733f, -0.219377f, -0.057197f, -0.430933f, -0.025185f, -0.388150f,
-  -0.086147f, -0.430088f, 0.058466f,  -0.152129f, -0.058411f, -0.236392f,
-  -0.547669f, -0.613849f, -0.893774f, -0.351715f, -0.399227f, -0.454909f,
-  -0.324501f, 0.000490f,  -0.282167f, -0.073163f, -0.281452f, 0.047932f,
-  -0.175500f, 0.165220f,  -0.276212f, 0.062153f,  -0.217054f, -0.255487f,
-  -0.146416f, -0.097718f, -0.173809f, -0.559328f, -0.055695f, -0.391193f,
-  -0.132020f, -0.561184f, -0.308666f, -0.474053f, -0.219149f, -0.246558f,
-  -0.158325f, 0.151907f,  -0.266835f, -0.144697f, -0.193960f, -0.046587f,
-  -0.220028f, -0.247355f, 0.135584f,  0.016511f,  0.367705f,  -1.855877f,
-  0.435622f,  0.444710f,  -3.372301f, -3.030489f, 1.013267f,  0.380951f,
-  -0.170011f, -0.111415f, -0.456146f, -0.107254f, -0.095220f, -0.053078f,
-  -0.135864f, -0.591949f, -0.252810f, -0.324799f, -0.094796f, -0.260969f,
-  -0.391981f, -0.063170f, -0.336130f, -0.470127f, -0.405168f, -0.433219f,
-  -0.309563f, -0.295462f, -0.552270f, -0.012300f, -0.057793f, -0.034494f,
-  -0.446843f, -0.640160f, -1.188681f, -0.791361f, 0.543271f,  1.189112f,
-  1.458468f,  -0.005876f, -0.927475f, 0.062038f,  -1.170818f, 0.338227f,
-  -3.007096f, -4.559296f, -4.045457f, -5.953635f, -0.228386f, -0.266890f,
-  -0.092595f, -0.377440f, -0.044534f, -0.053565f, -0.349268f, -0.415030f,
-  -0.310094f, 0.062721f,  0.251422f,  -0.014350f, -1.282910f, 1.619560f,
-  1.180566f,  -0.032163f, -1.322951f, -0.603601f, 1.443710f,  0.654650f,
-  -0.393227f, 0.003536f,  0.029725f,  -0.108925f, -0.053911f, 0.133977f,
-  -0.036145f, -0.168438f, 0.046989f,  -0.331463f, -0.176983f, -0.311922f,
-  -0.272389f, -0.379592f, -0.399993f, -0.297873f, -0.193425f, -0.177524f,
-  -0.258309f, -0.567312f, -0.260217f, -0.241869f, 0.024010f,  -0.032867f,
-  -0.039424f, -0.063670f, 0.193808f,  -0.303514f, -0.013376f, -0.057761f,
-  0.187922f,  0.006938f,  0.031810f,  0.180594f,  -1.198427f, 2.820662f,
-  0.154986f,  -0.375518f, 0.116925f,  -0.795782f, -0.085139f, -0.079365f,
-  -0.197936f, -0.321468f, -0.205271f, -0.558203f, -0.296235f, -0.151193f,
-  -0.158282f, -0.245402f, -0.208504f, -0.042335f, -0.087426f, -0.557129f,
-  -0.381427f, -0.441551f, -0.541011f, -0.060567f, -0.469305f, -0.032326f,
-  -2.453587f, -0.045568f, -0.296932f, 0.613061f,  -0.320284f, 0.191620f,
-  -0.827145f, -0.225277f, 0.275800f,  1.696635f,
-};
-
-static const float av1_ab_partition_nn_bias_32_layer0[64] = {
-  -0.176206f, 0.660189f,  -0.186156f, -2.481963f, -1.564218f, -0.280424f,
-  0.732684f,  -0.135581f, -2.193132f, -0.172771f, 0.605001f,  -0.060392f,
-  -0.067190f, -0.132969f, -1.410812f, -0.298701f, -0.105963f, -0.086173f,
-  0.632779f,  0.005585f,  1.310169f,  1.392136f,  -0.563860f, -0.051053f,
-  0.660998f,  -0.214726f, -1.894342f, -0.128288f, -0.330721f, -0.053988f,
-  -0.177726f, 1.200859f,  -0.178902f, -0.172620f, -0.184476f, -0.175559f,
-  0.538503f,  -0.322158f, -0.219080f, -0.058208f, -0.171347f, -0.216060f,
-  -0.174950f, -0.295740f, -0.184820f, -0.213896f, 1.317728f,  -0.020116f,
-  -0.208096f, 0.000000f,  1.246166f,  -0.225421f, -0.181555f, 0.861761f,
-  1.172429f,  -0.172892f, -0.737092f, -0.189904f, -0.179385f, -0.114618f,
-  -1.384604f, -0.201713f, -0.271948f, 0.372351f,
-};
-
-static const float av1_ab_partition_nn_weights_32_layer1[64 * 16] = {
-  -0.037828f,  1.529029f,  0.004927f,  1.475763f,  0.627172f,  0.325872f,
-  -0.990757f,  0.129476f,  0.889958f,  -0.082031f, 0.332133f,  0.074422f,
-  -0.176212f,  -0.074355f, 0.774378f,  0.110987f,  -0.155469f, 0.253310f,
-  0.882538f,   0.253605f,  0.332436f,  -5.389474f, 0.278470f,  0.168644f,
-  0.914611f,   0.154165f,  0.809262f,  -0.174734f, 0.923673f,  0.064716f,
-  -0.070228f,  -0.228735f, 0.002312f,  0.112222f,  -0.045502f, -0.046004f,
-  0.514101f,   0.306480f,  0.021232f,  -0.015955f, -0.288260f, 0.189177f,
-  -0.104158f,  0.103273f,  0.096910f,  -0.086328f, 1.327289f,  -0.154247f,
-  0.056676f,   -0.243327f, -0.646676f, 0.177221f,  -0.086761f, 0.729729f,
-  -14.710893f, -0.044881f, 0.339003f,  -0.134737f, 0.073621f,  -0.162913f,
-  1.215237f,   0.140723f,  0.138630f,  1.241719f,  0.204092f,  -0.463080f,
-  -0.176086f,  1.125868f,  1.034814f,  0.225455f,  -0.203421f, -0.078787f,
-  -0.527498f,  0.012491f,  -0.563307f, -0.170792f, 0.002679f,  0.116153f,
-  0.211348f,   -0.191900f, -0.212505f, 0.263445f,  -0.074679f, -0.081441f,
-  -0.815405f,  2.448215f,  0.781299f,  0.149542f,  -1.045162f, 0.043014f,
-  0.217381f,   -0.094500f, -0.090427f, 0.025784f,  -0.228906f, -2.741798f,
-  0.230475f,   -0.256112f, -0.103297f, 0.159121f,  -0.229793f, -0.014883f,
-  -0.104131f,  -0.123816f, 0.164148f,  -0.052279f, -0.071845f, -0.041197f,
-  0.208527f,   -0.234197f, -0.542336f, 0.020053f,  0.088870f,  0.014346f,
-  2.502164f,   -0.010244f, -0.267792f, 0.844394f,  2.711486f,  -0.015262f,
-  -0.868053f,  -0.295704f, 0.222289f,  -0.000286f, -0.352098f, -0.079000f,
-  0.021267f,   -0.721739f, -0.240558f, -0.384775f, 0.065974f,  -2.161058f,
-  0.195889f,   0.268966f,  -0.009329f, 0.014949f,  0.314943f,  0.235885f,
-  0.072591f,   -0.127120f, 0.150784f,  0.105697f,  -1.297403f, -0.207509f,
-  -0.217688f,  -0.076752f, 0.170952f,  -0.294235f, 0.449973f,  -1.712690f,
-  0.860989f,   0.054757f,  -0.812627f, -0.105316f, -0.736230f, -0.133192f,
-  -3.741608f,  0.495660f,  -0.288936f, 4.654852f,  -0.021305f, -0.308916f,
-  0.049205f,   -0.259996f, 0.114248f,  -0.252647f, -0.253180f, -0.449314f,
-  0.022979f,   0.063281f,  -0.196154f, 0.078295f,  -0.322317f, -0.145142f,
-  0.300573f,   0.048385f,  -0.254787f, 0.123939f,  -1.263088f, -0.228565f,
-  -0.389061f,  0.391084f,  2.322438f,  0.075009f,  0.225743f,  -0.198808f,
-  -0.280538f,  -0.173939f, -0.120543f, -0.070792f, -0.417187f, -0.781056f,
-  -0.102756f,  -1.760965f, 0.019149f,  -0.867342f, 0.347141f,  0.031588f,
-  0.302572f,   -0.203573f, -0.357320f, -0.096078f, -0.527528f, 0.046699f,
-  -0.108561f,  -0.167077f, -2.851509f, -0.307116f, 0.202720f,  -0.160280f,
-  -0.215525f,  0.064355f,  -0.427220f, 1.516230f,  0.634453f,  0.099400f,
-  -1.013887f,  -0.029740f, -0.093426f, -0.044272f, -1.297636f, -0.237614f,
-  -0.160953f,  0.399036f,  -0.030685f, -0.113619f, -0.184704f, 0.040519f,
-  -0.588252f,  -0.210235f, -0.067623f, -0.031841f, -0.107261f, -0.192582f,
-  -0.253959f,  -0.430821f, -0.103184f, -0.280185f, -0.357723f, 0.197761f,
-  -0.175087f,  -0.055171f, 1.642014f,  -0.192559f, -0.288147f, 0.610311f,
-  4.688195f,   -0.128728f, -0.914869f, -0.108286f, 0.013789f,  0.092125f,
-  0.019770f,   -0.178386f, 0.074164f,  -1.152658f, -0.216738f, -0.277286f,
-  0.012381f,   0.418259f,  -0.680727f, -0.221917f, -0.485946f, 0.101672f,
-  2.009457f,   0.054302f,  1.019838f,  -0.116170f, 0.165134f,  -0.112567f,
-  0.852632f,   -0.385796f, -0.108666f, 0.053181f,  -0.311797f, -0.372875f,
-  -0.675717f,  2.409268f,  -0.514720f, -0.214245f, -0.646596f, 0.009756f,
-  0.203993f,   0.093617f,  -0.301290f, 0.253551f,  -0.128909f, -1.448442f,
-  -0.186823f,  -0.278001f, -0.294993f, -0.176928f, -0.473605f, 0.062049f,
-  -0.212084f,  -0.137326f, 0.012505f,  0.087850f,  -0.200413f, -0.394119f,
-  -0.132224f,  0.146917f,  0.155746f,  0.198725f,  -0.322541f, 0.196391f,
-  -0.945500f,  0.036736f,  -0.155646f, -0.677341f, 1.130545f,  -0.339554f,
-  0.411628f,   -0.355813f, -0.249843f, 0.213694f,  -2.035607f, 0.055694f,
-  -0.111669f,  0.408696f,  -0.067043f, -0.048182f, 0.398110f,  -0.067542f,
-  1.459801f,   0.236833f,  -0.178806f, 0.168758f,  0.492387f,  0.099691f,
-  -0.776680f,  -0.172865f, 0.204225f,  0.193982f,  0.575685f,  -0.062248f,
-  0.011486f,   0.058571f,  -0.493391f, 0.026893f,  -0.900467f, 3.793129f,
-  -0.634613f,  -0.064660f, -0.048262f, 0.361905f,  0.033641f,  0.245171f,
-  -0.064671f,  0.034954f,  0.204358f,  -0.904023f, -0.052714f, -0.250134f,
-  0.136700f,   0.000734f,  -0.371720f, 0.226483f,  0.217958f,  0.060559f,
-  0.180111f,   0.000970f,  0.079556f,  -0.096775f, 0.093855f,  -0.026224f,
-  -0.243664f,  0.004290f,  0.123281f,  -0.239476f, 1.230374f,  -0.107826f,
-  -0.101982f,  -0.153917f, 5.464427f,  0.304375f,  -0.809957f, 0.090564f,
-  -0.278416f,  -0.245555f, -2.078421f, 0.243093f,  -0.127666f, 0.052451f,
-  -0.126662f,  -0.783505f, 0.025149f,  -1.422675f, -0.207769f, -0.362547f,
-  0.115310f,   0.133390f,  1.264754f,  -0.027055f, -0.485312f, -0.240717f,
-  -0.239722f,  0.146818f,  -1.265043f, -0.235553f, 0.267104f,  -0.021357f,
-  -0.435949f,  -0.309371f, 0.049920f,  1.302721f,  -0.233978f, -0.097551f,
-  -0.240631f,  -0.287821f, -0.378380f, -0.273131f, -3.075169f, 0.226404f,
-  -0.029361f,  2.703590f,  -0.430659f, 0.067927f,  -0.387520f, -0.370630f,
-  -0.229236f,  0.085653f,  -0.370956f, -0.065556f, -0.187859f, 0.068309f,
-  -0.109299f,  -0.259898f, -0.103644f, -0.271199f, -0.209350f, 0.140993f,
-  -0.196713f,  -0.135508f, -1.423209f, -0.406385f, -0.019956f, -0.864694f,
-  5.963707f,   -0.201157f, 0.726377f,  -0.011076f, 0.010553f,  -0.102918f,
-  -2.230088f,  -0.258098f, -0.039547f, -0.029262f, -0.082324f, -0.860222f,
-  -0.094735f,  -1.381839f, 0.587298f,  -0.173048f, 0.721360f,  0.241900f,
-  0.764302f,   -0.023609f, -1.173755f, 0.103912f,  -0.185363f, 0.078435f,
-  -2.245062f,  -0.127269f, 0.202234f,  0.158975f,  -0.260909f, 0.098608f,
-  -0.348247f,  1.732502f,  -0.412298f, -0.269602f, -0.425771f, -0.146243f,
-  -0.530730f,  0.125716f,  -1.004419f, 0.145109f,  -0.059289f, 1.096304f,
-  0.012891f,   0.045033f,  -0.306875f, 0.003514f,  -0.176110f, 0.037544f,
-  -0.441537f,  -0.518921f, -0.262149f, -0.060407f, -0.379419f, -0.141245f,
-  -0.128894f,  -0.176537f, -1.161318f, -0.249100f, -0.118330f, 0.042816f,
-  1.173404f,   0.088312f,  -0.393568f, -0.175134f, 6.529819f,  -0.326652f,
-  -0.631917f,  -0.393476f, 0.057781f,  -0.217748f, -1.781139f, -0.012614f,
-  -0.212621f,  -0.720322f, -0.218498f, -0.388556f, -0.254796f, -0.248399f,
-  -0.608744f,  -0.265146f, 0.238517f,  0.066882f,  -2.916806f, 0.054642f,
-  0.282590f,   0.075248f,  0.010188f,  -0.133486f, 0.985945f,  -0.045849f,
-  -0.347564f,  0.057320f,  -0.417920f, 0.063664f,  0.387062f,  -2.692059f,
-  -0.535549f,  0.263736f,  0.327889f,  -0.070273f, -0.775254f, 0.147250f,
-  3.309425f,   -0.212191f, -0.067204f, -2.912663f, -0.061496f, 0.084233f,
-  0.022907f,   0.138421f,  -0.112159f, -0.288447f, -0.010799f, 0.056049f,
-  -0.036527f,  0.021525f,  0.106649f,  -0.291883f, 0.088424f,  -0.057773f,
-  -0.086031f,  0.015277f,  -0.318505f, -0.269049f, -1.008913f, -0.224785f,
-  -0.025820f,  -0.649037f, 0.706381f,  0.096410f,  0.643776f,  -0.046743f,
-  -0.009654f,  -0.024246f, 1.469255f,  -0.183536f, -0.370046f, -0.048442f,
-  -0.376527f,  -0.431264f, -0.245109f, -0.093951f, 0.203683f,  -0.099872f,
-  0.087210f,   0.160692f,  -3.527694f, -0.068891f, -0.228994f, -0.231817f,
-  -0.241949f,  0.193613f,  0.979597f,  -0.091259f, 0.414424f,  -0.047341f,
-  -0.209582f,  -0.295134f, -0.016824f, 0.460327f,  -0.072671f, 0.246234f,
-  0.235896f,   0.127238f,  -1.068683f, 0.035648f,  2.254888f,  0.180105f,
-  -0.260098f,  -2.322120f, -0.184249f, -0.314801f, -0.099969f, -0.272117f,
-  -0.237916f,  0.031103f,  -0.274063f, -0.049384f, -0.044917f, 0.102477f,
-  -0.342148f,  -0.257558f, -0.346300f, 0.115333f,  -0.115456f, 0.208354f,
-  -0.359301f,  -0.167395f, 1.146514f,  -0.177861f, -0.098658f, -0.444570f,
-  6.759993f,   -0.369772f, -0.831118f, 0.001866f,  -0.073298f, -0.072095f,
-  0.811902f,   -0.431997f, -0.286587f, -0.269500f, 0.111492f,  -0.525364f,
-  -0.351785f,  -2.463474f, -1.852659f, 0.135325f,  0.138267f,  0.100643f,
-  -2.373278f,  -0.285514f, -0.395388f, -0.185016f, -0.030249f, -0.005767f,
-  -0.716424f,  -0.031674f, 0.011147f,  0.057405f,  -0.215873f, -0.094401f,
-  0.573528f,   -1.223820f, 0.414852f,  -0.059053f, -0.076488f, -0.287168f,
-  -0.842640f,  0.174084f,  -0.567186f, 0.336629f,  -0.062514f, 2.075448f,
-  -0.061680f,  -0.131529f, -0.098994f, -0.204111f, -0.347865f, 0.108516f,
-  -0.049616f,  -0.069212f, -0.273935f, -0.096545f, -0.210784f, -0.284698f,
-  0.141501f,   -0.176924f, -0.361341f, -0.251197f, -0.286694f, 0.245569f,
-  -1.521661f,  -0.122639f, -0.015760f, -0.718912f, 5.877828f,  0.146916f,
-  0.151767f,   0.220785f,  -0.032298f, 0.230902f,  0.663943f,  -0.252613f,
-  0.057718f,   -0.436038f, -0.323994f, -1.139787f, -0.042489f, -1.326298f,
-  -1.031206f,  -0.104136f, 0.389897f,  0.127602f,  -2.667789f, -0.212366f,
-  -0.506262f,  -0.009115f, -0.213202f, 0.076167f,  -1.629405f, 0.055129f,
-  0.375393f,   -0.150272f, -0.241515f, -0.326497f, 0.100069f,  0.410703f,
-  0.340622f,   0.042437f,  -0.349945f, 0.041176f,  -1.178950f, 0.030992f,
-  0.933908f,   -0.035844f, -0.098660f, 1.030584f,  -0.092043f, -0.355739f,
-  -0.305562f,  0.036161f,  -0.049558f, -0.033225f, -0.403856f, -0.088276f,
-  0.215493f,   -0.149105f, -0.013363f, 0.025886f,  -0.101306f, -0.205781f,
-  -1.072487f,  -0.076019f, 0.077555f,  0.131003f,  1.267763f,  -0.008954f,
-  -0.327617f,  -0.246539f, 6.664081f,  -0.404403f, -1.442489f, 0.191301f,
-  -0.336361f,  0.181156f,  0.833108f,  0.007879f,  -0.194464f, -1.029408f,
-  -0.036268f,  -0.927110f, -0.379190f, -0.293443f, -1.848579f, -0.242548f,
-  -0.065990f,  0.203160f,  -0.291788f, 0.000680f,  0.587011f,  -0.241289f,
-  0.037034f,   0.000552f,  1.072308f,  -0.387230f, -0.230050f, 0.292322f,
-  -0.720001f,  0.034109f,  -0.467260f, 2.211644f,  -1.839191f, -0.048797f,
-  -0.083469f,  -0.334686f, -0.269056f, 0.051295f,  1.319904f,  -0.035603f,
-  -0.018457f,  -0.824915f, -0.212285f, -0.230516f, -0.035093f, -0.400843f,
-  -0.305469f,  -0.099011f, 0.014225f,  -0.452772f, 0.170331f,  -0.389312f,
-  -0.115084f,  -0.014770f, -0.429387f, -0.155961f, -0.568200f, -0.037853f,
-  -0.125137f,  0.067228f,  -1.329271f, -0.117874f, -0.132499f, -0.218376f,
-  -0.588325f,  -0.320024f, 0.085695f,  -0.235047f, -0.217790f, 0.103015f,
-  -0.698644f,  0.017766f,  -0.058299f, 0.199411f,  -0.122485f, -0.563949f,
-  -0.349011f,  -0.557045f, -0.131165f, 0.002281f,  0.118559f,  -0.210302f,
-  -1.153815f,  0.116738f,  -0.236007f, -0.003487f, -0.006885f, -0.244816f,
-  0.953222f,   0.093748f,  0.266869f,  0.241869f,  -0.860832f, -0.387012f,
-  -0.338986f,  2.097515f,  -1.942512f, -0.298021f, 0.543911f,  -0.043214f,
-  0.082125f,   -0.120242f, 0.712231f,  0.213327f,  -0.301687f, -0.544011f,
-  -0.392131f,  0.004302f,  0.004825f,  -0.317440f, -0.107518f, -0.293407f,
-  -0.159111f,  -0.080367f, 0.132663f,  -0.017726f, -0.237521f, -0.190297f,
-  -0.361633f,  0.200518f,  -0.538296f, -0.027975f, -0.381704f, -0.016963f,
-  0.630105f,   -0.190997f, -0.287840f, -0.603488f, 3.605598f,  -0.276614f,
-  -1.346383f,  0.186912f,  -0.047575f, -0.189232f, -1.519072f, 0.097816f,
-  -0.223722f,  0.304924f,  -0.213022f, -1.052433f, -0.322283f, -1.706734f,
-  -2.458027f,  0.237976f,  0.171050f,  -0.103139f, -0.278689f, 0.329824f,
-  -0.262448f,  -0.122916f, -0.236398f, -0.013848f, -0.969160f, -0.374907f,
-  0.091018f,   -0.386471f, -0.723940f, 0.064956f,  -0.057652f, 1.321024f,
-  -1.397418f,  -0.143136f, 0.272468f,  -0.030749f, 0.037324f,  0.069316f,
-  -0.904925f,  -0.333693f, -0.117709f, 2.279598f,  -0.428065f, -0.131157f,
-  -0.014288f,  -0.402862f, -0.666090f, 0.017070f,  -0.028333f, 0.002481f,
-  0.197156f,   -0.038120f, -0.271062f, -0.188275f, -0.021370f, -0.070849f,
-  -0.905007f,  -0.095886f, -0.093055f, -0.121821f, -1.239812f, -0.411799f,
-  -0.089948f,  -0.936827f, 1.437569f,  -0.388908f, 0.126170f,  0.186162f,
-  -0.018819f,  -0.138364f, -1.066412f, -0.138222f, -0.022186f, 0.107331f,
-  -0.230436f,  -1.352605f, -0.161323f, -1.081810f, -0.933825f, -0.136675f,
-  0.378157f,   0.113377f,  -0.850610f, 0.080245f,  -0.087305f, -0.002852f,
-  0.044408f,   -0.188172f, -1.891998f, 0.092189f,  0.125325f,  -0.105090f,
-  -0.848510f,  -0.396308f, -0.384130f, 2.007509f,  -1.480787f, -0.126946f,
-  0.314767f,   0.000195f,  -0.285628f, -0.110442f, -0.293948f, 0.258559f,
-  -0.417603f,  1.570705f,  0.092459f,  -0.340974f, -0.284754f, -0.007801f,
-  -0.324610f,  -0.004734f, -0.207716f, -0.057175f, 0.055467f,  -0.210830f,
-  -0.113005f,  -0.299177f, 0.068074f,  0.017929f,  -2.897598f, -0.260074f,
-  -0.014422f,  -0.206467f, 1.246997f,  -0.372863f, -0.214160f, -0.114035f,
-  5.805862f,   0.003611f,  -1.340990f, -0.021085f, -0.260431f, -0.002720f,
-  -1.251640f,  -0.353531f, -0.304009f, -0.153376f,
-};
-
-static const float av1_ab_partition_nn_bias_32_layer1[LABEL_SIZE] = {
-  -0.521497f, -1.061572f, -0.078756f, -0.660662f, -0.403741f, -0.960163f,
-  0.001427f,  0.523607f,  0.225068f,  -0.055273f, 1.019519f,  1.181880f,
-  -0.010198f, 0.130597f,  1.276752f,  2.028188f,
-};
-
-static const NN_CONFIG av1_ab_partition_nnconfig_32 = {
-  FEATURE_SIZE,  // num_inputs
-  LABEL_SIZE,    // num_outputs
-  1,             // num_hidden_layers
-  {
-      64,  // num_hidden_nodes
-  },
-  {
-      av1_ab_partition_nn_weights_32_layer0,
-      av1_ab_partition_nn_weights_32_layer1,
-  },
-  {
-      av1_ab_partition_nn_bias_32_layer0,
-      av1_ab_partition_nn_bias_32_layer1,
-  },
-};
-
-// nn model for ab partition pruning, 16x16.
-static const float av1_ab_partition_nn_weights_16_layer0[FEATURE_SIZE * 64] = {
-  0.151902f,  0.007947f,  -1.788454f, 0.431869f,  -2.971387f, 0.923566f,
-  1.632542f,  -1.665136f, -0.338632f, -5.075884f, 0.398267f,  0.030467f,
-  2.263534f,  -0.045532f, -1.066128f, 0.915139f,  -0.560500f, -3.293125f,
-  2.072793f,  -1.011414f, 0.122716f,  -0.060169f, -0.388860f, 0.031019f,
-  -0.381861f, 0.001551f,  -0.328472f, 0.038296f,  -0.060398f, -0.375556f,
-  0.209226f,  0.014764f,  -1.443469f, -0.345486f, 2.409269f,  1.524846f,
-  -0.640666f, 1.322139f,  -2.074771f, -0.580944f, -0.203960f, -0.072893f,
-  0.329701f,  0.115339f,  -1.339542f, 0.249024f,  -0.421545f, -0.409151f,
-  -0.258293f, 0.836288f,  -0.073685f, -0.009624f, 0.895712f,  0.320639f,
-  0.451002f,  -1.544558f, 0.193709f,  -1.389012f, 1.305451f,  0.089795f,
-  0.050338f,  -0.017433f, -0.304667f, 0.500729f,  0.504346f,  0.073757f,
-  0.582649f,  -0.993623f, 1.766766f,  -3.067265f, -0.415774f, -0.006036f,
-  -1.245281f, 0.253205f,  -0.591245f, -0.626238f, 0.551852f,  0.593755f,
-  0.491023f,  1.099384f,  -0.348448f, 0.054564f,  -0.451422f, -0.375781f,
-  -0.248390f, -0.052548f, -0.380069f, -0.165391f, -0.297968f, -0.052142f,
-  -0.316381f, -0.045246f, -0.243905f, -0.034169f, -0.247523f, -0.180773f,
-  0.068066f,  -0.374920f, 0.057536f,  -0.189748f, 0.058375f,  -0.267749f,
-  -0.147286f, -0.246153f, 0.006183f,  -0.202029f, -0.059128f, 0.116852f,
-  0.134719f,  -0.126900f, -0.064646f, -0.196458f, -0.182331f, 0.108029f,
-  -0.264499f, 0.155816f,  -0.107255f, -0.056983f, -0.209771f, -0.099070f,
-  0.007313f,  -0.254124f, -0.231964f, -0.275972f, 0.032098f,  -0.264564f,
-  -0.208743f, 0.155599f,  -0.121511f, -0.156145f, -0.162315f, -0.059788f,
-  -0.257073f, -0.076654f, -0.110616f, -0.321675f, -0.051952f, 0.006301f,
-  -0.154114f, 0.017032f,  -0.017364f, -0.233247f, 0.009918f,  -0.179289f,
-  -0.190722f, 0.147106f,  -0.063910f, -0.396872f, -0.263123f, -0.003850f,
-  -0.040718f, -0.324699f, 0.118660f,  -0.170727f, -0.316788f, 0.100886f,
-  -0.202842f, 0.045371f,  0.150561f,  -0.057054f, -0.308150f, 0.028346f,
-  -0.381473f, -0.195365f, 0.026221f,  -0.281795f, 0.087204f,  0.047689f,
-  -0.027643f, -0.104724f, -0.089030f, -0.117661f, -0.349160f, 0.056982f,
-  -0.340273f, 0.048086f,  0.046103f,  -0.121527f, 0.021697f,  0.054109f,
-  -0.002768f, -0.008461f, -2.297240f, 0.124651f,  3.621661f,  -0.057120f,
-  -1.151656f, 2.296894f,  -3.678720f, -0.290240f, 0.087683f,  -0.186389f,
-  0.007656f,  -0.090236f, -0.245217f, 0.110389f,  -0.251719f, -0.029084f,
-  -0.128203f, -0.100005f, -0.032779f, 0.007281f,  -0.366596f, -0.267870f,
-  -0.215620f, 0.047687f,  0.010303f,  0.097980f,  -0.191569f, -0.341162f,
-  0.119249f,  0.026279f,  -2.161546f, 0.459591f,  1.290566f,  1.791797f,
-  -0.409835f, 0.127081f,  -1.156367f, 0.198286f,  0.099561f,  -0.067445f,
-  -0.034352f, 0.017966f,  -0.277380f, -0.057220f, -0.174198f, -0.014164f,
-  0.146090f,  -0.357530f, 0.097644f,  -0.000932f, 0.446603f,  -0.066793f,
-  2.448620f,  0.937617f,  -1.232922f, 0.313183f,  0.816827f,  -0.275115f,
-  -0.245205f, -0.126895f, 0.156668f,  -0.186977f, -0.273505f, 0.013315f,
-  0.168629f,  -0.089084f, 0.006166f,  -0.116107f, -0.199316f, -0.024010f,
-  -0.242303f, 0.011612f,  -0.218485f, -0.229661f, -0.123922f, 0.136699f,
-  0.006732f,  -0.148718f, -0.164225f, 0.116063f,  1.587898f,  0.690519f,
-  0.360566f,  0.009739f,  -0.678702f, -0.046003f, 0.126984f,  0.605212f,
-  1.240663f,  -0.000228f, -1.119369f, -0.415589f, -0.721003f, 0.097936f,
-  -1.410586f, -2.358833f, -2.773129f, -3.983361f, -0.087144f, -0.050029f,
-  -0.242255f, 0.137424f,  -0.307490f, -0.084637f, -0.023812f, -0.196582f,
-  -0.078695f, 0.038257f,  -0.012110f, -0.263521f, 0.009839f,  -0.109125f,
-  -0.226036f, 0.060712f,  0.093671f,  0.153143f,  0.039116f,  -0.290891f,
-  0.227057f,  -0.204633f, -0.207539f, -0.148242f, 0.046204f,  -0.231268f,
-  -0.209315f, -0.307579f, -0.436556f, 0.023475f,  0.131793f,  -0.038301f,
-  1.650584f,  0.392570f,  1.446576f,  1.254380f,  -0.516867f, -0.057116f,
-  0.149320f,  0.414424f,  -0.246309f, 0.003877f,  -0.480238f, -1.037035f,
-  -0.830779f, -1.122244f, -0.408267f, -0.253956f, 0.382005f,  0.940609f,
-  -1.113370f, -0.018554f, 0.141064f,  -0.182504f, 1.270707f,  0.414904f,
-  -0.216036f, 0.203831f,  0.450716f,  -0.452909f, 0.139358f,  -0.027143f,
-  1.956892f,  1.643732f,  -0.867839f, -0.620520f, -0.334607f, -0.519982f,
-  0.205023f,  0.661159f,  -0.000809f, 0.049033f,  -0.348579f, -0.200338f,
-  -0.362144f, -0.346590f, -0.230096f, 0.180746f,  -0.149954f, -0.253429f,
-  -0.378170f, -0.040724f, -0.041597f, 0.243659f,  -0.472181f, 0.015401f,
-  -0.180376f, 0.153139f,  -0.247738f, -0.010485f, -0.157158f, 0.016825f,
-  -0.238925f, -0.265798f, -0.318374f, 0.142352f,  -0.210520f, 0.051928f,
-  -0.352190f, -0.179052f, -0.185498f, 0.025540f,  -0.111667f, -0.235187f,
-  -0.215454f, 0.010931f,  -0.238372f, -0.126659f, 0.075691f,  -0.091167f,
-  -2.462379f, -0.007950f, -0.637990f, 0.285554f,  -0.051275f, 0.282279f,
-  -0.744083f, -0.570646f, 0.592198f,  1.421332f,  -0.256027f, -0.140315f,
-  0.160247f,  -0.063185f, -0.055895f, -0.199864f, -0.287353f, -0.074561f,
-  -0.071228f, 0.055864f,  -1.084764f, -0.263409f, 0.779266f,  0.228187f,
-  0.375013f,  0.121204f,  -0.656948f, 0.533561f,  0.272671f,  -0.015423f,
-  -0.124180f, -0.009127f, 2.934838f,  -0.150998f, 1.163152f,  0.081997f,
-  -4.715939f, -3.676595f, -1.524886f, -0.167593f, 0.281186f,  0.024046f,
-  -1.451709f, 0.332558f,  0.990504f,  0.376290f,  -1.466773f, -0.448439f,
-  -2.929108f, -4.255188f, 0.065238f,  0.019950f,  1.372393f,  0.444052f,
-  -2.538772f, 1.579767f,  -0.464911f, -1.866114f, 1.053958f,  0.434467f,
-  -0.125964f, 0.034671f,  0.077116f,  -0.138466f, -0.413395f, -0.223453f,
-  -0.172127f, -0.251265f, -0.048239f, -0.395519f, 0.023141f,  0.037459f,
-  -0.249593f, -0.062215f, -0.047209f, -0.435189f, -0.164155f, -0.077590f,
-  -0.241164f, -0.126128f, -0.038243f, -0.180888f, 0.198840f,  -0.328036f,
-  -0.169790f, 0.036506f,  0.052572f,  -0.183570f, -0.073617f, -0.244959f,
-  0.266498f,  0.032846f,  -1.902106f, 0.486078f,  2.414993f,  0.975182f,
-  -0.382875f, 1.647810f,  -2.197017f, -0.890107f, 0.221287f,  0.010889f,
-  3.817042f,  0.572728f,  0.092466f,  0.473337f,  -1.634659f, -1.069455f,
-  1.486776f,  -1.023850f, 0.088184f,  0.008842f,  0.518202f,  0.270259f,
-  1.757191f,  -0.121839f, -2.912229f, -1.250866f, -2.381808f, 0.335309f,
-  -0.120079f, -0.061294f, -0.058725f, -0.315169f, -0.262443f, 0.072434f,
-  -0.267836f, -0.319354f, -0.274975f, 0.068970f,  -0.406467f, 0.044074f,
-  -0.152311f, -0.333656f, -0.228355f, -0.185613f, 0.017346f,  -0.177674f,
-  -0.090675f, -0.102047f, -0.011768f, -0.025280f, -0.271661f, 0.098099f,
-  -0.312272f, -0.222217f, -0.100548f, 0.106260f,  -0.034655f, 0.135109f,
-  -0.021276f, 0.018177f,  -0.353097f, -0.011128f, 0.061136f,  -0.511662f,
-  -0.223236f, -0.308841f, 0.118789f,  -0.154628f, -0.053178f, -0.055973f,
-  0.013175f,  -0.368337f, -0.090863f, -0.116920f, 0.178990f,  -0.025278f,
-  -0.190553f, -0.238092f, 0.303943f,  -0.024944f, 0.719373f,  0.384332f,
-  -0.378480f, -0.423316f, 0.709922f,  0.758514f,  -1.559023f, -2.503173f,
-  0.068652f,  -0.234741f, -0.182932f, 0.037878f,  0.020684f,  -0.174142f,
-  -0.182300f, -0.052796f, -0.219145f, 0.113028f,  -1.041826f, 0.035317f,
-  0.919904f,  -0.676011f, 0.652297f,  1.456447f,  -0.166904f, -0.861823f,
-  0.895827f,  0.429821f,  -0.180376f, -0.076587f, -0.273945f, -0.288990f,
-  -0.206692f, -0.080745f, -0.085444f, 0.186953f,  -0.050135f, 0.044243f,
-  -0.391706f, -0.160498f, -0.292268f, 0.164060f,  0.412649f,  0.211611f,
-  -0.327294f, -0.919399f, 0.320297f,  0.385284f,  -0.088848f, -0.072556f,
-  -0.384813f, -0.176267f, -0.065918f, 0.134724f,  -0.231104f, -0.337707f,
-  -0.195442f, -0.263569f, 0.098090f,  -0.341411f, -0.189211f, -0.439276f,
-  -0.404046f, 0.262491f,  -0.311093f, -0.086454f, -0.013400f, -0.061447f,
-  -0.026945f, -0.112036f, -0.322985f, 0.078500f,  -0.230205f, -0.344535f,
-  -0.021087f, 0.110220f,  -0.128671f, 0.044219f,
-};
-
-static const float av1_ab_partition_nn_bias_16_layer0[64] = {
-  2.936406f,  -0.396539f, -0.110456f, -1.254954f, 0.785350f,  0.516290f,
-  -0.172341f, 0.254386f,  -0.192465f, -0.106751f, -0.055518f, -0.094994f,
-  0.000000f,  -0.065018f, -0.004908f, -0.130483f, -0.119580f, -0.142072f,
-  0.457446f,  -0.125051f, -0.107712f, 0.714607f,  -0.140809f, -1.788650f,
-  -0.087199f, 0.000000f,  -1.290050f, 0.443930f,  -0.110634f, -0.109380f,
-  -0.188213f, -1.414179f, 1.193579f,  0.388775f,  -0.873193f, -0.110050f,
-  -0.072565f, -0.117050f, -0.119132f, 0.456959f,  -0.132069f, 0.131974f,
-  1.160474f,  1.746465f,  0.442628f,  -0.188849f, -0.207794f, -0.108364f,
-  -0.856655f, -2.141620f, 0.335476f,  -0.105508f, -0.212162f, -0.109319f,
-  -0.237213f, -0.109980f, -0.291044f, -0.137877f, 0.470191f,  -0.023908f,
-  0.123809f,  -0.109797f, 0.200510f,  -0.147542f,
-};
-
-static const float av1_ab_partition_nn_weights_16_layer1[64 * LABEL_SIZE] = {
-  -6.823716f, 1.406568f,  -0.144009f, 2.228765f,  0.838336f,  0.738107f,
-  -0.319014f, -0.148756f, 0.240862f,  -0.111089f, -0.004241f, 0.025758f,
-  -0.193820f, -0.246362f, -0.181363f, -0.201556f, 0.024268f,  0.252994f,
-  -0.289443f, 0.194932f,  0.057467f,  0.724735f,  0.014063f,  1.361352f,
-  0.025191f,  0.024274f,  0.231462f,  -7.227959f, -0.094515f, 0.039946f,
-  0.412719f,  0.812318f,  3.038903f,  -0.286289f, 0.647482f,  -0.115114f,
-  0.053590f,  0.066069f,  0.153134f,  0.996250f,  -0.125700f, 0.951365f,
-  -6.243494f, -4.827697f, 0.566320f,  0.239515f,  -0.099702f, 0.054546f,
-  1.847330f,  3.680076f,  -3.049829f, -0.127709f, 0.068469f,  -0.017794f,
-  0.223864f,  -0.106778f, -0.020425f, -0.040226f, -0.251890f, -0.168673f,
-  -0.552073f, 0.043311f,  0.218668f,  0.033209f,  -3.199210f, 0.193079f,
-  0.321406f,  0.718307f,  -0.181418f, -0.459612f, -1.981170f, 0.968496f,
-  -0.029757f, -0.130065f, 0.043782f,  0.072394f,  -0.088686f, 0.025322f,
-  0.129882f,  0.101324f,  0.335707f,  0.072714f,  -2.079774f, 0.203997f,
-  0.239321f,  -0.301757f, 0.257845f,  1.288382f,  -0.031275f, -0.234194f,
-  0.310722f,  2.045469f,  0.034716f,  0.135638f,  -0.251388f, 0.320071f,
-  -1.065301f, -0.322731f, -0.545028f, 0.226276f,  0.090799f,  0.019289f,
-  0.048950f,  -1.079300f, 0.231938f,  0.083683f,  4.762127f,  0.145037f,
-  -0.145549f, 0.075592f,  0.172336f,  0.108175f,  0.333751f,  1.090501f,
-  1.056114f,  0.047073f,  0.182052f,  -0.081587f, 0.089900f,  0.339286f,
-  2.049988f,  0.073585f,  0.537355f,  -0.243322f, -0.010179f, -0.052601f,
-  -0.174915f, 0.117793f,  2.222990f,  -2.520837f, -0.092699f, 1.199887f,
-  0.138720f,  0.679918f,  -0.463155f, -0.659496f, -0.109913f, -0.003398f,
-  0.114633f,  -0.128377f, 0.092970f,  -0.107489f, -0.191078f, 0.185182f,
-  0.216980f,  -0.019343f, 3.443133f,  0.287953f,  0.099314f,  0.985958f,
-  0.157268f,  -0.606516f, 0.049418f,  -0.221809f, -0.453081f, -0.344796f,
-  -0.003735f, -0.107269f, -0.128541f, -0.259543f, -0.934806f, -0.542456f,
-  -1.011192f, 0.022795f,  0.186363f,  -0.076356f, -0.050932f, -0.165098f,
-  0.168177f,  -0.101596f, -5.270886f, 2.553943f,  -0.440870f, -0.017494f,
-  0.215208f,  -0.017032f, 1.495915f,  -4.304677f, 0.762211f,  0.182937f,
-  0.254406f,  -0.029433f, -0.088364f, -0.110160f, -0.108257f, -0.036538f,
-  0.737697f,  -0.234989f, 0.168095f,  0.245118f,  -0.077262f, 0.195718f,
-  0.753302f,  -1.637869f, 0.126227f,  0.982129f,  -0.121444f, -0.295570f,
-  -1.215799f, 0.147867f,  -0.068496f, 0.132726f,  -0.005772f, -0.181774f,
-  0.126513f,  0.204723f,  -0.366123f, 0.103906f,  -0.148053f, -0.075272f,
-  0.243884f,  -0.104828f, 0.198988f,  0.501034f,  -0.112671f, 0.111421f,
-  0.167508f,  -0.117803f, -0.738624f, 2.046292f,  0.124011f,  0.057983f,
-  -0.359154f, -0.648883f, -0.259462f, -0.459041f, -2.501223f, -0.065138f,
-  0.122417f,  0.060291f,  -0.129033f, -0.843086f, 0.268241f,  -0.399927f,
-  1.585888f,  1.816393f,  -0.631427f, 0.127826f,  0.088105f,  0.073488f,
-  0.717694f,  -1.497362f, 2.608528f,  0.066896f,  -0.079230f, 0.223436f,
-  -0.010530f, 0.175310f,  1.120365f,  0.034391f,  0.835312f,  0.071652f,
-  -0.080615f, 0.111395f,  0.162742f,  0.079927f,  -3.859582f, -0.638431f,
-  -0.167880f, -0.992659f, -0.885355f, -1.276197f, 1.334344f,  0.931940f,
-  -0.078244f, -0.149030f, -0.070974f, -0.133566f, 0.200034f,  0.102793f,
-  -0.048546f, 0.063545f,  0.023864f,  -0.190863f, 1.934257f,  -0.136286f,
-  -0.107916f, -0.637468f, 0.066449f,  1.089693f,  -0.214047f, -0.265780f,
-  0.899660f,  -0.130333f, 0.288311f,  -0.049024f, 0.090202f,  0.487969f,
-  0.339704f,  0.858479f,  0.841253f,  -0.184100f, -0.637070f, -0.125071f,
-  -0.077650f, -0.087877f, 0.202268f,  -0.027300f, 2.842862f,  -0.100698f,
-  -0.259080f, 0.260556f,  0.157912f,  -0.070364f, 0.467190f,  1.200037f,
-  1.419317f,  -0.033588f, -0.227824f, 0.292617f,  0.228574f,  0.213839f,
-  -1.091099f, -0.022258f, -1.294681f, 0.136118f,  0.081652f,  -0.185359f,
-  -0.039706f, 0.191407f,  -2.053219f, -0.261934f, 0.047812f,  -0.029536f,
-  -0.823869f, -1.090534f, -0.755890f, 0.441035f,  -0.167945f, 0.231441f,
-  -0.135013f, -0.260762f, 0.256872f,  0.130339f,  -0.243751f, 0.189760f,
-  -0.288454f, 0.145363f,  0.338490f,  0.403898f,  -0.022814f, -1.263598f,
-  -0.101315f, 0.860135f,  0.136511f,  0.028942f,  0.574047f,  2.656370f,
-  0.037587f,  -0.188690f, -0.125312f, 1.100435f,  -1.080402f, 0.380905f,
-  0.004635f,  0.097144f,  -0.214309f, 0.085552f,  -0.285066f, -0.705134f,
-  -0.054704f, -0.319951f, 5.486626f,  0.958158f,  -1.380585f, 0.223340f,
-  -0.169167f, -0.170697f, -0.216748f, 0.324232f,  2.684204f,  -0.008490f,
-  -0.211052f, -0.201190f, 0.123466f,  -0.000234f, 0.579907f,  0.096938f,
-  -0.042745f, 0.201855f,  0.157195f,  -0.261440f, 0.029699f,  -0.046599f,
-  1.618216f,  -2.596280f, -0.377420f, -0.526725f, -0.493592f, -0.579615f,
-  0.579699f,  -0.100392f, 0.150694f,  0.061794f,  0.200425f,  -0.062515f,
-  -0.179122f, 0.250112f,  -0.344675f, -0.118359f, -0.095670f, 0.152311f,
-  3.662276f,  -0.154921f, -0.312991f, 0.972008f,  -0.308596f, -0.190426f,
-  0.133889f,  -0.238673f, -0.094726f, 1.683835f,  -0.215629f, -0.198890f,
-  -0.035278f, -0.367973f, -0.822435f, 0.240848f,  -0.194656f, 0.034655f,
-  -0.079424f, 0.146670f,  0.026646f,  -0.034507f, 0.059467f,  -0.153109f,
-  -0.431033f, 2.552991f,  -1.894091f, -0.180462f, -0.306839f, -0.025648f,
-  1.026326f,  -3.096230f, 1.346935f,  0.033633f,  -0.181827f, 0.094376f,
-  0.001696f,  -0.379264f, -1.069503f, -0.140972f, -0.208769f, -0.195239f,
-  0.281795f,  -0.127251f, 0.180776f,  0.067763f,  0.697124f,  -1.040779f,
-  0.111280f,  0.188351f,  -0.340234f, -0.207790f, -0.720075f, -0.137409f,
-  -0.070310f, -0.032918f, -0.060787f, 0.131484f,  -0.077845f, -0.258652f,
-  0.056911f,  -0.062034f, 0.007663f,  -0.185100f, 1.340361f,  0.014096f,
-  -0.124602f, 0.194241f,  0.128383f,  0.360465f,  0.082979f,  -0.050475f,
-  -0.519294f, 3.323262f,  0.067014f,  0.221203f,  -0.085082f, -0.228606f,
-  -0.916668f, -0.022643f, -1.386737f, -0.131902f, -0.349952f, -0.032874f,
-  -0.189190f, -0.898790f, -0.102394f, -1.017387f, 2.214050f,  1.790253f,
-  -1.913561f, -0.043716f, -0.214924f, -0.194598f, -0.064723f, -1.671793f,
-  2.251166f,  -0.146007f, 0.138527f,  -0.003134f, 0.103665f,  0.006928f,
-  -0.240253f, -0.227464f, 0.578437f,  -0.214724f, 0.503085f,  0.158093f,
-  0.033091f,  0.008061f,  4.815371f,  2.132264f,  0.281850f,  -2.288560f,
-  -0.145012f, 1.296832f,  -0.362401f, -0.403252f, 0.109873f,  0.185746f,
-  0.244764f,  0.172367f,  -0.185588f, 0.139801f,  -0.178254f, 0.068629f,
-  0.358488f,  -0.153969f, -6.433524f, 0.225983f,  -0.138123f, -0.095971f,
-  -0.036089f, -1.400083f, 0.265908f,  0.257787f,  0.181144f,  -1.647228f,
-  -0.136289f, -0.074206f, 0.122988f,  -0.088895f, -1.266717f, 0.006010f,
-  0.536681f,  0.263061f,  -0.032207f, -0.155136f, 0.086431f,  0.441950f,
-  -0.060755f, -0.280683f, -0.783475f, -2.567033f, 1.093221f,  0.117667f,
-  -0.000408f, 0.225719f,  -2.199698f, 0.141447f,  -1.459051f, 0.051315f,
-  0.203228f,  0.354432f,  -0.005775f, -0.028073f, -0.965817f, 0.231083f,
-  -0.666884f, 0.026283f,  -0.317486f, 0.210754f,  0.123897f,  0.223827f,
-  4.214405f,  1.457334f,  -0.253945f, -1.306733f, -0.391235f, 0.451154f,
-  -1.553888f, -0.353429f, 0.069533f,  0.159278f,  -0.173836f, -0.004952f,
-  -0.137033f, 0.127012f,  0.143600f,  0.051587f,  -0.070549f, 0.066509f,
-  -5.776547f, 0.180021f,  -0.189183f, -1.288504f, -0.233575f, -1.473873f,
-  0.140940f,  0.144451f,  -0.104534f, 2.089873f,  -0.168168f, 0.110726f,
-  0.132134f,  -0.215223f, -1.682754f, 0.157757f,  -0.146163f, 0.064882f,
-  0.117313f,  -0.038780f, -0.124720f, -0.501697f, 0.092047f,  -0.233992f,
-  3.324976f,  0.516601f,  1.294202f,  0.119989f,  0.061055f,  0.043420f,
-  -2.750727f, -0.382812f, -0.648496f, -0.115353f, -0.334205f, 0.024354f,
-  -0.282998f, -0.282705f, 0.073798f,  0.169851f,  0.135651f,  0.182677f,
-  -0.040220f, 0.132462f,  -0.303120f, -0.230113f, 6.165739f,  -0.258596f,
-  0.024127f,  -1.388283f, -0.006042f, 0.572600f,  0.348411f,  -0.387376f,
-  -0.075845f, 0.122319f,  -0.029616f, 0.077873f,  0.154763f,  0.049073f,
-  0.018597f,  0.102688f,  -0.204165f, 0.020734f,  -1.389133f, -0.032854f,
-  -0.147561f, 0.853944f,  0.132100f,  -3.259659f, 0.243745f,  0.181529f,
-  -0.738414f, 1.509994f,  0.023470f,  -0.005329f, 0.066115f,  -1.345081f,
-  -1.455402f, -0.172023f, -0.194625f, 0.071885f,  -0.201742f, -0.262402f,
-  0.077601f,  -0.048938f, 0.257993f,  -0.504029f, -2.032415f, 1.158880f,
-  0.448647f,  -0.025633f, 0.117586f,  -0.072275f, -0.673744f, -3.854342f,
-  -0.983843f, 0.047766f,  -0.017193f, -0.215775f, -0.158743f, -0.232042f,
-  -0.509112f, 0.148812f,  0.130122f,  0.006486f,  -0.099016f, 0.022514f,
-  -0.486850f, -0.059623f, 4.012731f,  0.025454f,  0.029059f,  -0.783546f,
-  -0.295260f, 0.322521f,  -0.473201f, -0.172100f, -0.100087f, -0.076516f,
-  -0.258367f, -0.112897f, 0.269364f,  -0.065912f, 0.169022f,  -0.178783f,
-  -0.095114f, 0.122089f,  -2.790099f, -0.100431f, -0.087963f, -0.009431f,
-  -0.087819f, -2.774399f, -0.100757f, 0.013005f,  -0.964533f, 3.236665f,
-  -0.354903f, -0.144169f, -0.166869f, -1.396513f, -0.931271f, -0.046261f,
-  -1.799262f, -0.365269f, 0.108611f,  0.037994f,  0.024747f,  -1.073639f,
-  -0.203158f, -0.935006f, 1.880891f,  1.578385f,  0.726272f,  -0.024546f,
-  -0.011626f, -0.151363f, -1.121716f, -1.787484f, 0.232806f,  0.075451f,
-  0.182899f,  0.092215f,  -0.207347f, -0.030111f, 0.054316f,  0.192481f,
-  0.594639f,  -0.247694f, 0.547471f,  -0.032094f, -0.065000f, 0.007198f,
-  1.605377f,  -0.155945f, -0.066200f, -2.343716f, -1.016283f, -0.079321f,
-  0.919365f,  0.599980f,  0.125545f,  0.265813f,  0.246884f,  0.095385f,
-  -0.260374f, -0.202916f, -0.042770f, 0.234967f,  -0.233139f, -0.326994f,
-  -1.375256f, 0.121766f,  0.077433f,  -1.103569f, 0.019497f,  -1.029185f,
-  0.253905f,  0.206569f,  0.187334f,  -0.237089f, -0.294351f, 0.164137f,
-  0.149696f,  -0.749787f, -0.413433f, 0.976587f,  1.027976f,  -0.285264f,
-  0.209273f,  -0.124762f, 0.050884f,  0.250764f,  -0.082031f, -0.646520f,
-  4.116680f,  0.437336f,  0.671684f,  0.129509f,  -0.078462f, 0.014072f,
-  -0.678232f, 0.094831f,  1.125624f,  0.207070f,  -0.154750f, -0.025780f,
-  -0.103030f, 0.118019f,  -0.908186f, -0.263546f, -1.555324f, -0.236887f,
-  -0.217854f, -0.051790f, 0.017915f,  0.171001f,  1.355562f,  0.094603f,
-  -0.233929f, -1.282169f, -0.773183f, -0.161682f, -0.834565f, -0.286776f,
-  -0.298901f, 0.038162f,  0.251899f,  0.039612f,  -0.022935f, -0.232308f,
-  -0.043855f, -0.192892f, -0.279009f, -0.182234f, -1.272808f, -0.070344f,
-  -0.092432f, -1.915946f, -0.134373f, -1.405496f, -0.067071f, -0.131922f,
-  0.185269f,  1.465082f,  0.040240f,  0.112665f,  0.144329f,  -0.286112f,
-  -0.617649f, 0.916177f,  0.221044f,  -0.079867f, 0.170251f,  -0.093638f,
-  -0.212620f, -0.305945f, -0.234356f, -0.482501f, 3.928472f,  1.241179f,
-  0.355922f,  -0.170848f, -0.189168f, 0.080225f,  -1.357793f, 0.190890f,
-  0.976800f,  -0.068070f, -0.016295f, -0.088623f, -0.129560f, -0.212267f,
-  -0.071537f, -0.219501f, -0.655198f, -0.225188f, -0.116024f, 0.224174f,
-  -0.049715f, -0.178005f, 3.029985f,  -1.141546f, 0.080066f,  -1.932316f,
-  -0.641137f, -0.189564f, 0.935080f,  0.136119f,  0.015558f,  -0.179331f,
-  0.204571f,  0.020350f,  0.009362f,  0.108478f,  0.037076f,  -0.049009f,
-  0.081090f,  -0.180202f, 1.455561f,  -0.081559f, 0.059361f,  0.484971f,
-  0.160923f,  -2.170744f, -0.013204f, 0.126561f,  -0.407122f, 1.223661f,
-  0.044262f,  0.118044f,  0.058274f,  -1.747100f, -0.171318f, 0.971374f,
-  0.306995f,  -0.103268f, -0.319443f, -0.333176f, -0.038608f, 0.119674f,
-  -0.106479f, -0.907933f, 1.121231f,  1.673840f,  -0.421458f, -0.021146f,
-  -0.254838f, 0.097632f,  0.235109f,  -2.901782f, 0.289518f,  -0.355459f,
-  -0.068264f, -0.179121f, 0.068560f,  -0.047570f, -0.522523f, -0.228963f,
-  -1.037158f, -0.163723f, 0.280563f,  -0.000868f, -0.197220f, -0.239329f,
-  1.985274f,  -0.256181f, -0.064341f, -0.822417f, -0.465140f, -0.010942f,
-  -0.792024f, -0.114290f, 0.060969f,  0.104106f,  -0.252123f, -0.150400f,
-  -0.133277f, 0.267147f,  0.274413f,  0.223744f,  -0.180223f, -0.345415f,
-  -0.104883f, 0.119210f,  -0.095041f, -0.301635f, 0.013175f,  -2.128121f,
-  -0.147208f, -0.151509f, -0.692013f, 3.418555f,  -0.016541f, 0.171511f,
-  0.107159f,  -1.516672f, 0.127408f,  0.687035f,  -0.906486f, -0.145463f,
-  -0.169382f, -0.143906f, 0.125091f,  -0.960645f, -0.180869f, -0.716908f,
-  2.840951f,  1.904919f,  -0.416268f, -0.425181f, -0.194697f, -0.075932f,
-  -0.950604f, -1.599800f, 0.943671f,  -0.022744f, -0.270492f, 0.080843f,
-  -0.372916f, 0.047838f,  -0.100300f, -0.026600f, 0.011733f,  -0.226051f,
-  0.172790f,  -0.172982f, 0.041258f,  -0.299379f,
-};
-
-static const float av1_ab_partition_nn_bias_16_layer1[LABEL_SIZE] = {
-  -0.053805f, -1.248639f, 0.520965f, -0.904962f, -0.126425f, -0.118798f,
-  0.748430f,  0.203096f,  0.059317f, 0.418219f,  0.841294f,  0.402693f,
-  -0.658522f, 0.723479f,  0.544264f, 1.035225f,
-};
-
-static const NN_CONFIG av1_ab_partition_nnconfig_16 = {
-  FEATURE_SIZE,  // num_inputs
-  LABEL_SIZE,    // num_outputs
-  1,             // num_hidden_layers
-  {
-      64,  // num_hidden_nodes
-  },
-  {
-      av1_ab_partition_nn_weights_16_layer0,
-      av1_ab_partition_nn_weights_16_layer1,
-  },
-  {
-      av1_ab_partition_nn_bias_16_layer0,
-      av1_ab_partition_nn_bias_16_layer1,
-  },
-};
-
-#undef FEATURE_SIZE
-#undef LABEL_SIZE
-
-#define FEATURE_SIZE 18
-#define LABEL_SIZE 4
-
-static const float av1_4_partition_nn_weights_16_layer0[FEATURE_SIZE * 24] = {
-  -2.032866f, 0.056691f,  0.495960f,  0.778785f,  0.548153f,  -0.806942f,
-  0.481155f,  0.282298f,  0.584980f,  0.504688f,  0.209648f,  0.234616f,
-  0.213484f,  0.221969f,  0.205862f,  0.235054f,  0.317863f,  0.257139f,
-  0.529478f,  0.098122f,  -0.657532f, 0.036296f,  0.327728f,  1.323180f,
-  -0.813082f, 0.160216f,  -0.702030f, 0.722733f,  -0.270576f, -0.347416f,
-  -0.264700f, -0.254248f, 0.159820f,  0.087995f,  -0.184163f, 0.117357f,
-  0.074194f,  -0.667369f, 0.498246f,  0.420506f,  0.072409f,  -0.121581f,
-  0.315788f,  0.000525f,  0.414986f,  0.678166f,  -0.011230f, 0.188131f,
-  -0.227749f, 0.009564f,  0.108672f,  0.106923f,  -0.080695f, -0.279382f,
-  -0.061339f, -0.297835f, -0.134707f, 0.145865f,  -0.009655f, -0.000842f,
-  -0.047436f, -0.159149f, -0.320353f, -0.089646f, -0.344765f, 0.313416f,
-  -0.143413f, 0.279668f,  0.000885f,  -0.022380f, -0.140194f, -0.310473f,
-  0.252699f,  0.066204f,  0.477568f,  0.994609f,  -0.276000f, 1.213182f,
-  0.277028f,  -0.411570f, -0.211559f, 0.377815f,  0.121488f,  -0.100559f,
-  -0.317082f, -0.251039f, -0.335181f, -0.154114f, -0.052726f, -0.332558f,
-  -0.143196f, -0.334035f, 0.162305f,  0.142279f,  -0.001210f, -0.135252f,
-  -0.033562f, 0.204307f,  -0.039757f, -0.394174f, 0.126617f,  -0.128648f,
-  -0.410979f, 0.107641f,  -0.117573f, -0.326512f, 0.235166f,  0.084959f,
-  0.290063f,  -0.005838f, 0.459894f,  1.023709f,  -0.196145f, 1.100137f,
-  -0.319815f, -0.308526f, -0.443389f, -0.272769f, -0.035259f, -0.026932f,
-  -0.029743f, 0.125113f,  -0.131024f, -0.321458f, -0.143996f, 0.008714f,
-  -0.101234f, 0.079706f,  -1.128615f, -0.467381f, 0.220563f,  -0.409900f,
-  -0.435353f, 0.759499f,  -0.465799f, -0.394309f, 0.176282f,  -0.086275f,
-  -0.161225f, -0.354814f, 0.562871f,  0.418253f,  0.414361f,  0.445480f,
-  -0.995903f, -0.086632f, -0.230645f, 0.354656f,  -0.317576f, 0.079926f,
-  0.424369f,  0.997232f,  -0.304388f, 1.071667f,  -0.023540f, 0.029677f,
-  0.108564f,  0.183581f,  -0.201395f, -0.054854f, -0.193039f, -0.049899f,
-  -0.271949f, -0.358483f, 0.304930f,  0.023823f,  -0.009319f, -0.214247f,
-  0.100712f,  -0.050162f, 0.327103f,  -0.212999f, -0.030496f, 0.316380f,
-  -0.439589f, -0.249959f, 0.229777f,  -0.353664f, -0.384559f, 0.114236f,
-  0.023119f,  0.007927f,  0.618368f,  0.957759f,  -0.019780f, -1.002389f,
-  0.564277f,  -0.839531f, 1.040445f,  0.054340f,  0.031908f,  -0.032893f,
-  -0.019170f, -0.042011f, 0.568928f,  0.362567f,  -0.559999f, -0.605344f,
-  -0.586146f, -0.290778f, 0.195943f,  -0.109580f, -0.088898f, -0.113054f,
-  0.293282f,  0.429019f,  0.306136f,  0.863025f,  0.021234f,  0.125770f,
-  -0.097108f, -0.072659f, -0.137053f, -0.191631f, 0.106281f,  0.064151f,
-  0.029883f,  0.076287f,  0.757543f,  0.276713f,  -2.529775f, -0.351727f,
-  -1.832316f, 0.544780f,  -0.944529f, 0.509705f,  -0.010236f, -0.016181f,
-  0.021520f,  0.086417f,  0.041312f,  0.296853f,  -0.372378f, 0.354446f,
-  -1.366762f, 0.048875f,  0.464918f,  -0.007450f, 0.750013f,  -0.360261f,
-  0.518532f,  0.753776f,  0.641448f,  0.710746f,  0.250866f,  0.257063f,
-  0.283421f,  0.253585f,  0.170303f,  0.210426f,  0.208842f,  0.158000f,
-  -0.033144f, 0.130748f,  0.907147f,  0.409248f,  -0.854301f, -0.981307f,
-  0.294427f,  -0.507137f, 1.079967f,  0.203203f,  0.383890f,  0.368278f,
-  0.305122f,  0.449288f,  -0.044507f, -0.547263f, -0.298245f, -0.497834f,
-  0.007016f,  -0.101982f, -0.073488f, -0.096111f, -0.479418f, -0.045497f,
-  0.033502f,  -0.018578f, -0.231531f, 0.177949f,  0.099564f,  -0.010233f,
-  -0.333055f, -0.078586f, -0.417867f, 0.171271f,  0.013662f,  -0.143599f,
-  -0.117296f, 0.135382f,  0.048321f,  0.000924f,  -0.055024f, -0.405595f,
-  -0.068260f, -0.271011f, -0.436425f, 0.206751f,  -0.899890f, 0.605510f,
-  0.535649f,  -0.238919f, -0.037619f, -0.213734f, -0.391360f, -0.132344f,
-  0.004660f,  0.176644f,  -1.008475f, -0.038895f, 0.155429f,  -0.095229f,
-  -0.680124f, -0.258063f, -0.261901f, 0.110380f,  -0.337649f, -0.505870f,
-  -1.428536f, 0.610629f,  0.254905f,  0.045098f,  0.044109f,  0.172329f,
-  0.060001f,  -0.234009f, -0.184855f, -0.153028f, -0.140897f, -0.152006f,
-  -0.312134f, 0.081261f,  0.160166f,  0.112690f,  0.266081f,  0.030175f,
-  -0.242746f, 0.000754f,  -0.341811f, -0.149774f, -0.017484f, -0.301342f,
-  -0.121466f, 0.067300f,  0.342176f,  0.474538f,  0.085441f,  -0.263935f,
-  0.479235f,  -0.003713f, -0.784840f, 0.119480f,  0.456632f,  -0.640082f,
-  -0.080575f, -0.744403f, 0.259970f,  0.034667f,  -0.274641f, -0.257594f,
-  -1.121124f, -0.003745f, -0.420693f, 0.300441f,  -0.100976f, -1.049016f,
-  0.201960f,  0.113054f,  0.187010f,  1.237427f,  0.054803f,  -0.028673f,
-  0.003596f,  -0.034724f, 0.117246f,  0.190977f,  0.278915f,  0.224307f,
-  0.017852f,  -0.336233f, -0.372311f, -0.182284f, -0.143510f, 0.331466f,
-  0.045698f,  -0.301095f, 0.184447f,  0.348240f,  -0.017021f, -0.145064f,
-  -0.000221f, -0.382256f, -0.302683f, -0.083927f, -0.008070f, 0.217907f,
-  0.647597f,  -0.050490f, -0.572736f, -0.985748f, -0.289943f, 0.041391f,
-  -0.795464f, -0.186680f, -0.354062f, -0.617400f, -0.282783f, -0.170450f,
-  -0.197197f, -0.146496f, -0.173692f, -0.106277f, -0.071004f, -0.124405f,
-  -0.971412f, 0.038542f,  0.705204f,  0.887113f,  0.150430f,  -0.243676f,
-  0.638410f,  0.320953f,  0.776676f,  0.527584f,  0.070389f,  0.051554f,
-  0.177519f,  0.140451f,  0.128892f,  0.087771f,  0.197660f,  0.194764f,
-};
-
-static const float av1_4_partition_nn_bias_16_layer0[24] = {
-  0.614063f,  -0.384872f, 0.084884f,  -0.023980f, -0.378765f, -0.082312f,
-  -0.458271f, 0.189578f,  -0.046169f, -0.073308f, -0.372322f, 0.162793f,
-  0.148803f,  0.829214f,  -0.221162f, -0.111157f, -0.017484f, -0.280596f,
-  -0.031905f, -0.143459f, 0.078823f,  -0.021940f, 0.026834f,  0.257472f,
-};
-
-static const float av1_4_partition_nn_weights_16_layer1[24 * LABEL_SIZE] = {
-  -0.985391f, 0.587616f,  0.740683f,  0.192066f,  0.447080f,  -0.016585f,
-  0.680449f,  0.028983f,  0.643111f,  0.234338f,  0.107148f,  0.328456f,
-  -0.216394f, 1.106838f,  -0.179062f, -0.129108f, -0.121655f, -0.151340f,
-  -0.306017f, -0.350989f, 0.859284f,  -0.372831f, -0.954419f, 0.250495f,
-  1.046732f,  0.287923f,  -0.421088f, 0.326613f,  -0.314396f, -0.084757f,
-  -0.474228f, 0.687999f,  0.052334f,  0.441708f,  -0.630698f, -0.350348f,
-  -0.602067f, -0.434161f, -0.489824f, -0.313193f, 0.315568f,  0.603119f,
-  0.120245f,  0.182920f,  -1.117797f, -0.239594f, -0.296296f, -0.718093f,
-  0.489497f,  -0.527019f, 0.102453f,  0.426731f,  0.034606f,  0.311461f,
-  -0.012723f, -0.229877f, -0.284290f, 0.383227f,  0.065696f,  -0.222400f,
-  1.279248f,  -0.862190f, 0.629766f,  -0.250011f, -0.325060f, -0.360115f,
-  -0.159540f, -0.291856f, -0.038348f, 0.224639f,  0.600934f,  0.030205f,
-  1.337615f,  -0.286409f, -0.473710f, -0.418995f, -1.035249f, 0.004359f,
-  -0.481860f, 0.563625f,  -0.154709f, -0.101198f, -0.758796f, -0.507616f,
-  -0.095253f, -0.711135f, 0.207759f,  0.076313f,  -0.056087f, -0.162719f,
-  -0.232918f, -0.128402f, -0.444620f, -0.447344f, 1.126012f,  -1.504446f,
-};
-
-static const float av1_4_partition_nn_bias_16_layer1[LABEL_SIZE] = {
-  -0.462133f,
-  0.465060f,
-  0.062211f,
-  0.401786f,
-};
-
-static const NN_CONFIG av1_4_partition_nnconfig_16 = {
-  FEATURE_SIZE,  // num_inputs
-  LABEL_SIZE,    // num_outputs
-  1,             // num_hidden_layers
-  {
-      24,  // num_hidden_nodes
-  },
-  {
-      av1_4_partition_nn_weights_16_layer0,
-      av1_4_partition_nn_weights_16_layer1,
-  },
-  {
-      av1_4_partition_nn_bias_16_layer0,
-      av1_4_partition_nn_bias_16_layer1,
-  },
-};
-
-static const float av1_4_partition_nn_weights_32_layer0[FEATURE_SIZE * 32] = {
-  -0.219494f, -0.428273f, 0.471006f,  0.448210f,  -0.152935f, 0.440435f,
-  0.922857f,  -0.074436f, 1.002195f,  0.414176f,  -0.327202f, -0.380066f,
-  -0.212346f, 0.061868f,  -0.056620f, 0.594134f,  0.617995f,  0.308358f,
-  0.232484f,  0.129849f,  1.483593f,  -0.071460f, 1.984515f,  1.116422f,
-  -1.141762f, -0.306220f, 0.089075f,  -0.271845f, 0.187524f,  0.050396f,
-  -0.061025f, 0.030809f,  0.172799f,  -0.458151f, -0.318357f, 0.122052f,
-  -0.414329f, 0.089366f,  0.118898f,  -0.376213f, -0.206151f, -0.519946f,
-  -0.463252f, -0.206694f, -0.254383f, -0.379487f, 0.093059f,  -0.245280f,
-  -0.205044f, -0.280060f, -0.171229f, -0.045389f, -0.179481f, -0.306245f,
-  -0.500856f, 0.003388f,  -0.527397f, -0.449330f, -0.174272f, 0.123769f,
-  0.023005f,  0.157273f,  0.073400f,  0.019099f,  -0.113848f, -0.098601f,
-  -0.290946f, -0.046770f, -0.314592f, -0.179914f, -0.391411f, -0.235631f,
-  -1.282604f, 0.048505f,  -0.746382f, 0.093740f,  -0.706583f, -0.085729f,
-  0.947382f,  -0.002961f, 1.175362f,  1.007309f,  0.141638f,  -0.037608f,
-  -0.118807f, -0.021474f, -0.146763f, 0.069363f,  -0.074372f, -0.215713f,
-  -0.004134f, -0.114110f, -0.330438f, -0.031136f, 0.111821f,  -0.534598f,
-  -0.357759f, -0.455950f, 0.139469f,  0.036582f,  -0.384743f, -0.168828f,
-  -0.239250f, 0.003520f,  -0.049003f, 0.075702f,  -0.025809f, -0.225972f,
-  -0.228905f, -0.412489f, 0.060570f,  -0.328819f, -0.206446f, -0.080231f,
-  -0.372008f, -0.218118f, -0.011954f, 0.024155f,  0.156014f,  0.020679f,
-  0.194398f,  -0.283491f, -0.024463f, -0.275099f, 0.028031f,  0.026340f,
-  -0.254668f, 0.103637f,  2.178693f,  0.552284f,  0.109366f,  -0.474806f,
-  -0.379286f, -0.026315f, 2.487924f,  -0.089466f, 0.206428f,  0.114578f,
-  0.152248f,  0.184050f,  -0.631948f, -0.014793f, -0.283782f, -0.830353f,
-  0.009343f,  -0.021029f, -0.060534f, -0.025164f, 1.841311f,  1.842748f,
-  -1.979708f, 0.450985f,  -1.606357f, -0.785454f, -0.212679f, -0.344342f,
-  0.198991f,  -0.258070f, 0.055974f,  0.224069f,  0.453051f,  0.408053f,
-  0.027873f,  -0.180538f, 0.056609f,  0.207654f,  0.104086f,  -0.194426f,
-  -0.359789f, -0.381143f, -0.331212f, -0.203973f, -0.324313f, -0.160825f,
-  -0.160439f, -0.044856f, -0.346647f, 0.044859f,  0.231398f,  -0.023643f,
-  -0.140316f, -0.260177f, 0.206965f,  -0.425386f, -0.420268f, -0.409748f,
-  0.006971f,  0.066186f,  -0.034950f, -0.345518f, 0.018633f,  -0.122489f,
-  -0.038506f, -0.330942f, 0.161236f,  -0.314119f, -0.050202f, -0.179597f,
-  0.731897f,  -0.184481f, 0.153598f,  -0.539501f, -0.301493f, -0.184967f,
-  -0.883754f, -0.586959f, -0.136292f, -1.772065f, -0.196276f, -0.053272f,
-  -0.101083f, -0.064142f, 0.161190f,  0.430826f,  0.355647f,  0.138266f,
-  0.051114f,  -0.028893f, -0.477673f, -0.238663f, -0.354117f, -0.056747f,
-  -0.334273f, -0.497688f, -0.486004f, -0.092033f, -0.241304f, -0.373250f,
-  0.120193f,  0.011360f,  -0.010475f, -0.092739f, -0.159650f, -0.033129f,
-  -0.259893f, -0.073217f, 0.200128f,  0.103407f,  -0.229233f, 0.128831f,
-  -0.063450f, -0.241732f, -0.408428f, -0.342239f, -0.264326f, -0.105403f,
-  -0.442879f, -0.310456f, -0.112881f, 0.263696f,  -0.205014f, -0.497936f,
-  -0.261734f, -0.382312f, -0.426807f, -0.021995f, -0.152794f, -0.301494f,
-  0.117232f,  -0.577809f, 0.154596f,  -0.409522f, -0.413113f, -0.359199f,
-  0.307294f,  -0.008746f, -0.310522f, 0.347620f,  -0.384845f, -0.451398f,
-  -0.226199f, 0.054154f,  -0.167608f, 0.046836f,  -0.013285f, -0.408119f,
-  -0.177973f, -0.248293f, -0.465830f, 0.035827f,  -0.222208f, -0.221717f,
-  0.066392f,  -0.349769f, -0.428029f, -0.516692f, 0.022398f,  -0.251682f,
-  0.134746f,  0.011167f,  -2.078787f, 0.173592f,  -1.948348f, 0.330060f,
-  1.993785f,  -0.052859f, -0.004795f, -3.703177f, 0.013450f,  -0.011687f,
-  0.073079f,  0.034803f,  0.025515f,  0.005994f,  0.101731f,  0.074303f,
-  -0.109962f, -0.270825f, -0.068273f, -0.163268f, -0.252826f, 0.137190f,
-  0.007667f,  -0.358453f, 0.027412f,  0.033492f,  0.021197f,  -0.049991f,
-  0.104468f,  -0.012157f, -0.056252f, -0.380756f, -0.338483f, 0.233235f,
-  -0.048631f, -0.441209f, -0.158482f, -0.148108f, -0.263453f, 0.138847f,
-  -0.304073f, -0.336312f, -0.017941f, -0.135563f, 0.075137f,  -0.246475f,
-  -0.229144f, -0.087744f, -0.346909f, 0.172611f,  0.004377f,  -0.009386f,
-  -0.023104f, 0.008000f,  -0.029390f, -0.317842f, 0.549674f,  -0.195337f,
-  -0.863979f, 0.160889f,  -0.269014f, -0.442104f, -1.799191f, 1.396533f,
-  -0.112837f, 0.881303f,  0.000764f,  -0.035415f, -0.141877f, 0.184831f,
-  -0.363566f, -0.178569f, 0.254134f,  -0.326893f, 0.127325f,  0.310620f,
-  -0.384621f, 0.146058f,  -0.287682f, -0.373447f, 0.026930f,  0.251650f,
-  0.053817f,  0.227509f,  0.121396f,  0.396514f,  -0.278381f, -0.038969f,
-  -1.538756f, -0.002856f, -0.892900f, 0.363426f,  -1.257922f, 0.743795f,
-  0.941177f,  0.219345f,  0.684189f,  1.396858f,  0.026299f,  -0.093433f,
-  -0.066182f, 0.057868f,  -0.089278f, -0.159680f, -0.262035f, -0.236656f,
-  0.005349f,  -0.031314f, 0.027917f,  -0.182113f, -0.212086f, -0.160774f,
-  0.051468f,  0.036787f,  0.183881f,  -0.288205f, -0.349691f, 0.162511f,
-  0.117878f,  -0.294534f, -0.365037f, -0.246313f, 0.073977f,  -0.072378f,
-  -0.173579f, -0.584560f, 0.547194f,  0.259853f,  -0.405287f, -0.421146f,
-  0.165788f,  -0.146964f, 0.257415f,  0.772394f,  -0.475302f, -0.310906f,
-  0.058723f,  0.276833f,  0.586842f,  0.248998f,  -0.061135f, 0.255779f,
-  0.152158f,  -0.024781f, 2.821834f,  1.365141f,  0.914744f,  0.165752f,
-  -1.048304f, -0.333891f, 1.804087f,  -0.437028f, -0.120211f, -0.020443f,
-  0.040077f,  0.258600f,  -0.598893f, -0.494579f, -0.281054f, -0.517041f,
-  0.005258f,  0.053986f,  0.322755f,  0.429495f,  -1.992364f, -0.717192f,
-  -1.774802f, 2.047362f,  -0.016194f, 0.312606f,  0.019331f,  0.060950f,
-  0.116428f,  0.168458f,  -0.307001f, -0.420734f, 0.475843f,  0.425346f,
-  -0.107119f, 0.049892f,  -1.168619f, 0.010878f,  0.354872f,  0.902717f,
-  -0.391407f, 0.332772f,  -1.335037f, -0.447100f, 0.481719f,  -0.101069f,
-  -1.806565f, 0.925280f,  0.346999f,  0.093809f,  0.006275f,  0.270814f,
-  -0.691123f, 0.230748f,  0.137033f,  0.068228f,  1.555975f,  -0.271637f,
-  -0.370403f, 0.236131f,  0.367464f,  -0.136562f, 0.428838f,  0.181750f,
-  0.338762f,  0.292449f,  -0.748204f, -0.922731f, -0.959445f, -0.806418f,
-  -0.140501f, 0.070525f,  1.248748f,  0.637990f,  -1.307246f, -0.514055f,
-  0.393858f,  -1.858727f, 0.713591f,  -0.141044f, 0.080723f,  0.120220f,
-  -0.031175f, 0.224488f,  0.753818f,  -0.833351f, -1.099132f, 0.651100f,
-  -0.135061f, -0.043820f, 0.026983f,  -0.059259f, 0.001345f,  -0.281775f,
-  0.006958f,  0.046103f,  -0.246539f, 0.057630f,  -0.360778f, -0.160681f,
-  -0.414870f, -0.301979f, 0.000683f,  0.132957f,  -0.477609f, 0.106110f,
-  -0.637769f, -0.078374f, -0.229494f, 0.583108f,  -0.822973f, -0.107540f,
-  1.063426f,  -0.268346f, 1.105787f,  2.587550f,  -0.020314f, -0.002161f,
-  -0.063836f, -0.099990f, -0.103975f, -0.114078f, -0.094199f, -0.065181f,
-  -0.019870f, -0.018920f, -0.219732f, 0.035608f,  -1.789450f, 0.483032f,
-  -0.464729f, 1.563277f,  -1.054195f, 0.359991f,  0.065204f,  0.135623f,
-  0.158380f,  -0.103815f, -1.398726f, -1.436666f, -0.356311f, 0.507752f,
-};
-
-static const float av1_4_partition_nn_bias_32_layer0[32] = {
-  0.421645f,  -0.620548f, -0.187819f, -0.189414f, -0.204975f, -0.189600f,
-  -0.174917f, -0.651928f, -0.799655f, -0.086105f, -0.163449f, -0.089212f,
-  -0.214495f, -0.108500f, -0.065777f, -0.127704f, 1.544948f,  -0.032831f,
-  -0.165621f, 0.145844f,  -0.032104f, -0.453246f, -0.113444f, 0.321589f,
-  -0.862375f, -0.108826f, -0.486259f, 0.685325f,  0.072569f,  -0.187961f,
-  0.109579f,  -0.082685f,
-};
-
-static const float av1_4_partition_nn_weights_32_layer1[32 * LABEL_SIZE] = {
-  0.255012f,  0.658860f,  0.216907f,  0.165947f,  0.241182f,  0.340854f,
-  0.409445f,  0.165220f,  0.553373f,  -0.242385f, -0.209571f, 0.255515f,
-  0.222500f,  0.037032f,  0.238590f,  0.061624f,  -2.038693f, 0.264167f,
-  -0.230144f, 0.129952f,  -0.027979f, 0.847761f,  0.438922f,  0.462323f,
-  0.555345f,  0.030689f,  0.336357f,  -0.357326f, -0.113137f, 0.272631f,
-  0.421022f,  0.367776f,  -0.197094f, 0.157117f,  -0.015008f, -0.056123f,
-  -0.283913f, 0.186417f,  0.178561f,  -0.763041f, 0.602038f,  0.341092f,
-  0.320453f,  -0.312776f, -0.371240f, -0.356279f, 0.220117f,  -0.131871f,
-  1.517429f,  0.162223f,  -0.255069f, 0.451861f,  0.045071f,  -0.223257f,
-  0.003257f,  0.015734f,  -0.630447f, -0.672588f, 0.670164f,  0.571031f,
-  -0.657948f, 0.034506f,  -0.249076f, 0.790293f,  0.066491f,  -0.131245f,
-  0.355173f,  0.564622f,  0.374048f,  0.033974f,  0.253970f,  0.495498f,
-  -0.556321f, -0.104651f, 0.276947f,  0.057148f,  -0.039126f, -0.170050f,
-  -0.141542f, 0.158541f,  0.582763f,  -0.100992f, 0.096705f,  -0.209029f,
-  0.008449f,  0.255865f,  0.103565f,  0.317719f,  0.479499f,  0.599126f,
-  -0.065613f, -0.268614f, 0.508736f,  0.180813f,  -0.815868f, 0.051238f,
-  0.001223f,  -0.305423f, -0.270079f, 0.036180f,  0.304342f,  0.202634f,
-  0.218348f,  -0.304304f, -0.438297f, 0.241123f,  0.200230f,  0.151804f,
-  0.051944f,  0.160422f,  -0.262981f, -0.417412f, 1.845729f,  -0.086183f,
-  0.403517f,  0.059667f,  0.564543f,  -0.081752f, 0.114907f,  -0.284489f,
-  -0.673943f, 0.056965f,  0.362221f,  0.403224f,  -0.000233f, -0.209552f,
-  -0.800926f, -0.134132f,
-};
-
-static const float av1_4_partition_nn_bias_32_layer1[LABEL_SIZE] = {
-  -0.019518f,
-  0.198546f,
-  0.339015f,
-  -0.261961f,
-};
-
-static const NN_CONFIG av1_4_partition_nnconfig_32 = {
-  FEATURE_SIZE,  // num_inputs
-  LABEL_SIZE,    // num_outputs
-  1,             // num_hidden_layers
-  {
-      32,  // num_hidden_nodes
-  },
-  {
-      av1_4_partition_nn_weights_32_layer0,
-      av1_4_partition_nn_weights_32_layer1,
-  },
-  {
-      av1_4_partition_nn_bias_32_layer0,
-      av1_4_partition_nn_bias_32_layer1,
-  },
-};
-
-static const float av1_4_partition_nn_weights_64_layer0[FEATURE_SIZE * 24] = {
-  -0.152649f, 0.074509f,  1.000136f,  0.601661f,  -1.416694f, -1.932396f,
-  -1.163850f, 0.640931f,  -0.888625f, -0.345711f, 0.161799f,  0.103165f,
-  0.147513f,  0.089956f,  0.204329f,  0.196922f,  0.014927f,  0.283714f,
-  -0.110422f, 0.062005f,  -0.531870f, -0.075287f, -0.448349f, -0.218881f,
-  -0.005592f, -0.130490f, -0.015779f, 0.093521f,  -0.158487f, 0.072241f,
-  0.066879f,  -0.418566f, -0.206281f, 0.025634f,  0.048334f,  -0.534750f,
-  0.302081f,  0.028707f,  -1.543248f, 0.103799f,  -1.214052f, 0.395870f,
-  0.394754f,  -0.272170f, -0.702953f, -4.057464f, -0.033497f, -0.042142f,
-  0.014742f,  0.065263f,  0.000879f,  -0.019768f, 0.101275f,  0.163059f,
-  -0.371392f, -0.283484f, 0.241915f,  0.012684f,  -0.210101f, -0.166534f,
-  -0.024894f, 0.274696f,  0.098993f,  0.104086f,  0.055044f,  -0.289378f,
-  0.146571f,  -0.147441f, 0.004056f,  0.112244f,  -0.416162f, -0.033176f,
-  -0.214836f, -0.213787f, 0.023197f,  -0.339043f, 0.301109f,  -0.408551f,
-  0.284922f,  -0.344418f, -0.039255f, 0.158748f,  -0.344169f, 0.078286f,
-  -0.043957f, -0.302162f, -0.310826f, 0.063425f,  0.198166f,  -0.285324f,
-  -0.108252f, 0.038992f,  -1.053110f, -1.663290f, -0.417185f, 1.504443f,
-  0.643206f,  -0.850240f, 0.889641f,  -0.733214f, 0.147302f,  0.060291f,
-  -0.052954f, 0.167453f,  0.111870f,  0.085471f,  0.035107f,  0.064361f,
-  0.176053f,  0.184373f,  0.676576f,  0.066164f,  1.455569f,  0.925111f,
-  -0.640845f, 0.803795f,  -0.653782f, -0.201038f, 0.060033f,  0.016964f,
-  -0.047590f, 0.045908f,  0.354162f,  0.014812f,  0.156978f,  0.058792f,
-  -0.238119f, 0.002450f,  -0.094388f, -0.155229f, 0.194858f,  -0.355429f,
-  -0.187098f, -0.119264f, -0.088694f, -0.102845f, 0.184905f,  -0.425339f,
-  -0.157808f, -0.104599f, -0.393248f, -0.379842f, 0.027741f,  -0.185816f,
-  -0.317294f, 0.002453f,  -0.498241f, -0.204302f, -0.079093f, 0.020646f,
-  -0.412850f, -0.426039f, -0.177050f, -0.419304f, -0.064478f, -0.191802f,
-  -0.146812f, 0.171111f,  0.090261f,  -0.367033f, -0.299051f, -0.322132f,
-  0.428192f,  -0.252613f, 0.488498f,  -0.559682f, 0.486720f,  -0.511084f,
-  0.992506f,  0.346765f,  -0.118697f, -0.065127f, -0.376612f, -0.345137f,
-  -0.426517f, -0.516836f, 0.307083f,  0.609362f,  0.369555f,  0.093775f,
-  -0.375664f, -0.221595f, -0.025465f, 0.134374f,  -0.387031f, 0.096236f,
-  0.337465f,  -0.124029f, -0.157340f, -0.368790f, -0.104490f, -0.279507f,
-  -0.247705f, 0.146559f,  -0.236206f, -0.036073f, 0.064206f,  -0.330919f,
-  0.516591f,  -0.013492f, 1.269568f,  1.182530f,  -0.455390f, -1.328091f,
-  -0.200950f, -0.380513f, -0.195532f, -0.341479f, 0.016064f,  0.021176f,
-  0.169119f,  0.103707f,  -0.174504f, -0.462719f, -0.079445f, -0.247128f,
-  0.459111f,  0.036129f,  0.769570f,  -0.080405f, 1.667107f,  0.355567f,
-  -2.433896f, 0.627572f,  -0.600090f, -0.651872f, -0.059769f, -0.041945f,
-  -0.009933f, 0.014864f,  -0.049378f, -0.041561f, 0.075180f,  0.138307f,
-  0.122366f,  -0.160756f, 0.215327f,  0.013572f,  0.198194f,  -0.762650f,
-  0.054466f,  1.110332f,  1.692853f,  0.658654f,  -0.409549f, 0.506085f,
-  0.330962f,  -0.223008f, 0.007448f,  -0.289062f, -0.476231f, -0.228359f,
-  0.013977f,  -0.000609f, -0.673604f, 0.275996f,  0.405291f,  1.693561f,
-  -1.079768f, 1.122516f,  -0.203227f, 0.099265f,  -0.165207f, -0.323899f,
-  -0.269973f, -0.080122f, 0.127700f,  0.190201f,  0.219527f,  0.306194f,
-  0.026049f,  -0.003779f, 1.107357f,  1.720315f,  1.017908f,  0.078664f,
-  -1.599813f, -0.482636f, -0.117450f, 0.122249f,  0.030220f,  0.039794f,
-  0.176350f,  0.129715f,  -0.305755f, -0.274044f, -0.299640f, -0.187335f,
-  -0.073616f, -0.564507f, -0.127758f, 0.044855f,  -0.191090f, 0.039095f,
-  0.115378f,  0.969352f,  -0.088360f, 0.301443f,  0.065726f,  -0.019740f,
-  -0.102350f, -0.084913f, -0.194615f, 0.118582f,  0.920789f,  -0.171615f,
-  -1.436553f, -0.026419f, -0.730864f, 0.615697f,  -0.795079f, 0.119701f,
-  0.601782f,  0.792902f,  0.184920f,  1.635090f,  -0.085860f, -0.033187f,
-  -0.166883f, 0.008487f,  -0.128300f, -0.089923f, -0.108781f, -0.133719f,
-  -0.011988f, -0.239816f, -0.092563f, -0.238471f, -0.339722f, 0.177432f,
-  -0.063101f, -0.121002f, 0.058072f,  -0.031166f, 0.086413f,  -0.016203f,
-  -0.305075f, -0.005420f, -0.168796f, 0.148745f,  -0.116737f, -0.050222f,
-  -0.287952f, -0.290982f, -0.090449f, 0.076098f,  -0.345632f, -0.061309f,
-  0.142218f,  0.035692f,  0.304517f,  -0.228031f, 0.119608f,  -0.120350f,
-  0.163404f,  -0.105605f, -0.305462f, -0.176657f, 0.210070f,  -0.227600f,
-  -0.081965f, -0.464027f, -0.053782f, -0.018367f, 0.119159f,  0.017162f,
-  -0.069792f, 0.305768f,  -0.421095f, 0.187740f,  -0.032059f, 0.575115f,
-  -0.064283f, -0.091828f, 0.772648f,  -0.393189f, -0.297098f, 0.141420f,
-  0.826389f,  -0.071586f, -0.893968f, -0.346793f, -1.151655f, 0.039393f,
-  1.546000f,  -0.094029f, -0.005786f, -0.195764f, -0.169724f, -0.133167f,
-  -0.129312f, -0.418860f, -0.026553f, -0.053667f, -0.091976f, -0.106275f,
-  -0.492625f, 0.025350f,  -0.332075f, -0.475638f, -0.076667f, -0.065779f,
-  0.108957f,  0.246298f,  -0.289007f, -0.442552f, -0.206692f, -0.257453f,
-  0.073806f,  -0.458606f, -0.410390f, -0.312674f, -0.144813f, 0.170128f,
-  0.018810f,  -0.098241f, 1.027369f,  0.479328f,  1.129707f,  0.484813f,
-  -0.085207f, 0.621873f,  -0.520981f, 0.236175f,  0.273487f,  0.061426f,
-  0.306085f,  0.161487f,  0.220991f,  0.223783f,  -0.091826f, 0.391031f,
-};
-
-static const float av1_4_partition_nn_bias_64_layer0[24] = {
-  0.580225f,  -0.191304f, 1.091767f,  -0.134522f, -0.089361f, 0.398750f,
-  -0.882708f, -0.213102f, -0.119981f, 0.378296f,  -0.075719f, 0.426598f,
-  -2.015505f, 0.202534f,  -1.044792f, -0.841519f, 0.266421f,  -0.047115f,
-  -0.131147f, -0.075066f, -0.009441f, 0.853007f,  -0.175606f, -0.868306f,
-};
-
-static const float av1_4_partition_nn_weights_64_layer1[24 * LABEL_SIZE] = {
-  -0.851937f, -0.211148f, -2.289513f, -0.275071f, 0.251340f,  -0.340847f,
-  0.498032f,  0.308652f,  -0.051574f, 0.323146f,  -0.097547f, -0.040269f,
-  1.909655f,  0.098348f,  0.588136f,  0.568112f,  0.313297f,  0.920848f,
-  -0.014486f, 0.386014f,  0.029199f,  -0.537330f, -0.021502f, 0.349073f,
-  -0.524715f, -0.351848f, 1.565454f,  -0.297148f, 0.020177f,  0.648369f,
-  0.027321f,  -0.096052f, -0.363163f, -0.132642f, 0.024292f,  -0.734176f,
-  -0.782700f, 0.408299f,  0.476945f,  -0.489512f, -0.728318f, -0.632042f,
-  0.405417f,  0.184086f,  -0.400730f, 0.359032f,  0.019710f,  -0.217409f,
-  0.519159f,  -0.136316f, 0.993592f,  -0.147128f, 0.097495f,  0.426189f,
-  -0.295233f, 0.278799f,  0.080667f,  -0.025052f, -0.307757f, 0.418716f,
-  -0.853388f, -0.374878f, -0.322725f, 0.696335f,  -0.380649f, -0.160356f,
-  -0.140060f, 0.502455f,  0.656728f,  -0.095023f, -0.184198f, -0.347069f,
-  0.456372f,  -0.029754f, 0.907923f,  0.265710f,  -0.065505f, 0.226763f,
-  -0.277798f, 0.413292f,  -0.593899f, -0.060740f, -0.313358f, -0.249944f,
-  -0.627329f, -0.327151f, -0.853788f, -1.163807f, -0.388944f, -0.228788f,
-  -0.057382f, 0.334741f,  -0.283083f, 0.368280f,  -0.407197f, -0.441849f,
-};
-
-static const float av1_4_partition_nn_bias_64_layer1[LABEL_SIZE] = {
-  -0.478735f,
-  0.292948f,
-  0.293172f,
-  0.040013f,
-};
-
-static const NN_CONFIG av1_4_partition_nnconfig_64 = {
-  FEATURE_SIZE,  // num_inputs
-  LABEL_SIZE,    // num_outputs
-  1,             // num_hidden_layers
-  {
-      24,  // num_hidden_nodes
-  },
-  {
-      av1_4_partition_nn_weights_64_layer0,
-      av1_4_partition_nn_weights_64_layer1,
-  },
-  {
-      av1_4_partition_nn_bias_64_layer0,
-      av1_4_partition_nn_bias_64_layer1,
-  },
-};
-
-#undef FEATURE_SIZE
-#undef LABEL_SIZE
-
-#define FEATURE_SIZE 4
-static const float
-    av1_partition_breakout_nn_weights_128_layer0[FEATURE_SIZE * 32] = {
-      -0.331785f,  0.068675f,  -0.323814f,  0.033714f,  -0.237835f, 0.166316f,
-      -0.498766f,  -0.545634f, -0.266173f,  -0.476957f, -0.120409f, -0.021042f,
-      0.124056f,   -0.278750f, -0.110120f,  -0.372812f, 4.547939f,  0.097618f,
-      -0.002710f,  -0.064169f, -1.841173f,  -0.403833f, 0.005536f,  0.067188f,
-      -0.434935f,  -0.227421f, -0.000011f,  -0.139961f, -0.174056f, -0.652384f,
-      -0.000015f,  -0.262847f, -3.319706f,  -0.947693f, 0.002981f,  0.016717f,
-      -10.408850f, -0.014568f, -0.000018f,  0.019084f,  1.523383f,  0.074525f,
-      -0.002076f,  -0.020734f, 4.881495f,   0.002799f,  0.000342f,  -0.019623f,
-      1.786154f,   0.037462f,  -0.019037f,  0.052833f,  11.408153f, -0.044602f,
-      0.026155f,   -0.518627f, -0.474499f,  -0.427430f, -0.442733f, -0.011116f,
-      -22.379410f, -0.000549f, -0.001418f,  0.008090f,  -0.295090f, -0.230268f,
-      -0.337278f,  -0.001127f, -0.644282f,  -0.598783f, -0.539417f, -0.003303f,
-      9.189824f,   0.038066f,  -0.004097f,  -0.460045f, -0.308858f, -0.242691f,
-      -0.230835f,  -0.273057f, 0.152226f,   0.179239f,  -0.146382f, -0.004655f,
-      -0.242940f,  -0.718862f, -0.001685f,  -0.214736f, 3.263186f,  0.079463f,
-      -0.003854f,  -0.187461f, -0.599144f,  -0.419808f, -0.000597f, -0.136980f,
-      0.184813f,   -0.319525f, -0.007246f,  0.079709f,  -0.883229f, -0.343748f,
-      -0.000077f,  -0.172214f, -0.548759f,  -0.194674f, -0.144786f, 0.043896f,
-      -0.176364f,  -0.248394f, -0.090215f,  -0.294743f, -0.280980f, -0.181436f,
-      -0.115681f,  -0.071915f, -13.035494f, -0.075623f, 0.017052f,  -0.171152f,
-      5.910803f,   0.128344f,  0.010256f,   -1.073301f, 2.387826f,  0.166183f,
-      -0.007193f,  -0.257836f,
-    };
-
-static const float av1_partition_breakout_nn_bias_128_layer0[32] = {
-  0.115591f,  -0.100178f, -0.165523f, -0.122997f, 11.045759f,  1.034761f,
-  -0.323672f, -0.189087f, 2.850950f,  7.010029f,  -21.447067f, 1.877031f,
-  0.437442f,  5.929414f,  -0.117274f, 4.462253f,  -0.135198f,  -0.145927f,
-  8.727211f,  0.000000f,  -3.532987f, -0.405898f, 11.364439f,  -0.141728f,
-  -5.994947f, -0.362574f, 1.857687f,  -0.100400f, -0.130312f,  0.006080f,
-  0.429660f,  -8.439470f,
-};
-
-static const float av1_partition_breakout_nn_weights_128_layer1[32] = {
-  -0.013738f, 0.022052f,  -0.074437f, -0.211377f, -0.080433f, 0.015543f,
-  0.002091f,  0.014252f,  0.134834f,  0.190263f,  0.244175f,  -0.031747f,
-  0.020068f,  -0.068326f, 0.185471f,  0.660268f,  -0.134898f, -0.010376f,
-  -0.276023f, -0.282921f, -0.022769f, 0.007070f,  -0.186235f, 0.024407f,
-  -0.024837f, 0.005764f,  0.016599f,  -0.040077f, 0.020990f,  0.095054f,
-  -0.039662f, 0.131499f,
-};
-
-static const float av1_partition_breakout_nn_bias_128_layer1[1] = {
-  0.86678213f,
-};
-
-static const NN_CONFIG av1_partition_breakout_nnconfig_128 = {
-  FEATURE_SIZE,  // num_inputs
-  1,             // num_outputs
-  1,             // num_hidden_layers
-  {
-      32,  // num_hidden_nodes
-  },
-  {
-      av1_partition_breakout_nn_weights_128_layer0,
-      av1_partition_breakout_nn_weights_128_layer1,
-  },
-  {
-      av1_partition_breakout_nn_bias_128_layer0,
-      av1_partition_breakout_nn_bias_128_layer1,
-  },
-};
-
-static const float
-    av1_partition_breakout_nn_weights_64_layer0[FEATURE_SIZE * 16] = {
-      0.872892f,  -0.235539f, -0.412159f, -0.142533f, -2.251479f, -0.057073f,
-      -0.001373f, 0.112147f,  5.281734f,  0.060704f,  0.000838f,  -0.961554f,
-      0.244995f,  0.154515f,  -0.292654f, -0.167177f, -3.759112f, -0.486347f,
-      0.003208f,  -0.418226f, 2.618152f,  0.026832f,  0.003988f,  -0.404406f,
-      -0.405434f, 0.102791f,  -0.033406f, -0.029820f, -4.492342f, -0.154291f,
-      0.012947f,  -0.195075f, 0.009311f,  -0.411410f, -0.010986f, -0.554822f,
-      0.160576f,  0.020796f,  -0.457230f, -0.191111f, -7.759542f, -0.065039f,
-      -0.001322f, 0.055691f,  0.291924f,  -0.053076f, -0.148379f, -0.298383f,
-      1.022023f,  -0.033668f, -0.000804f, -0.825778f, -3.902254f, -0.085812f,
-      -0.052520f, -0.035012f, -0.465468f, -0.319231f, -0.497529f, -0.183068f,
-      -2.407131f, -0.062304f, 0.000874f,  0.108786f,
-    };
-
-static const float av1_partition_breakout_nn_bias_64_layer0[16] = {
-  0.081425f,  -14.404084f, 11.511393f, -0.930053f, 1.841889f,  15.020920f,
-  -1.872288f, 5.392535f,   -0.329335f, -0.005358f, 12.600776f, 0.000000f,
-  -0.337413f, 4.492778f,   0.000000f,  17.043072f,
-};
-
-static const float av1_partition_breakout_nn_weights_64_layer1[16] = {
-  -0.465338f, -0.103023f, -0.174808f, -0.005156f, -0.016366f, -0.172494f,
-  0.014185f,  0.067030f,  -0.001939f, -0.175049f, 0.245992f,  -0.181660f,
-  -0.038572f, 0.307899f,  -0.294283f, 0.118323f,
-};
-
-static const float av1_partition_breakout_nn_bias_64_layer1[1] = {
-  -1.33438122f,
-};
-
-static const NN_CONFIG av1_partition_breakout_nnconfig_64 = {
-  FEATURE_SIZE,  // num_inputs
-  1,             // num_outputs
-  1,             // num_hidden_layers
-  {
-      16,  // num_hidden_nodes
-  },
-  {
-      av1_partition_breakout_nn_weights_64_layer0,
-      av1_partition_breakout_nn_weights_64_layer1,
-  },
-  {
-      av1_partition_breakout_nn_bias_64_layer0,
-      av1_partition_breakout_nn_bias_64_layer1,
-  },
-};
-
-static const float
-    av1_partition_breakout_nn_weights_32_layer0[FEATURE_SIZE * 16] = {
-      -4.825528f, -0.145737f, 0.001907f,  0.145415f,  -1.858153f, -0.080744f,
-      0.000601f,  0.211991f,  0.384265f,  -0.043945f, -0.521332f, -0.170622f,
-      -0.046866f, -0.600506f, -0.001216f, -0.332760f, -0.447677f, -0.605844f,
-      -0.121008f, -0.119936f, -0.215739f, -0.269665f, -0.668587f, 0.071318f,
-      -1.202551f, -0.729727f, -0.370084f, 0.088215f,  -1.926800f, -0.086519f,
-      0.000359f,  0.215120f,  0.718749f,  0.022942f,  0.003840f,  -0.176518f,
-      1.213451f,  0.080786f,  0.001557f,  -1.053430f, 0.202698f,  -0.583919f,
-      -0.535512f, -0.239927f, -0.110151f, -0.128832f, -0.441087f, -0.145575f,
-      -0.178518f, -0.585784f, 0.000029f,  -0.833014f, -0.331358f, -0.520297f,
-      -0.088676f, -0.178487f, -1.430755f, 0.022981f,  -0.106931f, 0.015573f,
-      -0.520814f, -0.045386f, -0.443123f, -0.484209f,
-    };
-
-static const float av1_partition_breakout_nn_bias_32_layer0[16] = {
-  11.747026f, -9.337718f, 0.341648f, -0.155847f, -0.104005f, 4.666283f,
-  6.669584f,  16.625504f, 9.885626f, 15.439183f, -0.346080f, 0.000000f,
-  -0.423808f, 0.000000f,  6.352258f, -0.155787f,
-};
-
-static const float av1_partition_breakout_nn_weights_32_layer1[16] = {
-  0.168561f,  -0.122519f, 0.524667f,  0.032474f,  0.059097f,  0.011900f,
-  0.166445f,  0.127256f,  -0.034838f, -0.212586f, -0.317973f, 0.348419f,
-  -0.004171f, 0.157694f,  0.117845f,  0.272115f,
-};
-
-static const float av1_partition_breakout_nn_bias_32_layer1[1] = {
-  0.09049262f,
-};
-
-static const NN_CONFIG av1_partition_breakout_nnconfig_32 = {
-  FEATURE_SIZE,  // num_inputs
-  1,             // num_outputs
-  1,             // num_hidden_layers
-  {
-      16,  // num_hidden_nodes
-  },
-  {
-      av1_partition_breakout_nn_weights_32_layer0,
-      av1_partition_breakout_nn_weights_32_layer1,
-  },
-  {
-      av1_partition_breakout_nn_bias_32_layer0,
-      av1_partition_breakout_nn_bias_32_layer1,
-  },
-};
-
-static const float
-    av1_partition_breakout_nn_weights_16_layer0[FEATURE_SIZE * 16] = {
-      0.209371f,  0.028758f,  0.005764f,  -0.384401f, -0.625777f, -0.005647f,
-      -0.316867f, 0.042985f,  0.127344f,  0.025461f,  0.011465f,  -0.071043f,
-      -0.295977f, -0.076093f, -0.209681f, -0.311653f, -0.147538f, 0.009910f,
-      -0.130997f, -0.012326f, 0.024124f,  -0.323578f, -0.005790f, -0.085664f,
-      -1.575066f, -0.119221f, 0.015018f,  0.187204f,  0.238117f,  0.084924f,
-      -0.004444f, -1.271538f, -0.709860f, -0.006226f, -0.903111f, 0.090573f,
-      -0.278642f, -0.011114f, 0.021162f,  0.081290f,  -0.467486f, -0.040771f,
-      -0.224069f, -0.714390f, -0.281905f, -0.001336f, -0.761212f, -0.060385f,
-      -0.814479f, -0.050450f, -0.003666f, 0.085668f,  -0.272589f, 0.057330f,
-      -0.206540f, -0.303418f, 0.075335f,  -0.180468f, -0.064872f, -0.755948f,
-      -0.509287f, -0.048877f, -0.001512f, 0.077086f,
-    };
-
-static const float av1_partition_breakout_nn_bias_16_layer0[16] = {
-  16.421495f, 4.012273f,  -1.828571f, 0.000000f,  -0.263564f, -0.201972f,
-  6.564987f,  14.651000f, -3.227779f, 2.241833f,  -0.137116f, 0.762876f,
-  5.625762f,  0.615822f,  0.040057f,  16.668884f,
-};
-
-static const float av1_partition_breakout_nn_weights_16_layer1[16] = {
-  -0.096440f, 0.184316f,  -0.021148f, 0.424974f, 0.003743f,  0.006310f,
-  0.046266f,  -0.219224f, -0.087004f, 0.024623f, -0.275798f, 0.120164f,
-  0.269773f,  -0.021105f, -0.146698f, 0.188764f,
-};
-
-static const float av1_partition_breakout_nn_bias_16_layer1[1] = {
-  1.60751927f,
-};
-
-static const NN_CONFIG av1_partition_breakout_nnconfig_16 = {
-  FEATURE_SIZE,  // num_inputs
-  1,             // num_outputs
-  1,             // num_hidden_layers
-  {
-      16,  // num_hidden_nodes
-  },
-  {
-      av1_partition_breakout_nn_weights_16_layer0,
-      av1_partition_breakout_nn_weights_16_layer1,
-  },
-  {
-      av1_partition_breakout_nn_bias_16_layer0,
-      av1_partition_breakout_nn_bias_16_layer1,
-  },
-};
-
-static const float
-    av1_partition_breakout_nn_weights_8_layer0[FEATURE_SIZE * 16] = {
-      -0.255885f, 0.109548f,  -0.111054f, -0.476119f, -1.083031f, -0.342003f,
-      0.048241f,  -0.356013f, -0.085054f, 0.124908f,  0.000084f,  -0.149906f,
-      -0.729829f, 0.133535f,  -0.002125f, 0.207516f,  -0.210163f, -0.567365f,
-      -0.590103f, 0.045308f,  -0.539406f, 0.130550f,  -0.663879f, -0.170549f,
-      0.017587f,  -0.054187f, 0.000550f,  0.038297f,  -0.112891f, -0.012751f,
-      -0.048067f, 0.095564f,  0.079892f,  0.077285f,  -0.749708f, -0.286312f,
-      -0.054334f, 0.132242f,  -0.004152f, -0.209758f, -0.073407f, 0.082306f,
-      -0.001034f, -0.090990f, 0.122823f,  -0.109794f, -0.230066f, -0.391155f,
-      -0.262245f, -0.004744f, -0.232246f, 0.099290f,  -0.637484f, 0.111937f,
-      -0.548556f, -0.598344f, 0.123265f,  -0.281395f, -0.399711f, -0.525671f,
-      -0.596269f, 0.098494f,  -0.005765f, 0.173652f,
-    };
-
-static const float av1_partition_breakout_nn_bias_8_layer0[16] = {
-  0.194141f, -0.111223f, 2.503733f, -7.155602f, -0.695068f, 0.114874f,
-  2.056990f, 5.284306f,  0.639643f, -2.792049f, -2.232339f, -0.232209f,
-  2.336705f, -0.278834f, 0.231905f, 7.954366f,
-};
-
-static const float av1_partition_breakout_nn_weights_8_layer1[16] = {
-  -0.014439f, 0.010171f, 0.048116f,  -0.090659f, -0.081235f, -0.021840f,
-  -0.017360f, 0.031063f, -0.031737f, -0.023439f, -0.037725f, 0.021954f,
-  0.055858f,  0.230970f, -0.056466f, 0.119780f,
-};
-
-static const float av1_partition_breakout_nn_bias_8_layer1[1] = {
-  1.27784479f,
-};
-
-static const NN_CONFIG av1_partition_breakout_nnconfig_8 = {
-  FEATURE_SIZE,  // num_inputs
-  1,             // num_outputs
-  1,             // num_hidden_layers
-  {
-      16,  // num_hidden_nodes
-  },
-  {
-      av1_partition_breakout_nn_weights_8_layer0,
-      av1_partition_breakout_nn_weights_8_layer1,
-  },
-  {
-      av1_partition_breakout_nn_bias_8_layer0,
-      av1_partition_breakout_nn_bias_8_layer1,
-  },
-};
-#undef FEATURE_SIZE
-
-#define FEATURE_SIZE 9  // Input layer size
-#define NUM_NODES 32    // Hidden layer size
-#define LABEL_SIZE 3    // Output layer size
-
-static const float av1_rect_partition_nn_weights_8_layer0[FEATURE_SIZE *
-                                                          NUM_NODES] = {
-  0.22151f,  0.99424f,  0.23415f,  -1.13841f, -0.11277f, 0.09530f,  0.14769f,
-  -1.18895f, -0.96640f, -0.21421f, -0.13974f, 0.03236f,  0.15777f,  -0.03176f,
-  0.02729f,  -0.37344f, -0.01727f, -0.05469f, 0.19402f,  -3.45508f, 0.90106f,
-  -2.91557f, 0.19379f,  0.14356f,  -0.13291f, 0.05734f,  -0.03032f, -0.13060f,
-  0.35744f,  1.31630f,  -1.54493f, -0.20749f, -0.24413f, -0.04524f, -0.12400f,
-  1.08305f,  -0.21596f, 0.76244f,  1.10616f,  -1.71706f, 0.05768f,  0.10966f,
-  0.00949f,  -0.12680f, 0.00699f,  -0.11522f, -0.38566f, 0.34283f,  -0.35266f,
-  -0.40643f, -0.22462f, 0.32300f,  -0.39737f, -0.20587f, -0.16096f, 1.07543f,
-  0.30314f,  -1.35659f, -0.38212f, 0.45857f,  0.76615f,  0.16819f,  -1.24459f,
-  0.39677f,  0.87436f,  -2.33757f, 1.27471f,  0.27488f,  0.01019f,  -0.01221f,
-  -0.07461f, -0.14577f, -0.01231f, -0.64426f, -1.02733f, -1.96242f, 0.95143f,
-  -0.06777f, -1.13868f, 0.01354f,  -0.75590f, -0.78222f, -0.07453f, 0.61788f,
-  0.56899f,  1.17144f,  0.70899f,  0.48568f,  0.11266f,  0.81579f,  -0.03929f,
-  0.01088f,  0.33599f,  -0.22401f, -0.49654f, -0.02598f, 0.04509f,  -0.08217f,
-  -0.30687f, 0.19851f,  -2.96860f, -2.30698f, 0.01848f,  0.11801f,  0.06614f,
-  0.01673f,  -0.11002f, -0.08168f, 0.09204f,  -0.06379f, 0.27972f,  -0.31716f,
-  -0.00566f, -0.13651f, -0.37276f, 0.01511f,  -0.23697f, 0.21696f,  -0.19480f,
-  0.60758f,  -0.43506f, -0.02247f, -1.45073f, 0.84442f,  -0.94018f, 0.32550f,
-  0.03985f,  -0.06581f, 0.21665f,  0.79472f,  -2.41080f, 0.04788f,  -0.09492f,
-  -0.10677f, 0.07250f,  0.14329f,  -0.37319f, 0.53043f,  -0.49108f, 0.25792f,
-  -0.36569f, -0.28669f, -0.18416f, -0.52385f, -1.17081f, -1.32153f, -1.13403f,
-  -0.26196f, 0.93379f,  0.72115f,  0.54464f,  0.27642f,  0.04757f,  2.01629f,
-  1.55787f,  -0.11665f, 1.00722f,  -0.24352f, 0.53308f,  0.57719f,  0.39344f,
-  0.19174f,  0.06339f,  -0.02530f, 0.07724f,  -0.32416f, -0.26992f, -0.35887f,
-  -0.35285f, -0.33379f, -0.37475f, -0.77335f, 1.70027f,  -1.52153f, -0.26503f,
-  0.97552f,  -2.96705f, -0.91220f, -0.11827f, 0.00406f,  -0.14514f, 0.18417f,
-  -0.20874f, 0.27293f,  -0.34072f, -0.34838f, -0.19054f, -0.29806f, -0.27960f,
-  -0.19293f, -0.18275f, -0.05902f, 0.58625f,  -0.05470f, -0.48814f, -0.45382f,
-  -0.05959f, 2.01250f,  -0.30014f, 0.69546f,  -1.24180f, 1.34923f,  0.20337f,
-  0.16850f,  0.07187f,  0.72630f,  -0.15380f, -2.40973f, -2.73561f, -1.71375f,
-  -1.61695f, 0.50052f,  0.09730f,  0.00579f,  0.06133f,  -0.06512f, -0.61439f,
-  -1.16173f, -0.58716f, 1.60438f,  0.23242f,  0.91847f,  0.49041f,  -0.16277f,
-  -0.02574f, -0.64593f, 1.17028f,  0.46852f,  0.14926f,  0.73853f,  -0.78521f,
-  0.05959f,  -0.35590f, 0.02039f,  0.10812f,  -0.28650f, 1.34038f,  -0.72188f,
-  0.62385f,  -0.35271f, -0.39599f, 0.41543f,  0.53124f,  -0.23510f, -0.15480f,
-  -0.05066f, -0.33529f, 0.05238f,  -0.35311f, -0.26983f, -0.39764f, 0.01085f,
-  0.26593f,  -0.18411f, -0.29945f, 0.50090f,  -0.03397f, 0.78562f,  -0.33068f,
-  1.21308f,  -2.23273f, -0.33366f, -0.15164f, -1.13270f, 0.17394f,  0.65567f,
-  0.76496f,  0.44325f,  0.01368f,  -0.33619f, -0.64256f, 0.64478f,  0.84553f,
-  1.74183f,  0.22563f,  -0.14550f, -0.16258f, 0.03010f,  0.49922f,  0.64575f,
-  -0.29187f, -0.10348f, -1.43619f, -0.56540f, -0.14779f, 0.04616f,  0.87411f,
-  -1.08228f,
-};
-
-static const float av1_rect_partition_nn_bias_8_layer0[NUM_NODES] = {
-  0.33919f,  -0.03003f, 0.79073f,  -0.18508f, 0.00668f,  -0.12017f, 0.35362f,
-  -0.51642f, 0.06536f,  0.41668f,  -0.06509f, 0.94606f,  -0.15385f, 0.14936f,
-  1.46274f,  -0.06961f, 2.82537f,  -1.95576f, -0.09457f, 0.02042f,  -0.07480f,
-  -0.55083f, 0.26170f,  4.39883f,  0.33999f,  -0.10502f, 0.70884f,  -0.06992f,
-  -0.22638f, 1.40940f,  -0.09309f, 0.05828f,
-};
-
-static const float av1_rect_partition_nn_weights_8_layer1[NUM_NODES *
-                                                          LABEL_SIZE] = {
-  0.09209f,  0.26236f,  0.62136f,  0.76324f,  -1.14678f, 0.42289f,  -0.08895f,
-  -0.97267f, 2.05958f,  0.00843f,  0.35335f,  1.12096f,  -0.11679f, 0.07350f,
-  -1.23231f, -0.61990f, 1.51379f,  -1.99450f, 0.22441f,  2.41974f,  -0.30488f,
-  -0.37869f, 0.47168f,  -3.70132f, 0.00061f,  0.19432f,  0.11512f,  0.26200f,
-  -0.35285f, 0.37985f,  0.90571f,  0.27344f,  0.74840f,  -0.17965f, -2.51433f,
-  0.59235f,  1.16670f,  -0.53446f, 0.67897f,  0.04505f,  -0.86874f, 0.45361f,
-  -0.35033f, 1.21283f,  0.31426f,  -0.20841f, 0.56757f,  0.45909f,  -1.23683f,
-  0.09835f,  -0.17214f, -0.96323f, 0.01138f,  -0.50233f, 0.30104f,  2.01814f,
-  1.15821f,  -0.11947f, 0.74574f,  -0.30714f, -0.39646f, -1.30086f, -0.88541f,
-  -0.12259f, -0.54977f, 0.30069f,  1.84299f,  -0.95141f, -0.65887f, -0.25888f,
-  -0.63265f, 1.29531f,  -0.56672f, 0.10837f,  -0.21297f, -2.19131f, 0.01156f,
-  0.51912f,  0.46704f,  0.42810f,  -0.59271f, 0.98469f,  -0.17914f, -1.91163f,
-  -0.32807f, 0.48199f,  -0.99525f, 1.67108f,  -0.87631f, -0.60258f, -0.78731f,
-  -0.32877f, 0.44237f,  0.01087f,  0.07489f,  -0.28224f,
-};
-
-static const float av1_rect_partition_nn_bias_8_layer1[LABEL_SIZE] = {
-  1.70665f,
-  -0.77954f,
-  -0.92709f,
-};
-
-static const NN_CONFIG av1_rect_partition_nnconfig_8 = {
-  FEATURE_SIZE,  // num_inputs
-  LABEL_SIZE,    // num_outputs
-  1,             // num_hidden_layers
-  {
-      NUM_NODES,
-  },  // num_hidden_nodes
-  { av1_rect_partition_nn_weights_8_layer0,
-    av1_rect_partition_nn_weights_8_layer1 },
-  { av1_rect_partition_nn_bias_8_layer0, av1_rect_partition_nn_bias_8_layer1 }
-};
-
-static const float av1_rect_partition_nn_weights_16_layer0[FEATURE_SIZE *
-                                                           NUM_NODES] = {
-  -0.18480f, -0.05410f, -0.18957f, 0.15451f,  -0.38649f, -0.26162f, -0.22727f,
-  -0.38555f, -0.36738f, 0.74384f,  -1.85999f, 0.98491f,  -0.72119f, 1.77321f,
-  0.39983f,  0.96314f,  0.23695f,  0.30200f,  0.30629f,  -0.47617f, -1.43320f,
-  -1.81730f, 0.36554f,  -0.07142f, -1.27242f, -1.27697f, 0.00110f,  -0.32179f,
-  0.27460f,  0.45428f,  0.15308f,  -0.73906f, -0.28577f, -0.01238f, -0.16958f,
-  -0.85390f, 1.05484f,  -1.62812f, 0.77632f,  -0.27327f, -0.32527f, 0.32726f,
-  1.73255f,  0.53763f,  0.59121f,  -0.39068f, -0.32451f, -0.31869f, 0.17777f,
-  0.07519f,  -0.18066f, -0.11250f, -0.14616f, -0.16882f, -0.04099f, -0.67959f,
-  0.39674f,  -0.08596f, 0.18587f,  -2.04097f, -1.73993f, 1.57212f,  1.42410f,
-  -1.36762f, -0.41485f, -1.12103f, 0.56959f,  0.11500f,  0.48945f,  -0.13585f,
-  1.22125f,  0.67071f,  -1.11812f, -0.20660f, -0.52856f, 0.70663f,  0.74382f,
-  0.61114f,  -0.11454f, 1.14687f,  0.80322f,  -0.45965f, -0.44466f, -0.05830f,
-  0.13206f,  -0.53750f, -0.11324f, -0.37971f, -0.13491f, -0.21268f, 1.93407f,
-  1.34433f,  2.49427f,  2.91955f,  1.71730f,  0.03295f,  0.03587f,  -0.14550f,
-  0.08189f,  -0.38655f, -0.35432f, -0.62706f, -0.01849f, -0.57882f, -0.60438f,
-  -1.01334f, -0.57302f, 0.22592f,  0.05916f,  -0.05305f, -0.89824f, -0.52969f,
-  -0.24542f, 0.27029f,  -0.40924f, -0.82452f, -0.60665f, -5.03025f, 0.83302f,
-  1.83695f,  2.19716f,  2.31001f,  0.03657f,  0.00063f,  -0.04379f, 0.05835f,
-  -0.08623f, 0.20557f,  -0.17791f, 0.07874f,  -0.25456f, -0.19513f, -0.27753f,
-  -0.31982f, 0.00245f,  -0.33183f, 0.26059f,  -0.22165f, 0.37582f,  -0.30411f,
-  -0.22639f, -0.14739f, -0.20201f, -0.37507f, -1.30653f, 0.49570f,  1.03673f,
-  0.66139f,  0.44941f,  -0.44461f, -0.50376f, -0.49664f, 0.18608f,  -0.26175f,
-  0.14844f,  0.78715f,  -0.70344f, -0.87624f, -0.98535f, -0.35346f, 0.37094f,
-  -0.43135f, -0.22571f, 3.46263f,  3.13580f,  -1.33203f, -0.15247f, -0.15866f,
-  -0.11214f, 0.12211f,  0.03964f,  -1.87597f, -4.81597f, -4.80195f, -4.98096f,
-  -5.62336f, -0.05337f, -0.00943f, 0.00792f,  0.02742f,  1.05679f,  2.41455f,
-  0.85382f,  1.42504f,  0.58096f,  0.21443f,  1.02694f,  1.06746f,  1.20242f,
-  0.60767f,  1.98667f,  -0.80879f, -0.63495f, 1.95508f,  0.23952f,  -0.15019f,
-  -0.16097f, 0.30155f,  -3.42407f, -1.34998f, 9.07689f,  -2.22559f, 2.22562f,
-  -0.03348f, -0.05229f, 0.05931f,  0.03042f,  -0.18068f, -0.05732f, -0.33010f,
-  -0.32279f, -0.26607f, -0.02723f, -0.04067f, 0.08700f,  -0.16366f, -0.24935f,
-  -0.69124f, 0.58508f,  0.50654f,  0.04492f,  1.38340f,  -1.51487f, 1.72889f,
-  -1.95618f, -3.65013f, -1.38525f, -3.05516f, -2.40448f, 2.47467f,  0.03784f,
-  0.08052f,  -0.01971f, -0.08918f, -0.84997f, -0.55302f, -1.07861f, -0.62626f,
-  0.61751f,  -0.11012f, -0.24185f, -0.39201f, -1.85390f, -0.31261f, -0.11927f,
-  0.15671f,  -0.23450f, -0.14916f, -0.31715f, -0.19350f, 0.01795f,  -0.11533f,
-  -0.05799f, -0.03142f, 0.20218f,  -0.39499f, -0.33859f, -0.13201f, -0.19527f,
-  -0.28459f, -0.20346f, 0.89457f,  -2.22103f, -2.37455f, -2.00221f, 2.44553f,
-  0.33915f,  0.50047f,  -0.34625f, -0.19667f, -0.56333f, -0.84328f, 1.25767f,
-  -1.70297f, 1.00482f,  -0.00103f, -1.40813f, 0.21311f,  0.39230f,  -0.07302f,
-  -3.49100f, 1.60675f,  -2.90692f, 0.11022f,  0.13507f,  -0.13308f, 0.15201f,
-  -0.05573f,
-};
-
-static const float av1_rect_partition_nn_bias_16_layer0[NUM_NODES] = {
-  -0.16783f, -0.16023f, 0.52215f,  -0.04109f, 2.00122f,  -0.11633f, 0.25535f,
-  1.80638f,  1.69273f,  -0.25998f, -6.83550f, -0.79682f, -1.03466f, 1.42721f,
-  0.00000f,  -0.00000f, -0.11665f, -0.12047f, -1.01497f, 7.27181f,  -0.78548f,
-  -1.39335f, -5.42248f, -0.10388f, 0.07634f,  2.81012f,  -0.57429f, -0.15629f,
-  -0.12044f, 1.65478f,  -0.75153f, 1.18441f,
-};
-
-static const float av1_rect_partition_nn_weights_16_layer1[NUM_NODES *
-                                                           LABEL_SIZE] = {
-  -0.26407f, 0.06322f,  0.87932f,  0.17772f,  0.71686f,  -0.12283f, 0.08454f,
-  0.20098f,  -0.31763f, -0.33178f, -4.59535f, -0.04367f, 0.17099f,  3.80486f,
-  0.16750f,  0.29218f,  0.57234f,  -0.96550f, -0.10599f, -4.91130f, -0.14658f,
-  0.95803f,  -4.13925f, 0.24567f,  0.25708f,  1.60547f,  -1.03251f, -0.31053f,
-  -0.05659f, -0.94121f, -0.68926f, -0.24738f, -0.38019f, 0.98950f,  0.13689f,
-  0.24504f,  0.49623f,  0.19980f,  0.38349f,  0.37481f,  0.54540f,  -0.02198f,
-  3.43385f,  1.02543f,  -0.40921f, -3.07235f, 0.02996f,  0.00323f,  -0.35414f,
-  0.71099f,  1.39334f,  2.43741f,  -1.11007f, -0.22739f, -4.21757f, 0.11905f,
-  0.00353f,  -1.69637f, 0.45944f,  -0.19884f, 0.03624f,  0.25729f,  0.23659f,
-  -2.08405f, 0.08573f,  -0.53393f, -1.28103f, -0.53970f, -0.65465f, 0.31821f,
-  -0.09884f, -0.69026f, -0.37284f, 0.04622f,  1.32973f,  -0.15414f, 0.19138f,
-  -0.67927f, -0.17658f, 0.36008f,  -0.51832f, 0.09887f,  -1.94414f, 2.95227f,
-  1.76937f,  -0.26687f, 8.50976f,  0.26247f,  0.60262f,  -0.27910f, 0.30061f,
-  -0.05117f, 0.16018f,  0.71195f,  0.57871f,  1.57794f,
-};
-
-static const float av1_rect_partition_nn_bias_16_layer1[3] = {
-  2.68750f,
-  -1.31894f,
-  -1.36768f,
-};
-
-static const NN_CONFIG av1_rect_partition_nnconfig_16 = {
-  FEATURE_SIZE,  // num_inputs
-  LABEL_SIZE,    // num_outputs
-  1,             // num_hidden_layers
-  {
-      NUM_NODES,
-  },  // num_hidden_nodes
-  { av1_rect_partition_nn_weights_16_layer0,
-    av1_rect_partition_nn_weights_16_layer1 },
-  { av1_rect_partition_nn_bias_16_layer0, av1_rect_partition_nn_bias_16_layer1 }
-};
-
-static const float av1_rect_partition_nn_weights_32_layer0[FEATURE_SIZE *
-                                                           NUM_NODES] = {
-  -0.54654f, -0.43537f, -0.10620f, -0.48051f, -0.43543f, -0.22737f, -0.15429f,
-  -0.09858f, -0.09438f, 0.37306f,  0.23934f,  -1.86375f, -1.18307f, -0.32995f,
-  -0.09745f, 0.05431f,  -0.13799f, 0.14734f,  -0.33219f, 0.18057f,  -0.23792f,
-  -0.28126f, 0.02977f,  -0.07431f, 0.07860f,  0.00067f,  -0.01927f, 1.01841f,
-  -0.57739f, 0.08412f,  -1.33843f, -1.05563f, -0.28693f, -0.39425f, -0.69572f,
-  -0.16703f, 0.02808f,  0.11994f,  -0.26267f, 0.19706f,  -0.29707f, -0.25305f,
-  -0.07050f, -0.02704f, -0.31528f, -0.42301f, 0.22496f,  -0.37001f, -0.23319f,
-  -0.11139f, -0.30513f, 0.04213f,  -0.12550f, 0.02504f,  0.33245f,  0.01102f,
-  -0.35950f, -0.05949f, -0.19590f, -0.27457f, -0.28339f, -0.15676f, -0.21538f,
-  0.65066f,  0.28443f,  -1.24943f, -3.00246f, -1.01897f, 0.09304f,  0.70052f,
-  -0.12877f, 0.21120f,  -0.37476f, 0.23261f,  -0.28401f, 0.09837f,  0.00020f,
-  -0.12106f, -0.32354f, -0.02472f, -0.19772f, 1.01886f,  0.16596f,  -0.06532f,
-  1.72938f,  1.57754f,  0.55963f,  0.33246f,  -0.20023f, 0.30715f,  0.08629f,
-  0.18945f,  -0.45988f, -1.22610f, -0.05152f, -0.48859f, -1.02104f, -0.27315f,
-  -0.57698f, 0.04157f,  -0.92428f, -1.31268f, 1.78210f,  0.10291f,  1.55042f,
-  -1.26793f, 1.39042f,  -1.43729f, 0.25600f,  5.21263f,  5.31955f,  5.19316f,
-  5.43430f,  0.00294f,  -0.00970f, -0.02333f, 0.00250f,  1.17672f,  6.27544f,
-  4.95973f,  3.54009f,  4.51269f,  0.30750f,  0.78780f,  -0.44741f, -0.76442f,
-  0.75050f,  0.58799f,  0.03400f,  -2.09859f, 1.67313f,  0.12503f,  0.28609f,
-  1.15809f,  2.46530f,  -0.04898f, 0.23072f,  -0.12635f, -0.82097f, -0.63827f,
-  2.16779f,  1.77132f,  0.15434f,  -1.06427f, 0.06206f,  -0.87732f, -0.61897f,
-  -0.44593f, -0.77131f, -0.15979f, -0.02282f, -0.74381f, 0.66052f,  -0.22992f,
-  1.74638f,  1.29199f,  -0.55464f, 0.98316f,  0.06665f,  0.50254f,  -0.66292f,
-  0.17113f,  -0.32633f, -1.85803f, -0.92759f, 4.44965f,  1.33057f,  0.02135f,
-  -0.27446f, -0.26018f, -0.12613f, -0.14470f, -0.23355f, -0.09717f, -0.24123f,
-  -0.05535f, -0.19146f, -0.36222f, -0.30458f, -0.40323f, 0.21779f,  0.14248f,
-  -0.48630f, 0.18840f,  0.11040f,  0.17287f,  -0.51880f, 1.12466f,  -0.38888f,
-  -0.16421f, -0.31784f, -0.36112f, -0.25386f, -0.01636f, 0.10029f,  -0.26881f,
-  -0.17051f, -0.30903f, -0.08573f, -0.28774f, -0.01173f, -0.09706f, -0.23089f,
-  -0.12922f, -0.17463f, -0.12433f, -0.23074f, 0.15220f,  1.29826f,  0.23788f,
-  0.04189f,  2.66416f,  0.48815f,  -0.06803f, 0.96742f,  1.27165f,  -0.70348f,
-  -0.09941f, -0.42948f, -0.20243f, -0.02364f, -0.26689f, -0.40629f, -0.68217f,
-  -0.48073f, 2.43657f,  -2.60191f, -1.82837f, 0.50440f,  0.71829f,  0.76491f,
-  0.28293f,  0.20568f,  0.92642f,  -0.02496f, 1.43637f,  -0.24474f, -1.21030f,
-  0.54084f,  1.05130f,  1.29572f,  0.03750f,  -0.36894f, 0.74548f,  -1.33857f,
-  -0.84858f, 1.35230f,  0.80175f,  0.66136f,  1.06473f,  0.18701f,  1.42413f,
-  0.04661f,  -0.07820f, 0.64990f,  -0.43595f, 1.18304f,  -0.11437f, -0.06365f,
-  0.03558f,  0.78260f,  -1.74890f, 1.56217f,  -1.23424f, 4.59193f,  -3.35072f,
-  0.01180f,  -0.18296f, -0.20870f, 0.04510f,  1.52595f,  -1.37402f, -0.33123f,
-  -0.85957f, 0.80598f,  0.03743f,  0.02354f,  0.37707f,  1.62095f,  -0.29627f,
-  -0.31778f, -0.45789f, -0.14906f, 0.25315f,  -0.10817f, -0.32610f, -0.40890f,
-  0.33984f,
-};
-
-static const float av1_rect_partition_nn_bias_32_layer0[NUM_NODES] = {
-  -0.17482f, 0.39042f,  0.00000f,  1.69677f,  0.08792f,  -0.09301f, 0.13809f,
-  4.84061f,  0.00000f,  0.40515f,  0.46246f,  0.20644f,  -5.77478f, -1.54510f,
-  0.05660f,  -0.32013f, 0.23649f,  0.03778f,  -2.53710f, -0.27869f, 0.45623f,
-  -0.04155f, -0.18445f, -0.73405f, -0.50243f, 2.23191f,  1.93272f,  -1.07032f,
-  -0.27602f, -1.98063f, 0.20816f,  -0.01315f,
-};
-
-static const float av1_rect_partition_nn_weights_32_layer1[NUM_NODES *
-                                                           LABEL_SIZE] = {
-  0.02827f,  1.02560f,  -0.07137f, -0.31911f, 0.11365f,  0.13684f,  -0.07816f,
-  -5.23036f, -0.34340f, 0.84526f,  -1.51845f, 0.07017f,  -8.12570f, 6.24061f,
-  0.35739f,  -0.09937f, -0.30978f, 0.22032f,  0.74968f,  -0.34557f, 0.45547f,
-  -0.16512f, 0.07118f,  1.66415f,  0.41320f,  -1.81533f, -1.96004f, 1.04666f,
-  0.84049f,  4.31009f,  0.68850f,  0.26322f,  -0.24634f, -1.25889f, 0.31952f,
-  0.63632f,  0.05801f,  -0.10664f, -0.21992f, 2.44386f,  0.19526f,  -0.09838f,
-  1.53049f,  -0.26630f, 3.54126f,  -3.40574f, 0.72730f,  0.04557f,  0.92652f,
-  0.15522f,  2.35895f,  -0.13347f, 0.56907f,  0.15352f,  0.01823f,  -0.73939f,
-  0.43104f,  1.90321f,  0.31267f,  -0.51972f, 0.50094f,  -3.98372f, -3.41518f,
-  -0.48183f, 0.26661f,  0.64146f,  0.14500f,  -0.01695f, 0.16653f,  -0.37846f,
-  0.08412f,  2.69714f,  -0.20258f, -0.75786f, 0.11201f,  0.61878f,  4.22231f,
-  -3.55330f, -1.14137f, -0.37722f, -0.28000f, -0.72581f, -2.62827f, -0.19448f,
-  -0.59398f, -0.30136f, -0.17725f, -0.69630f, -0.41132f, 0.12208f,  2.11441f,
-  -1.08794f, -1.41694f, 0.02620f,  2.18792f,  0.04271f,
-};
-
-static const float av1_rect_partition_nn_bias_32_layer1[3] = {
-  2.47332f,
-  -1.65756f,
-  -0.81573f,
-};
-
-static const NN_CONFIG av1_rect_partition_nnconfig_32 = {
-  FEATURE_SIZE,  // num_inputs
-  LABEL_SIZE,    // num_outputs
-  1,             // num_hidden_layers
-  {
-      NUM_NODES,
-  },  // num_hidden_nodes
-  { av1_rect_partition_nn_weights_32_layer0,
-    av1_rect_partition_nn_weights_32_layer1 },
-  { av1_rect_partition_nn_bias_32_layer0, av1_rect_partition_nn_bias_32_layer1 }
-};
-
-static const float av1_rect_partition_nn_weights_64_layer0[FEATURE_SIZE *
-                                                           NUM_NODES] = {
-  0.08972f,  4.09095f,  -0.31398f, -2.43631f, -0.74767f, 1.42471f,  1.60926f,
-  1.44721f,  1.88259f,  2.35375f,  1.88299f,  2.01109f,  0.98679f,  2.24131f,
-  0.06279f,  -0.08315f, 0.32107f,  0.91334f,  -0.36569f, 5.55049f,  5.44943f,
-  5.20471f,  5.39099f,  -0.01943f, -0.00284f, 0.02203f,  -0.01309f, 1.41917f,
-  6.68460f,  -6.15986f, 6.41341f,  -3.20630f, -0.00567f, -0.00038f, 0.05960f,
-  0.04308f,  0.95366f,  3.48535f,  2.98266f,  4.11784f,  3.44255f,  0.61630f,
-  0.71405f,  0.63945f,  -0.00713f, 0.39193f,  1.91621f,  3.32755f,  0.71674f,
-  -0.11647f, 2.07090f,  2.64191f,  0.07949f,  -0.05023f, 0.99935f,  0.83145f,
-  0.75898f,  -0.98764f, -0.58731f, 1.21734f,  -0.08076f, -3.26780f, 1.66278f,
-  0.04189f,  -0.33177f, -1.58648f, 1.00883f,  -0.56132f, -2.34877f, 0.67056f,
-  -2.32297f, -0.91641f, -1.02909f, 4.19781f,  3.87484f,  4.32778f,  -1.97171f,
-  -0.24734f, 0.00822f,  0.05892f,  0.12697f,  -3.62915f, -2.93127f, 7.94856f,
-  -3.29311f, 3.26001f,  -0.02231f, 0.02741f,  0.05919f,  0.08190f,  -1.49344f,
-  -0.64475f, -0.24627f, 4.03324f,  -1.14799f, -0.18465f, -0.17829f, 0.10394f,
-  0.08580f,  -5.74721f, 4.42467f,  3.63964f,  3.00258f,  -1.22744f, -0.29408f,
-  0.00767f,  0.12305f,  0.05249f,  -0.17166f, -0.20120f, -0.32941f, -0.31901f,
-  0.04628f,  -0.35249f, -0.18272f, 0.03956f,  -0.19329f, -0.33564f, 0.09856f,
-  -0.00173f, -0.31751f, -0.05702f, -0.20558f, -0.31464f, -0.02488f, -0.00729f,
-  -0.35854f, -0.14762f, -0.34897f, -0.12746f, 0.04011f,  -0.24918f, -0.53516f,
-  -0.28440f, -0.36789f, -1.34889f, -9.10044f, -9.19238f, 4.48042f,  6.54429f,
-  -0.00226f, 0.00430f,  0.00321f,  0.00442f,  0.87551f,  -0.16224f, -0.22832f,
-  -0.60640f, -0.28738f, 0.18062f,  0.22008f,  -0.47406f, 0.80302f,  0.12149f,
-  1.49530f,  1.05069f,  -2.02985f, -0.92833f, 0.25616f,  0.12852f,  3.51840f,
-  0.25226f,  -2.63283f, -4.04386f, 8.46300f,  -2.93408f, 0.44069f,  0.08276f,
-  0.34482f,  -0.22615f, 0.28666f,  3.02962f,  -1.20055f, -1.04832f, -0.97632f,
-  -0.99530f, 1.44196f,  1.68550f,  0.49360f,  1.08155f,  -0.26059f, -0.02876f,
-  -0.27492f, -0.06205f, -0.09496f, -0.12314f, -0.30228f, -0.07453f, -0.38857f,
-  1.17443f,  2.41497f,  1.90537f,  2.37716f,  2.91495f,  -0.44455f, -0.51176f,
-  0.48195f,  0.53032f,  0.23696f,  -1.06211f, 1.47459f,  -0.89029f, 0.29521f,
-  0.66291f,  -0.42653f, 1.82308f,  -1.30372f, -0.36192f, -3.40388f, -1.61476f,
-  -2.29745f, -0.66886f, -2.08252f, -0.54552f, -4.06849f, 0.02948f,  0.27297f,
-  -4.81472f, 4.60404f,  -0.11053f, 0.14765f,  0.02826f,  -0.14688f, -0.07066f,
-  -0.01224f, 1.20377f,  7.02725f,  -6.02627f, 6.87255f,  -3.14257f, 0.01074f,
-  0.02397f,  -0.02359f, 0.01901f,  0.14956f,  -1.67671f, 2.26714f,  2.57043f,
-  -0.45888f, -1.60265f, -2.11475f, -2.74029f, -2.74658f, -0.35630f, -2.63013f,
-  -2.14814f, -0.67266f, -1.56850f, 0.57137f,  -1.14428f, -0.34265f, -0.12521f,
-  0.01220f,  -0.74906f, -0.19270f, 0.68110f,  -0.24737f, -0.70568f, -1.64826f,
-  -0.35847f, -0.15984f, -1.17932f, -8.72306f, -8.72834f, 3.93701f,  6.17812f,
-  -0.03191f, -0.00104f, 0.01402f,  -0.00046f, -0.94517f, 1.51266f,  -0.56318f,
-  0.72260f,  -0.09253f, -0.09069f, -2.16695f, -0.23653f, 0.24418f,  2.21148f,
-  -1.47954f, -1.01439f, 0.31536f,  0.77238f,  -0.85083f, -0.15758f, -0.50886f,
-  0.09101f,
-};
-
-static const float av1_rect_partition_nn_bias_64_layer0[NUM_NODES] = {
-  0.91706f,  -1.31328f, -5.16196f, 1.13191f,  -0.98044f, -1.61122f, 1.03039f,
-  -0.98537f, -4.45568f, -4.34802f, -0.92116f, 0.66836f,  -0.10752f, -0.13065f,
-  -0.35567f, -0.35693f, 1.74941f,  1.17379f,  -3.45555f, 5.66321f,  -0.24917f,
-  -1.11940f, -0.73656f, -0.19299f, -0.04181f, 1.11010f,  -2.97859f, -0.16774f,
-  0.59835f,  -0.31269f, -0.30585f, -1.66212f,
-};
-
-static const float av1_rect_partition_nn_weights_64_layer1[NUM_NODES *
-                                                           LABEL_SIZE] = {
-  0.58963f,  4.20320f,  -8.62465f, -6.54014f, 5.41108f,  2.33581f,   -0.10354f,
-  -1.17753f, -3.45909f, -2.24722f, 2.20881f,  3.21971f,  -0.09087f,  -0.21624f,
-  0.16529f,  -8.40985f, -1.60205f, -1.41538f, 4.41826f,  -4.63069f,  -0.27742f,
-  4.08710f,  0.26439f,  -1.46028f, 0.51234f,  6.25212f,  -3.35650f,  -1.21348f,
-  1.37201f,  8.89151f,  0.28859f,  -0.97328f, -0.36196f, -2.71701f,  4.54196f,
-  -0.62476f, -2.43814f, -1.34209f, 0.12850f,  1.73859f,  3.09809f,   -4.42434f,
-  -1.82552f, -3.66420f, -0.31535f, 0.00968f,  -0.02019f, 9.66824f,   0.58835f,
-  1.50425f,  2.84487f,  2.55522f,  0.01409f,  -2.27594f, -0.31800f,  0.91076f,
-  -0.66808f, 0.33120f,  -0.12460f, 0.64457f,  -0.36416f, -10.30843f, 1.51013f,
-  2.06861f,  -0.20989f, -0.87119f, 3.68642f,  7.33662f,  -2.88037f,  -0.52414f,
-  -0.35036f, -0.45947f, -0.07406f, 6.46346f,  -0.16031f, 0.27071f,   0.38845f,
-  -0.21940f, 0.08583f,  -1.39526f, 0.50554f,  0.45279f,  -6.61856f,  1.84069f,
-  -0.19149f, -1.77235f, 0.75136f,  1.11797f,  0.32677f,  -7.10427f,  3.82908f,
-  1.04238f,  -0.91435f, 1.93317f,  -1.84946f, -0.48909f,
-};
-
-static const float av1_rect_partition_nn_bias_64_layer1[3] = {
-  0.32215f,
-  -0.57522f,
-  0.25314f,
-};
-
-static const NN_CONFIG av1_rect_partition_nnconfig_64 = {
-  FEATURE_SIZE,  // num_inputs
-  LABEL_SIZE,    // num_outputs
-  1,             // num_hidden_layers
-  {
-      NUM_NODES,
-  },  // num_hidden_nodes
-  { av1_rect_partition_nn_weights_64_layer0,
-    av1_rect_partition_nn_weights_64_layer1 },
-  { av1_rect_partition_nn_bias_64_layer0, av1_rect_partition_nn_bias_64_layer1 }
-};
-
-static const float av1_rect_partition_nn_weights_128_layer0[FEATURE_SIZE *
-                                                            NUM_NODES] = {
-  -0.70901f, -3.03481f, 3.30604f,  -1.28803f,  -0.08610f, -0.33320f, -0.30716f,
-  0.25100f,  0.14323f,  -0.98422f, -0.89084f,  -0.24508f, -1.10785f, -0.82524f,
-  0.11766f,  -0.42777f, 1.08965f,  4.35125f,   -1.19388f, 4.22042f,  4.96306f,
-  6.32406f,  3.29899f,  -0.90768f, 0.05203f,   0.38467f,  1.74257f,  -0.19918f,
-  -0.11335f, 0.00140f,  -0.42303f, -0.04419f,  0.03583f,  -0.05441f, -0.19586f,
-  0.01484f,  -1.19964f, 0.25497f,  3.04502f,   0.05446f,  -0.23253f, 0.00266f,
-  0.07117f,  -2.78986f, -4.62953f, 1.45331f,   0.43923f,  0.92298f,  -0.47736f,
-  1.49165f,  0.45942f,  -1.99787f, 3.33510f,   0.17234f,  0.04024f,  -1.42780f,
-  0.23566f,  -0.90970f, 1.18041f,  -1.45865f,  2.30878f,  -1.28507f, 1.87290f,
-  1.91186f,  4.74826f,  -3.70735f, 4.49808f,   -4.72275f, -0.02696f, -0.02642f,
-  -0.06093f, -0.01121f, -0.70683f, 2.69737f,   -1.88563f, 2.48637f,  1.10922f,
-  0.74624f,  0.40308f,  2.06396f,  1.39289f,   0.00909f,  -2.05271f, -1.53539f,
-  -1.38323f, 0.83303f,  -0.32250f, 0.51172f,   3.91249f,  1.66373f,  1.13184f,
-  -2.22874f, -1.13448f, -0.11185f, 0.19387f,   0.36770f,  -0.58933f, 0.22789f,
-  1.17307f,  0.77461f,  0.20817f,  0.33417f,   0.54037f,  0.32961f,  -0.18456f,
-  -9.78171f, -0.17216f, -3.44703f, -2.42158f,  0.51946f,  4.35949f,  -0.73335f,
-  -1.61515f, -0.29622f, -0.37617f, -0.42316f,  0.74922f,  1.44386f,  3.92704f,
-  -3.76274f, 4.19775f,  -3.86958f, 0.00074f,   -0.02418f, -0.12944f, 0.05857f,
-  -0.85507f, 5.42546f,  5.40338f,  5.54347f,   5.59791f,  -0.01611f, 0.01618f,
-  -0.01654f, -0.00270f, -0.39608f, -0.40410f,  -0.24551f, 0.09124f,  -0.34413f,
-  -0.11504f, 0.12793f,  -0.31523f, 0.09148f,   -0.08567f, -0.05140f, -0.13310f,
-  -0.81200f, 0.06882f,  -0.52537f, -12.74048f, -0.45395f, -4.04775f, -1.84887f,
-  -1.02573f, 0.32788f,  1.06828f,  -1.25503f,  -0.42693f, 2.01413f,  -2.29103f,
-  0.62271f,  1.11764f,  -1.83113f, -1.32325f,  -1.65651f, -2.87826f, 1.46910f,
-  0.60885f,  0.16079f,  0.00171f,  -0.25658f,  -0.25465f, -0.14149f, 0.19497f,
-  -0.07866f, -0.37080f, -0.05778f, -0.08870f,  -0.20491f, 0.84521f,  -0.18214f,
-  -1.38441f, -1.08932f, -1.76627f, 0.73172f,   0.05967f,  1.28057f,  3.42722f,
-  1.69287f,  0.77169f,  0.44528f,  1.85513f,   0.07840f,  1.31252f,  2.89948f,
-  1.49489f,  0.15281f,  0.54708f,  -1.14185f,  -2.51063f, 0.36618f,  -0.55322f,
-  0.96671f,  1.59470f,  1.38252f,  1.99697f,   0.03266f,  -0.23200f, -0.01127f,
-  -0.18918f, -0.37598f, -0.03119f, -0.36039f,  -0.21192f, -0.11565f, -4.22635f,
-  1.41252f,  0.56608f,  -0.08867f, 3.11924f,   -0.54597f, -0.12504f, -0.05289f,
-  -0.28665f, -0.58297f, -1.18362f, -0.76201f,  -1.22011f, -0.58756f, 0.14740f,
-  1.43971f,  0.98381f,  -0.02998f, -0.40678f,  -0.23047f, -0.12979f, 0.04003f,
-  -0.22081f, -0.09294f, -0.15955f, -0.10379f,  -0.10192f, -1.51316f, 2.39482f,
-  -1.69975f, 3.58976f,  -0.91032f, -0.03498f,  0.48982f,  -0.13418f, 0.76256f,
-  1.61003f,  -2.01676f, -1.24430f, -3.25763f,  1.12314f,  2.00740f,  0.04613f,
-  -0.14746f, -0.57374f, 3.44511f,  -0.56767f,  -4.08432f, -2.04894f, 2.35951f,
-  -0.00458f, 0.18512f,  0.09916f,  -0.04084f,  -1.56207f, 1.38034f,  4.17302f,
-  -1.47326f, -2.03530f, -0.00210f, 0.27469f,   -0.17423f, 0.86860f,  2.76195f,
-  2.43269f,  -3.57331f, 2.08715f,  -1.44171f,  -0.17389f, 2.26157f,  -0.07852f,
-  2.02519f,
-};
-
-static const float av1_rect_partition_nn_bias_128_layer0[NUM_NODES] = {
-  2.53427f,  1.66678f,  -0.84914f, -0.15070f, -1.74769f, 0.45218f,  -0.26067f,
-  2.05916f,  0.08978f,  5.30984f,  2.66243f,  -1.62740f, 0.70018f,  1.96403f,
-  -4.97152f, -0.05425f, -3.84474f, -1.28006f, 3.47490f,  -0.08373f, 0.00225f,
-  -1.40692f, -0.27569f, -0.30253f, 0.77377f,  -0.67636f, -0.26379f, 1.82348f,
-  0.66120f,  0.61119f,  -1.42293f, 0.32676f,
-};
-
-static const float av1_rect_partition_nn_weights_128_layer1[NUM_NODES *
-                                                            LABEL_SIZE] = {
-  1.53453f,  -0.23707f, 7.88368f,  0.33340f,  0.97523f,  1.38538f,  -0.16746f,
-  4.42070f,  3.18678f,  -5.03545f, -2.27029f, -3.75719f, -0.26850f, -4.93432f,
-  -8.75673f, 0.27398f,  -5.77882f, -0.91616f, -2.62725f, -0.23961f, 0.31249f,
-  3.32134f,  0.25375f,  -0.00394f, 2.30213f,  -0.14183f, 0.14544f,  -1.42830f,
-  1.31101f,  3.99389f,  -0.00017f, -2.90184f, -2.11444f, 2.16734f,  -3.05133f,
-  0.39206f,  4.61489f,  -2.88181f, -0.47745f, 2.86649f,  -1.20621f, 3.70550f,
-  1.58029f,  -4.58731f, -2.29350f, -0.76930f, 5.19135f,  -0.22521f, -5.08782f,
-  2.17316f,  1.30563f,  0.16777f,  -2.17767f, -2.09904f, 1.37001f,  0.25091f,
-  -1.76743f, 1.57940f,  0.30544f,  -2.39895f, -0.08532f, -1.77122f, 1.84010f,
-  -0.88449f, 0.79299f,  -1.35368f, -4.54110f, 0.02244f,  -5.11580f, 1.60883f,
-  0.29352f,  -6.47042f, -1.81426f, 1.24013f,  0.90980f,  7.93977f,  2.12555f,
-  5.24720f,  4.19508f,  0.21499f,  11.06045f, -0.74752f, 0.89396f,  0.26422f,
-  1.72332f,  -1.25113f, -1.71136f, 0.13676f,  -0.07867f, -0.96929f, 0.19911f,
-  3.58233f,  -0.76470f, -2.24162f, -2.87465f, 3.18736f,
-};
-
-static const float av1_rect_partition_nn_bias_128_layer1[3] = {
-  1.09014f,
-  -0.53317f,
-  -0.55668f,
-};
-
-static const NN_CONFIG av1_rect_partition_nnconfig_128 = {
-  FEATURE_SIZE,  // num_inputs
-  LABEL_SIZE,    // num_outputs
-  1,             // num_hidden_layers
-  {
-      NUM_NODES,
-  },  // num_hidden_nodes
-  { av1_rect_partition_nn_weights_128_layer0,
-    av1_rect_partition_nn_weights_128_layer1 },
-  { av1_rect_partition_nn_bias_128_layer0,
-    av1_rect_partition_nn_bias_128_layer1 }
-};
-#undef FEATURE_SIZE
-#undef NUM_NODES
-#undef LABEL_SIZE
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_PARTITION_MODEL_WEIGHTS_H_
diff --git a/third_party/aom/av1/encoder/pickcdef.c b/third_party/aom/av1/encoder/pickcdef.c
deleted file mode 100644
index 6d154a7d2..000000000
--- a/third_party/aom/av1/encoder/pickcdef.c
+++ /dev/null
@@ -1,526 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <string.h>
-
-#include "config/aom_scale_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "av1/common/cdef.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/reconinter.h"
-#include "av1/encoder/encoder.h"
-
-#define REDUCED_PRI_STRENGTHS 8
-#define REDUCED_TOTAL_STRENGTHS (REDUCED_PRI_STRENGTHS * CDEF_SEC_STRENGTHS)
-#define TOTAL_STRENGTHS (CDEF_PRI_STRENGTHS * CDEF_SEC_STRENGTHS)
-
-static int priconv[REDUCED_PRI_STRENGTHS] = { 0, 1, 2, 3, 5, 7, 10, 13 };
-
-/* Search for the best strength to add as an option, knowing we
-   already selected nb_strengths options. */
-static uint64_t search_one(int *lev, int nb_strengths,
-                           uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
-                           int fast) {
-  uint64_t tot_mse[TOTAL_STRENGTHS];
-  const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
-  int i, j;
-  uint64_t best_tot_mse = (uint64_t)1 << 63;
-  int best_id = 0;
-  memset(tot_mse, 0, sizeof(tot_mse));
-  for (i = 0; i < sb_count; i++) {
-    int gi;
-    uint64_t best_mse = (uint64_t)1 << 63;
-    /* Find best mse among already selected options. */
-    for (gi = 0; gi < nb_strengths; gi++) {
-      if (mse[i][lev[gi]] < best_mse) {
-        best_mse = mse[i][lev[gi]];
-      }
-    }
-    /* Find best mse when adding each possible new option. */
-    for (j = 0; j < total_strengths; j++) {
-      uint64_t best = best_mse;
-      if (mse[i][j] < best) best = mse[i][j];
-      tot_mse[j] += best;
-    }
-  }
-  for (j = 0; j < total_strengths; j++) {
-    if (tot_mse[j] < best_tot_mse) {
-      best_tot_mse = tot_mse[j];
-      best_id = j;
-    }
-  }
-  lev[nb_strengths] = best_id;
-  return best_tot_mse;
-}
-
-/* Search for the best luma+chroma strength to add as an option, knowing we
-   already selected nb_strengths options. */
-static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
-                                uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
-                                int fast) {
-  uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
-  int i, j;
-  uint64_t best_tot_mse = (uint64_t)1 << 63;
-  int best_id0 = 0;
-  int best_id1 = 0;
-  const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
-  memset(tot_mse, 0, sizeof(tot_mse));
-  for (i = 0; i < sb_count; i++) {
-    int gi;
-    uint64_t best_mse = (uint64_t)1 << 63;
-    /* Find best mse among already selected options. */
-    for (gi = 0; gi < nb_strengths; gi++) {
-      uint64_t curr = mse[0][i][lev0[gi]];
-      curr += mse[1][i][lev1[gi]];
-      if (curr < best_mse) {
-        best_mse = curr;
-      }
-    }
-    /* Find best mse when adding each possible new option. */
-    for (j = 0; j < total_strengths; j++) {
-      int k;
-      for (k = 0; k < total_strengths; k++) {
-        uint64_t best = best_mse;
-        uint64_t curr = mse[0][i][j];
-        curr += mse[1][i][k];
-        if (curr < best) best = curr;
-        tot_mse[j][k] += best;
-      }
-    }
-  }
-  for (j = 0; j < total_strengths; j++) {
-    int k;
-    for (k = 0; k < total_strengths; k++) {
-      if (tot_mse[j][k] < best_tot_mse) {
-        best_tot_mse = tot_mse[j][k];
-        best_id0 = j;
-        best_id1 = k;
-      }
-    }
-  }
-  lev0[nb_strengths] = best_id0;
-  lev1[nb_strengths] = best_id1;
-  return best_tot_mse;
-}
-
-/* Search for the set of strengths that minimizes mse. */
-static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
-                                      uint64_t mse[][TOTAL_STRENGTHS],
-                                      int sb_count, int fast) {
-  uint64_t best_tot_mse;
-  int i;
-  best_tot_mse = (uint64_t)1 << 63;
-  /* Greedy search: add one strength options at a time. */
-  for (i = 0; i < nb_strengths; i++) {
-    best_tot_mse = search_one(best_lev, i, mse, sb_count, fast);
-  }
-  /* Trying to refine the greedy search by reconsidering each
-     already-selected option. */
-  if (!fast) {
-    for (i = 0; i < 4 * nb_strengths; i++) {
-      int j;
-      for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
-      best_tot_mse =
-          search_one(best_lev, nb_strengths - 1, mse, sb_count, fast);
-    }
-  }
-  return best_tot_mse;
-}
-
-/* Search for the set of luma+chroma strengths that minimizes mse. */
-static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
-                                           int nb_strengths,
-                                           uint64_t (**mse)[TOTAL_STRENGTHS],
-                                           int sb_count, int fast) {
-  uint64_t best_tot_mse;
-  int i;
-  best_tot_mse = (uint64_t)1 << 63;
-  /* Greedy search: add one strength options at a time. */
-  for (i = 0; i < nb_strengths; i++) {
-    best_tot_mse =
-        search_one_dual(best_lev0, best_lev1, i, mse, sb_count, fast);
-  }
-  /* Trying to refine the greedy search by reconsidering each
-     already-selected option. */
-  for (i = 0; i < 4 * nb_strengths; i++) {
-    int j;
-    for (j = 0; j < nb_strengths - 1; j++) {
-      best_lev0[j] = best_lev0[j + 1];
-      best_lev1[j] = best_lev1[j + 1];
-    }
-    best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
-                                   sb_count, fast);
-  }
-  return best_tot_mse;
-}
-
-/* FIXME: SSE-optimize this. */
-static void copy_sb16_16(uint16_t *dst, int dstride, const uint16_t *src,
-                         int src_voffset, int src_hoffset, int sstride,
-                         int vsize, int hsize) {
-  int r, c;
-  const uint16_t *base = &src[src_voffset * sstride + src_hoffset];
-  for (r = 0; r < vsize; r++) {
-    for (c = 0; c < hsize; c++) {
-      dst[r * dstride + c] = base[r * sstride + c];
-    }
-  }
-}
-
-static INLINE uint64_t dist_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
-                                      int sstride, int coeff_shift) {
-  uint64_t svar = 0;
-  uint64_t dvar = 0;
-  uint64_t sum_s = 0;
-  uint64_t sum_d = 0;
-  uint64_t sum_s2 = 0;
-  uint64_t sum_d2 = 0;
-  uint64_t sum_sd = 0;
-  int i, j;
-  for (i = 0; i < 8; i++) {
-    for (j = 0; j < 8; j++) {
-      sum_s += src[i * sstride + j];
-      sum_d += dst[i * dstride + j];
-      sum_s2 += src[i * sstride + j] * src[i * sstride + j];
-      sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
-      sum_sd += src[i * sstride + j] * dst[i * dstride + j];
-    }
-  }
-  /* Compute the variance -- the calculation cannot go negative. */
-  svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
-  dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
-  return (uint64_t)floor(
-      .5 + (sum_d2 + sum_s2 - 2 * sum_sd) * .5 *
-               (svar + dvar + (400 << 2 * coeff_shift)) /
-               (sqrt((20000 << 4 * coeff_shift) + svar * (double)dvar)));
-}
-
-static INLINE uint64_t mse_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
-                                     int sstride) {
-  uint64_t sum = 0;
-  int i, j;
-  for (i = 0; i < 8; i++) {
-    for (j = 0; j < 8; j++) {
-      int e = dst[i * dstride + j] - src[i * sstride + j];
-      sum += e * e;
-    }
-  }
-  return sum;
-}
-
-static INLINE uint64_t mse_4x4_16bit(uint16_t *dst, int dstride, uint16_t *src,
-                                     int sstride) {
-  uint64_t sum = 0;
-  int i, j;
-  for (i = 0; i < 4; i++) {
-    for (j = 0; j < 4; j++) {
-      int e = dst[i * dstride + j] - src[i * sstride + j];
-      sum += e * e;
-    }
-  }
-  return sum;
-}
-
-/* Compute MSE only on the blocks we filtered. */
-uint64_t compute_cdef_dist(uint16_t *dst, int dstride, uint16_t *src,
-                           cdef_list *dlist, int cdef_count, BLOCK_SIZE bsize,
-                           int coeff_shift, int pli) {
-  uint64_t sum = 0;
-  int bi, bx, by;
-  if (bsize == BLOCK_8X8) {
-    for (bi = 0; bi < cdef_count; bi++) {
-      by = dlist[bi].by;
-      bx = dlist[bi].bx;
-      if (pli == 0) {
-        sum += dist_8x8_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
-                              &src[bi << (3 + 3)], 8, coeff_shift);
-      } else {
-        sum += mse_8x8_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
-                             &src[bi << (3 + 3)], 8);
-      }
-    }
-  } else if (bsize == BLOCK_4X8) {
-    for (bi = 0; bi < cdef_count; bi++) {
-      by = dlist[bi].by;
-      bx = dlist[bi].bx;
-      sum += mse_4x4_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
-                           &src[bi << (3 + 2)], 4);
-      sum += mse_4x4_16bit(&dst[((by << 3) + 4) * dstride + (bx << 2)], dstride,
-                           &src[(bi << (3 + 2)) + 4 * 4], 4);
-    }
-  } else if (bsize == BLOCK_8X4) {
-    for (bi = 0; bi < cdef_count; bi++) {
-      by = dlist[bi].by;
-      bx = dlist[bi].bx;
-      sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
-                           &src[bi << (2 + 3)], 8);
-      sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 3) + 4], dstride,
-                           &src[(bi << (2 + 3)) + 4], 8);
-    }
-  } else {
-    assert(bsize == BLOCK_4X4);
-    for (bi = 0; bi < cdef_count; bi++) {
-      by = dlist[bi].by;
-      bx = dlist[bi].bx;
-      sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
-                           &src[bi << (2 + 2)], 4);
-    }
-  }
-  return sum >> 2 * coeff_shift;
-}
-
-void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
-                     AV1_COMMON *cm, MACROBLOCKD *xd, int fast) {
-  int r, c;
-  int fbr, fbc;
-  uint16_t *src[3];
-  uint16_t *ref_coeff[3];
-  static cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
-  int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
-  int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
-  int stride[3];
-  int bsize[3];
-  int mi_wide_l2[3];
-  int mi_high_l2[3];
-  int xdec[3];
-  int ydec[3];
-  int pli;
-  int cdef_count;
-  int coeff_shift = AOMMAX(cm->seq_params.bit_depth - 8, 0);
-  uint64_t best_tot_mse = (uint64_t)1 << 63;
-  uint64_t tot_mse;
-  int sb_count;
-  int nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
-  int nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
-  int *sb_index = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
-  int *selected_strength = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
-  uint64_t(*mse[2])[TOTAL_STRENGTHS];
-  int pri_damping = 3 + (cm->base_qindex >> 6);
-  int sec_damping = 3 + (cm->base_qindex >> 6);
-  int i;
-  int nb_strengths;
-  int nb_strength_bits;
-  int quantizer;
-  double lambda;
-  const int num_planes = av1_num_planes(cm);
-  const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
-  DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
-  uint16_t *in;
-  DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
-  quantizer = av1_ac_quant_Q3(cm->base_qindex, 0, cm->seq_params.bit_depth) >>
-              (cm->seq_params.bit_depth - 8);
-  lambda = .12 * quantizer * quantizer / 256.;
-
-  av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0,
-                       num_planes);
-  mse[0] = aom_malloc(sizeof(**mse) * nvfb * nhfb);
-  mse[1] = aom_malloc(sizeof(**mse) * nvfb * nhfb);
-  for (pli = 0; pli < num_planes; pli++) {
-    uint8_t *ref_buffer;
-    int ref_stride;
-    switch (pli) {
-      case 0:
-        ref_buffer = ref->y_buffer;
-        ref_stride = ref->y_stride;
-        break;
-      case 1:
-        ref_buffer = ref->u_buffer;
-        ref_stride = ref->uv_stride;
-        break;
-      case 2:
-        ref_buffer = ref->v_buffer;
-        ref_stride = ref->uv_stride;
-        break;
-    }
-    src[pli] = aom_memalign(
-        32, sizeof(*src) * cm->mi_rows * cm->mi_cols * MI_SIZE * MI_SIZE);
-    ref_coeff[pli] = aom_memalign(
-        32, sizeof(*ref_coeff) * cm->mi_rows * cm->mi_cols * MI_SIZE * MI_SIZE);
-    xdec[pli] = xd->plane[pli].subsampling_x;
-    ydec[pli] = xd->plane[pli].subsampling_y;
-    bsize[pli] = ydec[pli] ? (xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
-                           : (xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
-    stride[pli] = cm->mi_cols << MI_SIZE_LOG2;
-    mi_wide_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
-    mi_high_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
-
-    const int frame_height =
-        (cm->mi_rows * MI_SIZE) >> xd->plane[pli].subsampling_y;
-    const int frame_width =
-        (cm->mi_cols * MI_SIZE) >> xd->plane[pli].subsampling_x;
-
-    for (r = 0; r < frame_height; ++r) {
-      for (c = 0; c < frame_width; ++c) {
-        if (cm->seq_params.use_highbitdepth) {
-          src[pli][r * stride[pli] + c] = CONVERT_TO_SHORTPTR(
-              xd->plane[pli].dst.buf)[r * xd->plane[pli].dst.stride + c];
-          ref_coeff[pli][r * stride[pli] + c] =
-              CONVERT_TO_SHORTPTR(ref_buffer)[r * ref_stride + c];
-        } else {
-          src[pli][r * stride[pli] + c] =
-              xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
-          ref_coeff[pli][r * stride[pli] + c] = ref_buffer[r * ref_stride + c];
-        }
-      }
-    }
-  }
-  in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
-  sb_count = 0;
-  for (fbr = 0; fbr < nvfb; ++fbr) {
-    for (fbc = 0; fbc < nhfb; ++fbc) {
-      int nvb, nhb;
-      int gi;
-      int dirinit = 0;
-      nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
-      nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
-      int hb_step = 1;
-      int vb_step = 1;
-      BLOCK_SIZE bs = BLOCK_64X64;
-      MB_MODE_INFO *const mbmi =
-          cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
-                              MI_SIZE_64X64 * fbc];
-      if (((fbc & 1) &&
-           (mbmi->sb_type == BLOCK_128X128 || mbmi->sb_type == BLOCK_128X64)) ||
-          ((fbr & 1) &&
-           (mbmi->sb_type == BLOCK_128X128 || mbmi->sb_type == BLOCK_64X128)))
-        continue;
-      if (mbmi->sb_type == BLOCK_128X128 || mbmi->sb_type == BLOCK_128X64 ||
-          mbmi->sb_type == BLOCK_64X128)
-        bs = mbmi->sb_type;
-      if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
-        nhb = AOMMIN(MI_SIZE_128X128, cm->mi_cols - MI_SIZE_64X64 * fbc);
-        hb_step = 2;
-      }
-      if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
-        nvb = AOMMIN(MI_SIZE_128X128, cm->mi_rows - MI_SIZE_64X64 * fbr);
-        vb_step = 2;
-      }
-      // No filtering if the entire filter block is skipped
-      if (sb_all_skip(cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) continue;
-      cdef_count = sb_compute_cdef_list(cm, fbr * MI_SIZE_64X64,
-                                        fbc * MI_SIZE_64X64, dlist, bs);
-      for (pli = 0; pli < num_planes; pli++) {
-        for (i = 0; i < CDEF_INBUF_SIZE; i++) inbuf[i] = CDEF_VERY_LARGE;
-        for (gi = 0; gi < total_strengths; gi++) {
-          int threshold;
-          uint64_t curr_mse;
-          int sec_strength;
-          threshold = gi / CDEF_SEC_STRENGTHS;
-          if (fast) threshold = priconv[threshold];
-          /* We avoid filtering the pixels for which some of the pixels to
-             average
-             are outside the frame. We could change the filter instead, but it
-             would add special cases for any future vectorization. */
-          int yoff = CDEF_VBORDER * (fbr != 0);
-          int xoff = CDEF_HBORDER * (fbc != 0);
-          int ysize = (nvb << mi_high_l2[pli]) +
-                      CDEF_VBORDER * (fbr + vb_step < nvfb) + yoff;
-          int xsize = (nhb << mi_wide_l2[pli]) +
-                      CDEF_HBORDER * (fbc + hb_step < nhfb) + xoff;
-          sec_strength = gi % CDEF_SEC_STRENGTHS;
-          copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
-                       src[pli],
-                       (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
-                       (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
-                       stride[pli], ysize, xsize);
-          cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in, xdec[pli], ydec[pli],
-                         dir, &dirinit, var, pli, dlist, cdef_count, threshold,
-                         sec_strength + (sec_strength == 3), pri_damping,
-                         sec_damping, coeff_shift);
-          curr_mse = compute_cdef_dist(
-              ref_coeff[pli] +
-                  (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] +
-                  (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]),
-              stride[pli], tmp_dst, dlist, cdef_count, bsize[pli], coeff_shift,
-              pli);
-          if (pli < 2)
-            mse[pli][sb_count][gi] = curr_mse;
-          else
-            mse[1][sb_count][gi] += curr_mse;
-          sb_index[sb_count] =
-              MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc;
-        }
-      }
-      sb_count++;
-    }
-  }
-  nb_strength_bits = 0;
-  /* Search for different number of signalling bits. */
-  for (i = 0; i <= 3; i++) {
-    int j;
-    int best_lev0[CDEF_MAX_STRENGTHS];
-    int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
-    nb_strengths = 1 << i;
-    if (num_planes >= 3)
-      tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
-                                           mse, sb_count, fast);
-    else
-      tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
-                                      fast);
-    /* Count superblock signalling cost. */
-    tot_mse += (uint64_t)(sb_count * lambda * i);
-    /* Count header signalling cost. */
-    tot_mse += (uint64_t)(nb_strengths * lambda * CDEF_STRENGTH_BITS);
-    if (tot_mse < best_tot_mse) {
-      best_tot_mse = tot_mse;
-      nb_strength_bits = i;
-      for (j = 0; j < 1 << nb_strength_bits; j++) {
-        cm->cdef_strengths[j] = best_lev0[j];
-        cm->cdef_uv_strengths[j] = best_lev1[j];
-      }
-    }
-  }
-  nb_strengths = 1 << nb_strength_bits;
-
-  cm->cdef_bits = nb_strength_bits;
-  cm->nb_cdef_strengths = nb_strengths;
-  for (i = 0; i < sb_count; i++) {
-    int gi;
-    int best_gi;
-    uint64_t best_mse = (uint64_t)1 << 63;
-    best_gi = 0;
-    for (gi = 0; gi < cm->nb_cdef_strengths; gi++) {
-      uint64_t curr = mse[0][i][cm->cdef_strengths[gi]];
-      if (num_planes >= 3) curr += mse[1][i][cm->cdef_uv_strengths[gi]];
-      if (curr < best_mse) {
-        best_gi = gi;
-        best_mse = curr;
-      }
-    }
-    selected_strength[i] = best_gi;
-    cm->mi_grid_visible[sb_index[i]]->cdef_strength = best_gi;
-  }
-
-  if (fast) {
-    for (int j = 0; j < nb_strengths; j++) {
-      cm->cdef_strengths[j] =
-          priconv[cm->cdef_strengths[j] / CDEF_SEC_STRENGTHS] *
-              CDEF_SEC_STRENGTHS +
-          (cm->cdef_strengths[j] % CDEF_SEC_STRENGTHS);
-      cm->cdef_uv_strengths[j] =
-          priconv[cm->cdef_uv_strengths[j] / CDEF_SEC_STRENGTHS] *
-              CDEF_SEC_STRENGTHS +
-          (cm->cdef_uv_strengths[j] % CDEF_SEC_STRENGTHS);
-    }
-  }
-  cm->cdef_pri_damping = pri_damping;
-  cm->cdef_sec_damping = sec_damping;
-  aom_free(mse[0]);
-  aom_free(mse[1]);
-  for (pli = 0; pli < num_planes; pli++) {
-    aom_free(src[pli]);
-    aom_free(ref_coeff[pli]);
-  }
-  aom_free(sb_index);
-  aom_free(selected_strength);
-}
diff --git a/third_party/aom/av1/encoder/picklpf.c b/third_party/aom/av1/encoder/picklpf.c
deleted file mode 100644
index c5508e25c..000000000
--- a/third_party/aom/av1/encoder/picklpf.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <limits.h>
-
-#include "config/aom_scale_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/psnr.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-
-#include "av1/common/av1_loopfilter.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/quant_common.h"
-
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/picklpf.h"
-
-static void yv12_copy_plane(const YV12_BUFFER_CONFIG *src_bc,
-                            YV12_BUFFER_CONFIG *dst_bc, int plane) {
-  switch (plane) {
-    case 0: aom_yv12_copy_y(src_bc, dst_bc); break;
-    case 1: aom_yv12_copy_u(src_bc, dst_bc); break;
-    case 2: aom_yv12_copy_v(src_bc, dst_bc); break;
-    default: assert(plane >= 0 && plane <= 2); break;
-  }
-}
-
-int av1_get_max_filter_level(const AV1_COMP *cpi) {
-  if (cpi->oxcf.pass == 2) {
-    return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
-                                                 : MAX_LOOP_FILTER;
-  } else {
-    return MAX_LOOP_FILTER;
-  }
-}
-
-static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
-                                AV1_COMP *const cpi, int filt_level,
-                                int partial_frame, int plane, int dir) {
-  AV1_COMMON *const cm = &cpi->common;
-  int64_t filt_err;
-
-  assert(plane >= 0 && plane <= 2);
-  int filter_level[2] = { filt_level, filt_level };
-  if (plane == 0 && dir == 0) filter_level[1] = cm->lf.filter_level[1];
-  if (plane == 0 && dir == 1) filter_level[0] = cm->lf.filter_level[0];
-
-  // set base filters for use of get_filter_level when in DELTA_Q_LF mode
-  switch (plane) {
-    case 0:
-      cm->lf.filter_level[0] = filter_level[0];
-      cm->lf.filter_level[1] = filter_level[1];
-      break;
-    case 1: cm->lf.filter_level_u = filter_level[0]; break;
-    case 2: cm->lf.filter_level_v = filter_level[0]; break;
-  }
-
-      // TODO(any): please enable multi-thread and remove the flag when loop
-      // filter mask is compatible with multi-thread.
-#if LOOP_FILTER_BITMASK
-  av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, 0, plane,
-                        plane + 1, partial_frame);
-#else
-  if (cpi->num_workers > 1)
-    av1_loop_filter_frame_mt(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, plane,
-                             plane + 1, partial_frame, cpi->workers,
-                             cpi->num_workers, &cpi->lf_row_sync);
-  else
-    av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, plane,
-                          plane + 1, partial_frame);
-#endif
-
-  filt_err = aom_get_sse_plane(sd, cm->frame_to_show, plane,
-                               cm->seq_params.use_highbitdepth);
-
-  // Re-instate the unfiltered frame
-  yv12_copy_plane(&cpi->last_frame_uf, cm->frame_to_show, plane);
-
-  return filt_err;
-}
-
-static int search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
-                               int partial_frame,
-                               const int *last_frame_filter_level,
-                               double *best_cost_ret, int plane, int dir) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int min_filter_level = 0;
-  const int max_filter_level = av1_get_max_filter_level(cpi);
-  int filt_direction = 0;
-  int64_t best_err;
-  int filt_best;
-  MACROBLOCK *x = &cpi->td.mb;
-
-  // Start the search at the previous frame filter level unless it is now out of
-  // range.
-  int lvl;
-  switch (plane) {
-    case 0: lvl = last_frame_filter_level[dir]; break;
-    case 1: lvl = last_frame_filter_level[2]; break;
-    case 2: lvl = last_frame_filter_level[3]; break;
-    default: assert(plane >= 0 && plane <= 2); return 0;
-  }
-  int filt_mid = clamp(lvl, min_filter_level, max_filter_level);
-  int filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
-  // Sum squared error at each filter level
-  int64_t ss_err[MAX_LOOP_FILTER + 1];
-
-  // Set each entry to -1
-  memset(ss_err, 0xFF, sizeof(ss_err));
-  yv12_copy_plane(cm->frame_to_show, &cpi->last_frame_uf, plane);
-  best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame, plane, dir);
-  filt_best = filt_mid;
-  ss_err[filt_mid] = best_err;
-
-  while (filter_step > 0) {
-    const int filt_high = AOMMIN(filt_mid + filter_step, max_filter_level);
-    const int filt_low = AOMMAX(filt_mid - filter_step, min_filter_level);
-
-    // Bias against raising loop filter in favor of lowering it.
-    int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
-
-    if ((cpi->oxcf.pass == 2) && (cpi->twopass.section_intra_rating < 20))
-      bias = (bias * cpi->twopass.section_intra_rating) / 20;
-
-    // yx, bias less for large block size
-    if (cm->tx_mode != ONLY_4X4) bias >>= 1;
-
-    if (filt_direction <= 0 && filt_low != filt_mid) {
-      // Get Low filter error score
-      if (ss_err[filt_low] < 0) {
-        ss_err[filt_low] =
-            try_filter_frame(sd, cpi, filt_low, partial_frame, plane, dir);
-      }
-      // If value is close to the best so far then bias towards a lower loop
-      // filter value.
-      if (ss_err[filt_low] < (best_err + bias)) {
-        // Was it actually better than the previous best?
-        if (ss_err[filt_low] < best_err) {
-          best_err = ss_err[filt_low];
-        }
-        filt_best = filt_low;
-      }
-    }
-
-    // Now look at filt_high
-    if (filt_direction >= 0 && filt_high != filt_mid) {
-      if (ss_err[filt_high] < 0) {
-        ss_err[filt_high] =
-            try_filter_frame(sd, cpi, filt_high, partial_frame, plane, dir);
-      }
-      // If value is significantly better than previous best, bias added against
-      // raising filter value
-      if (ss_err[filt_high] < (best_err - bias)) {
-        best_err = ss_err[filt_high];
-        filt_best = filt_high;
-      }
-    }
-
-    // Half the step distance if the best filter value was the same as last time
-    if (filt_best == filt_mid) {
-      filter_step /= 2;
-      filt_direction = 0;
-    } else {
-      filt_direction = (filt_best < filt_mid) ? -1 : 1;
-      filt_mid = filt_best;
-    }
-  }
-
-  // Update best error
-  best_err = ss_err[filt_best];
-
-  if (best_cost_ret) *best_cost_ret = RDCOST_DBL(x->rdmult, 0, best_err);
-  return filt_best;
-}
-
-void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
-                           LPF_PICK_METHOD method) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  struct loopfilter *const lf = &cm->lf;
-  (void)sd;
-
-  lf->sharpness_level = 0;
-  cpi->td.mb.rdmult = cpi->rd.RDMULT;
-
-  if (method == LPF_PICK_MINIMAL_LPF) {
-    lf->filter_level[0] = 0;
-    lf->filter_level[1] = 0;
-  } else if (method >= LPF_PICK_FROM_Q) {
-    const int min_filter_level = 0;
-    const int max_filter_level = av1_get_max_filter_level(cpi);
-    const int q = av1_ac_quant_Q3(cm->base_qindex, 0, cm->seq_params.bit_depth);
-    // These values were determined by linear fitting the result of the
-    // searched level for 8 bit depth:
-    // Keyframes: filt_guess = q * 0.06699 - 1.60817
-    // Other frames: filt_guess = q * 0.02295 + 2.48225
-    //
-    // And high bit depth separately:
-    // filt_guess = q * 0.316206 + 3.87252
-    int filt_guess;
-    switch (cm->seq_params.bit_depth) {
-      case AOM_BITS_8:
-        filt_guess = (cm->frame_type == KEY_FRAME)
-                         ? ROUND_POWER_OF_TWO(q * 17563 - 421574, 18)
-                         : ROUND_POWER_OF_TWO(q * 6017 + 650707, 18);
-        break;
-      case AOM_BITS_10:
-        filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20);
-        break;
-      case AOM_BITS_12:
-        filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22);
-        break;
-      default:
-        assert(0 &&
-               "bit_depth should be AOM_BITS_8, AOM_BITS_10 "
-               "or AOM_BITS_12");
-        return;
-    }
-    if (cm->seq_params.bit_depth != AOM_BITS_8 && cm->frame_type == KEY_FRAME)
-      filt_guess -= 4;
-    // TODO(chengchen): retrain the model for Y, U, V filter levels
-    lf->filter_level[0] = clamp(filt_guess, min_filter_level, max_filter_level);
-    lf->filter_level[1] = clamp(filt_guess, min_filter_level, max_filter_level);
-    lf->filter_level_u = clamp(filt_guess, min_filter_level, max_filter_level);
-    lf->filter_level_v = clamp(filt_guess, min_filter_level, max_filter_level);
-  } else {
-    const int last_frame_filter_level[4] = { lf->filter_level[0],
-                                             lf->filter_level[1],
-                                             lf->filter_level_u,
-                                             lf->filter_level_v };
-
-    lf->filter_level[0] = lf->filter_level[1] =
-        search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
-                            last_frame_filter_level, NULL, 0, 2);
-    lf->filter_level[0] =
-        search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
-                            last_frame_filter_level, NULL, 0, 0);
-    lf->filter_level[1] =
-        search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
-                            last_frame_filter_level, NULL, 0, 1);
-
-    if (num_planes > 1) {
-      lf->filter_level_u =
-          search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
-                              last_frame_filter_level, NULL, 1, 0);
-      lf->filter_level_v =
-          search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
-                              last_frame_filter_level, NULL, 2, 0);
-    }
-  }
-}
diff --git a/third_party/aom/av1/encoder/picklpf.h b/third_party/aom/av1/encoder/picklpf.h
deleted file mode 100644
index 357097ae1..000000000
--- a/third_party/aom/av1/encoder/picklpf.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_PICKLPF_H_
-#define AOM_AV1_ENCODER_PICKLPF_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "av1/encoder/encoder.h"
-
-struct yv12_buffer_config;
-struct AV1_COMP;
-int av1_get_max_filter_level(const AV1_COMP *cpi);
-void av1_pick_filter_level(const struct yv12_buffer_config *sd,
-                           struct AV1_COMP *cpi, LPF_PICK_METHOD method);
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_PICKLPF_H_
diff --git a/third_party/aom/av1/encoder/pickrst.c b/third_party/aom/av1/encoder/pickrst.c
deleted file mode 100644
index e7804f6b4..000000000
--- a/third_party/aom/av1/encoder/pickrst.c
+++ /dev/null
@@ -1,1362 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <float.h>
-#include <limits.h>
-#include <math.h>
-
-#include "config/aom_scale_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/binary_codes_writer.h"
-#include "aom_dsp/psnr.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/system_state.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/restoration.h"
-
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/mathutils.h"
-#include "av1/encoder/picklpf.h"
-#include "av1/encoder/pickrst.h"
-
-// When set to RESTORE_WIENER or RESTORE_SGRPROJ only those are allowed.
-// When set to RESTORE_TYPES we allow switchable.
-static const RestorationType force_restore_type = RESTORE_TYPES;
-
-// Number of Wiener iterations
-#define NUM_WIENER_ITERS 5
-
-// Penalty factor for use of dual sgr
-#define DUAL_SGR_PENALTY_MULT 0.01
-
-const int frame_level_restore_bits[RESTORE_TYPES] = { 2, 2, 2, 2 };
-
-typedef int64_t (*sse_extractor_type)(const YV12_BUFFER_CONFIG *a,
-                                      const YV12_BUFFER_CONFIG *b);
-typedef int64_t (*sse_part_extractor_type)(const YV12_BUFFER_CONFIG *a,
-                                           const YV12_BUFFER_CONFIG *b,
-                                           int hstart, int width, int vstart,
-                                           int height);
-
-#define NUM_EXTRACTORS (3 * (1 + 1))
-
-static const sse_part_extractor_type sse_part_extractors[NUM_EXTRACTORS] = {
-  aom_get_y_sse_part,        aom_get_u_sse_part,
-  aom_get_v_sse_part,        aom_highbd_get_y_sse_part,
-  aom_highbd_get_u_sse_part, aom_highbd_get_v_sse_part,
-};
-
-static int64_t sse_restoration_unit(const RestorationTileLimits *limits,
-                                    const YV12_BUFFER_CONFIG *src,
-                                    const YV12_BUFFER_CONFIG *dst, int plane,
-                                    int highbd) {
-  return sse_part_extractors[3 * highbd + plane](
-      src, dst, limits->h_start, limits->h_end - limits->h_start,
-      limits->v_start, limits->v_end - limits->v_start);
-}
-
-typedef struct {
-  // The best coefficients for Wiener or Sgrproj restoration
-  WienerInfo wiener;
-  SgrprojInfo sgrproj;
-
-  // The sum of squared errors for this rtype.
-  int64_t sse[RESTORE_SWITCHABLE_TYPES];
-
-  // The rtype to use for this unit given a frame rtype as
-  // index. Indices: WIENER, SGRPROJ, SWITCHABLE.
-  RestorationType best_rtype[RESTORE_TYPES - 1];
-} RestUnitSearchInfo;
-
-typedef struct {
-  const YV12_BUFFER_CONFIG *src;
-  YV12_BUFFER_CONFIG *dst;
-
-  const AV1_COMMON *cm;
-  const MACROBLOCK *x;
-  int plane;
-  int plane_width;
-  int plane_height;
-  RestUnitSearchInfo *rusi;
-
-  // Speed features
-  const SPEED_FEATURES *sf;
-
-  uint8_t *dgd_buffer;
-  int dgd_stride;
-  const uint8_t *src_buffer;
-  int src_stride;
-
-  // sse and bits are initialised by reset_rsc in search_rest_type
-  int64_t sse;
-  int64_t bits;
-  int tile_y0, tile_stripe0;
-
-  // sgrproj and wiener are initialised by rsc_on_tile when starting the first
-  // tile in the frame.
-  SgrprojInfo sgrproj;
-  WienerInfo wiener;
-  AV1PixelRect tile_rect;
-} RestSearchCtxt;
-
-static void rsc_on_tile(int tile_row, int tile_col, void *priv) {
-  (void)tile_col;
-
-  RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
-  set_default_sgrproj(&rsc->sgrproj);
-  set_default_wiener(&rsc->wiener);
-
-  rsc->tile_stripe0 =
-      (tile_row == 0) ? 0 : rsc->cm->rst_end_stripe[tile_row - 1];
-}
-
-static void reset_rsc(RestSearchCtxt *rsc) {
-  rsc->sse = 0;
-  rsc->bits = 0;
-}
-
-static void init_rsc(const YV12_BUFFER_CONFIG *src, const AV1_COMMON *cm,
-                     const MACROBLOCK *x, const SPEED_FEATURES *sf, int plane,
-                     RestUnitSearchInfo *rusi, YV12_BUFFER_CONFIG *dst,
-                     RestSearchCtxt *rsc) {
-  rsc->src = src;
-  rsc->dst = dst;
-  rsc->cm = cm;
-  rsc->x = x;
-  rsc->plane = plane;
-  rsc->rusi = rusi;
-  rsc->sf = sf;
-
-  const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show;
-  const int is_uv = plane != AOM_PLANE_Y;
-  rsc->plane_width = src->crop_widths[is_uv];
-  rsc->plane_height = src->crop_heights[is_uv];
-  rsc->src_buffer = src->buffers[plane];
-  rsc->src_stride = src->strides[is_uv];
-  rsc->dgd_buffer = dgd->buffers[plane];
-  rsc->dgd_stride = dgd->strides[is_uv];
-  rsc->tile_rect = av1_whole_frame_rect(cm, is_uv);
-  assert(src->crop_widths[is_uv] == dgd->crop_widths[is_uv]);
-  assert(src->crop_heights[is_uv] == dgd->crop_heights[is_uv]);
-}
-
-static int64_t try_restoration_unit(const RestSearchCtxt *rsc,
-                                    const RestorationTileLimits *limits,
-                                    const AV1PixelRect *tile_rect,
-                                    const RestorationUnitInfo *rui) {
-  const AV1_COMMON *const cm = rsc->cm;
-  const int plane = rsc->plane;
-  const int is_uv = plane > 0;
-  const RestorationInfo *rsi = &cm->rst_info[plane];
-  RestorationLineBuffers rlbs;
-  const int bit_depth = cm->seq_params.bit_depth;
-  const int highbd = cm->seq_params.use_highbitdepth;
-
-  const YV12_BUFFER_CONFIG *fts = cm->frame_to_show;
-  // TODO(yunqing): For now, only use optimized LR filter in decoder. Can be
-  // also used in encoder.
-  const int optimized_lr = 0;
-
-  av1_loop_restoration_filter_unit(
-      limits, rui, &rsi->boundaries, &rlbs, tile_rect, rsc->tile_stripe0,
-      is_uv && cm->seq_params.subsampling_x,
-      is_uv && cm->seq_params.subsampling_y, highbd, bit_depth,
-      fts->buffers[plane], fts->strides[is_uv], rsc->dst->buffers[plane],
-      rsc->dst->strides[is_uv], cm->rst_tmpbuf, optimized_lr);
-
-  return sse_restoration_unit(limits, rsc->src, rsc->dst, plane, highbd);
-}
-
-int64_t av1_lowbd_pixel_proj_error_c(const uint8_t *src8, int width, int height,
-                                     int src_stride, const uint8_t *dat8,
-                                     int dat_stride, int32_t *flt0,
-                                     int flt0_stride, int32_t *flt1,
-                                     int flt1_stride, int xq[2],
-                                     const sgr_params_type *params) {
-  int i, j;
-  const uint8_t *src = src8;
-  const uint8_t *dat = dat8;
-  int64_t err = 0;
-  if (params->r[0] > 0 && params->r[1] > 0) {
-    for (i = 0; i < height; ++i) {
-      for (j = 0; j < width; ++j) {
-        assert(flt1[j] < (1 << 15) && flt1[j] > -(1 << 15));
-        assert(flt0[j] < (1 << 15) && flt0[j] > -(1 << 15));
-        const int32_t u = (int32_t)(dat[j] << SGRPROJ_RST_BITS);
-        int32_t v = u << SGRPROJ_PRJ_BITS;
-        v += xq[0] * (flt0[j] - u) + xq[1] * (flt1[j] - u);
-        const int32_t e =
-            ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j];
-        err += e * e;
-      }
-      dat += dat_stride;
-      src += src_stride;
-      flt0 += flt0_stride;
-      flt1 += flt1_stride;
-    }
-  } else if (params->r[0] > 0) {
-    for (i = 0; i < height; ++i) {
-      for (j = 0; j < width; ++j) {
-        assert(flt0[j] < (1 << 15) && flt0[j] > -(1 << 15));
-        const int32_t u = (int32_t)(dat[j] << SGRPROJ_RST_BITS);
-        int32_t v = u << SGRPROJ_PRJ_BITS;
-        v += xq[0] * (flt0[j] - u);
-        const int32_t e =
-            ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j];
-        err += e * e;
-      }
-      dat += dat_stride;
-      src += src_stride;
-      flt0 += flt0_stride;
-    }
-  } else if (params->r[1] > 0) {
-    for (i = 0; i < height; ++i) {
-      for (j = 0; j < width; ++j) {
-        assert(flt1[j] < (1 << 15) && flt1[j] > -(1 << 15));
-        const int32_t u = (int32_t)(dat[j] << SGRPROJ_RST_BITS);
-        int32_t v = u << SGRPROJ_PRJ_BITS;
-        v += xq[1] * (flt1[j] - u);
-        const int32_t e =
-            ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j];
-        err += e * e;
-      }
-      dat += dat_stride;
-      src += src_stride;
-      flt1 += flt1_stride;
-    }
-  } else {
-    for (i = 0; i < height; ++i) {
-      for (j = 0; j < width; ++j) {
-        const int32_t e = (int32_t)(dat[j]) - src[j];
-        err += e * e;
-      }
-      dat += dat_stride;
-      src += src_stride;
-    }
-  }
-
-  return err;
-}
-
-static int64_t get_pixel_proj_error(const uint8_t *src8, int width, int height,
-                                    int src_stride, const uint8_t *dat8,
-                                    int dat_stride, int use_highbitdepth,
-                                    int32_t *flt0, int flt0_stride,
-                                    int32_t *flt1, int flt1_stride, int *xqd,
-                                    const sgr_params_type *params) {
-  int i, j;
-  int64_t err = 0;
-  int xq[2];
-  decode_xq(xqd, xq, params);
-  if (!use_highbitdepth) {
-    err = av1_lowbd_pixel_proj_error(src8, width, height, src_stride, dat8,
-                                     dat_stride, flt0, flt0_stride, flt1,
-                                     flt1_stride, xq, params);
-  } else {
-    const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-    const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
-    const int32_t half = 1 << (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS - 1);
-    if (params->r[0] > 0 && params->r[1] > 0) {
-      int xq0 = xq[0];
-      int xq1 = xq[1];
-      for (i = 0; i < height; ++i) {
-        for (j = 0; j < width; ++j) {
-          const int32_t d = dat[j];
-          const int32_t s = src[j];
-          const int32_t u = (int32_t)(d << SGRPROJ_RST_BITS);
-          int32_t v0 = flt0[j] - u;
-          int32_t v1 = flt1[j] - u;
-          int32_t v = half;
-          v += xq0 * v0;
-          v += xq1 * v1;
-          const int32_t e =
-              (v >> (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS)) + d - s;
-          err += e * e;
-        }
-        dat += dat_stride;
-        flt0 += flt0_stride;
-        flt1 += flt1_stride;
-        src += src_stride;
-      }
-    } else if (params->r[0] > 0 || params->r[1] > 0) {
-      int exq;
-      int32_t *flt;
-      int flt_stride;
-      if (params->r[0] > 0) {
-        exq = xq[0];
-        flt = flt0;
-        flt_stride = flt0_stride;
-      } else {
-        exq = xq[1];
-        flt = flt1;
-        flt_stride = flt1_stride;
-      }
-      for (i = 0; i < height; ++i) {
-        for (j = 0; j < width; ++j) {
-          const int32_t d = dat[j];
-          const int32_t s = src[j];
-          const int32_t u = (int32_t)(d << SGRPROJ_RST_BITS);
-          int32_t v = half;
-          v += exq * (flt[j] - u);
-          const int32_t e =
-              (v >> (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS)) + d - s;
-          err += e * e;
-        }
-        dat += dat_stride;
-        flt += flt_stride;
-        src += src_stride;
-      }
-    } else {
-      for (i = 0; i < height; ++i) {
-        for (j = 0; j < width; ++j) {
-          const int32_t d = dat[j];
-          const int32_t s = src[j];
-          const int32_t e = d - s;
-          err += e * e;
-        }
-        dat += dat_stride;
-        src += src_stride;
-      }
-    }
-  }
-  return err;
-}
-
-#define USE_SGRPROJ_REFINEMENT_SEARCH 1
-static int64_t finer_search_pixel_proj_error(
-    const uint8_t *src8, int width, int height, int src_stride,
-    const uint8_t *dat8, int dat_stride, int use_highbitdepth, int32_t *flt0,
-    int flt0_stride, int32_t *flt1, int flt1_stride, int start_step, int *xqd,
-    const sgr_params_type *params) {
-  int64_t err = get_pixel_proj_error(
-      src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth, flt0,
-      flt0_stride, flt1, flt1_stride, xqd, params);
-  (void)start_step;
-#if USE_SGRPROJ_REFINEMENT_SEARCH
-  int64_t err2;
-  int tap_min[] = { SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MIN1 };
-  int tap_max[] = { SGRPROJ_PRJ_MAX0, SGRPROJ_PRJ_MAX1 };
-  for (int s = start_step; s >= 1; s >>= 1) {
-    for (int p = 0; p < 2; ++p) {
-      if ((params->r[0] == 0 && p == 0) || (params->r[1] == 0 && p == 1)) {
-        continue;
-      }
-      int skip = 0;
-      do {
-        if (xqd[p] - s >= tap_min[p]) {
-          xqd[p] -= s;
-          err2 =
-              get_pixel_proj_error(src8, width, height, src_stride, dat8,
-                                   dat_stride, use_highbitdepth, flt0,
-                                   flt0_stride, flt1, flt1_stride, xqd, params);
-          if (err2 > err) {
-            xqd[p] += s;
-          } else {
-            err = err2;
-            skip = 1;
-            // At the highest step size continue moving in the same direction
-            if (s == start_step) continue;
-          }
-        }
-        break;
-      } while (1);
-      if (skip) break;
-      do {
-        if (xqd[p] + s <= tap_max[p]) {
-          xqd[p] += s;
-          err2 =
-              get_pixel_proj_error(src8, width, height, src_stride, dat8,
-                                   dat_stride, use_highbitdepth, flt0,
-                                   flt0_stride, flt1, flt1_stride, xqd, params);
-          if (err2 > err) {
-            xqd[p] -= s;
-          } else {
-            err = err2;
-            // At the highest step size continue moving in the same direction
-            if (s == start_step) continue;
-          }
-        }
-        break;
-      } while (1);
-    }
-  }
-#endif  // USE_SGRPROJ_REFINEMENT_SEARCH
-  return err;
-}
-
-static void get_proj_subspace(const uint8_t *src8, int width, int height,
-                              int src_stride, const uint8_t *dat8,
-                              int dat_stride, int use_highbitdepth,
-                              int32_t *flt0, int flt0_stride, int32_t *flt1,
-                              int flt1_stride, int *xq,
-                              const sgr_params_type *params) {
-  int i, j;
-  double H[2][2] = { { 0, 0 }, { 0, 0 } };
-  double C[2] = { 0, 0 };
-  double Det;
-  double x[2];
-  const int size = width * height;
-
-  aom_clear_system_state();
-
-  // Default
-  xq[0] = 0;
-  xq[1] = 0;
-  if (!use_highbitdepth) {
-    const uint8_t *src = src8;
-    const uint8_t *dat = dat8;
-    for (i = 0; i < height; ++i) {
-      for (j = 0; j < width; ++j) {
-        const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
-        const double s =
-            (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
-        const double f1 =
-            (params->r[0] > 0) ? (double)flt0[i * flt0_stride + j] - u : 0;
-        const double f2 =
-            (params->r[1] > 0) ? (double)flt1[i * flt1_stride + j] - u : 0;
-        H[0][0] += f1 * f1;
-        H[1][1] += f2 * f2;
-        H[0][1] += f1 * f2;
-        C[0] += f1 * s;
-        C[1] += f2 * s;
-      }
-    }
-  } else {
-    const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-    const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
-    for (i = 0; i < height; ++i) {
-      for (j = 0; j < width; ++j) {
-        const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
-        const double s =
-            (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
-        const double f1 =
-            (params->r[0] > 0) ? (double)flt0[i * flt0_stride + j] - u : 0;
-        const double f2 =
-            (params->r[1] > 0) ? (double)flt1[i * flt1_stride + j] - u : 0;
-        H[0][0] += f1 * f1;
-        H[1][1] += f2 * f2;
-        H[0][1] += f1 * f2;
-        C[0] += f1 * s;
-        C[1] += f2 * s;
-      }
-    }
-  }
-  H[0][0] /= size;
-  H[0][1] /= size;
-  H[1][1] /= size;
-  H[1][0] = H[0][1];
-  C[0] /= size;
-  C[1] /= size;
-  if (params->r[0] == 0) {
-    // H matrix is now only the scalar H[1][1]
-    // C vector is now only the scalar C[1]
-    Det = H[1][1];
-    if (Det < 1e-8) return;  // ill-posed, return default values
-    x[0] = 0;
-    x[1] = C[1] / Det;
-
-    xq[0] = 0;
-    xq[1] = (int)rint(x[1] * (1 << SGRPROJ_PRJ_BITS));
-  } else if (params->r[1] == 0) {
-    // H matrix is now only the scalar H[0][0]
-    // C vector is now only the scalar C[0]
-    Det = H[0][0];
-    if (Det < 1e-8) return;  // ill-posed, return default values
-    x[0] = C[0] / Det;
-    x[1] = 0;
-
-    xq[0] = (int)rint(x[0] * (1 << SGRPROJ_PRJ_BITS));
-    xq[1] = 0;
-  } else {
-    Det = (H[0][0] * H[1][1] - H[0][1] * H[1][0]);
-    if (Det < 1e-8) return;  // ill-posed, return default values
-    x[0] = (H[1][1] * C[0] - H[0][1] * C[1]) / Det;
-    x[1] = (H[0][0] * C[1] - H[1][0] * C[0]) / Det;
-
-    xq[0] = (int)rint(x[0] * (1 << SGRPROJ_PRJ_BITS));
-    xq[1] = (int)rint(x[1] * (1 << SGRPROJ_PRJ_BITS));
-  }
-}
-
-void encode_xq(int *xq, int *xqd, const sgr_params_type *params) {
-  if (params->r[0] == 0) {
-    xqd[0] = 0;
-    xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - xq[1], SGRPROJ_PRJ_MIN1,
-                   SGRPROJ_PRJ_MAX1);
-  } else if (params->r[1] == 0) {
-    xqd[0] = clamp(xq[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0);
-    xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - xqd[0], SGRPROJ_PRJ_MIN1,
-                   SGRPROJ_PRJ_MAX1);
-  } else {
-    xqd[0] = clamp(xq[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0);
-    xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - xqd[0] - xq[1], SGRPROJ_PRJ_MIN1,
-                   SGRPROJ_PRJ_MAX1);
-  }
-}
-
-// Apply the self-guided filter across an entire restoration unit.
-static void apply_sgr(int sgr_params_idx, const uint8_t *dat8, int width,
-                      int height, int dat_stride, int use_highbd, int bit_depth,
-                      int pu_width, int pu_height, int32_t *flt0, int32_t *flt1,
-                      int flt_stride) {
-  for (int i = 0; i < height; i += pu_height) {
-    const int h = AOMMIN(pu_height, height - i);
-    int32_t *flt0_row = flt0 + i * flt_stride;
-    int32_t *flt1_row = flt1 + i * flt_stride;
-    const uint8_t *dat8_row = dat8 + i * dat_stride;
-
-    // Iterate over the stripe in blocks of width pu_width
-    for (int j = 0; j < width; j += pu_width) {
-      const int w = AOMMIN(pu_width, width - j);
-      const int ret = av1_selfguided_restoration(
-          dat8_row + j, w, h, dat_stride, flt0_row + j, flt1_row + j,
-          flt_stride, sgr_params_idx, bit_depth, use_highbd);
-      (void)ret;
-      assert(!ret);
-    }
-  }
-}
-
-static SgrprojInfo search_selfguided_restoration(
-    const uint8_t *dat8, int width, int height, int dat_stride,
-    const uint8_t *src8, int src_stride, int use_highbitdepth, int bit_depth,
-    int pu_width, int pu_height, int32_t *rstbuf) {
-  int32_t *flt0 = rstbuf;
-  int32_t *flt1 = flt0 + RESTORATION_UNITPELS_MAX;
-  int ep, bestep = 0;
-  int64_t besterr = -1;
-  int exqd[2], bestxqd[2] = { 0, 0 };
-  int flt_stride = ((width + 7) & ~7) + 8;
-  assert(pu_width == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
-         pu_width == RESTORATION_PROC_UNIT_SIZE);
-  assert(pu_height == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
-         pu_height == RESTORATION_PROC_UNIT_SIZE);
-
-  for (ep = 0; ep < SGRPROJ_PARAMS; ep++) {
-    int exq[2];
-    apply_sgr(ep, dat8, width, height, dat_stride, use_highbitdepth, bit_depth,
-              pu_width, pu_height, flt0, flt1, flt_stride);
-    aom_clear_system_state();
-    const sgr_params_type *const params = &sgr_params[ep];
-    get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride,
-                      use_highbitdepth, flt0, flt_stride, flt1, flt_stride, exq,
-                      params);
-    aom_clear_system_state();
-    encode_xq(exq, exqd, params);
-    int64_t err = finer_search_pixel_proj_error(
-        src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth,
-        flt0, flt_stride, flt1, flt_stride, 2, exqd, params);
-    if (besterr == -1 || err < besterr) {
-      bestep = ep;
-      besterr = err;
-      bestxqd[0] = exqd[0];
-      bestxqd[1] = exqd[1];
-    }
-  }
-
-  SgrprojInfo ret;
-  ret.ep = bestep;
-  ret.xqd[0] = bestxqd[0];
-  ret.xqd[1] = bestxqd[1];
-  return ret;
-}
-
-static int count_sgrproj_bits(SgrprojInfo *sgrproj_info,
-                              SgrprojInfo *ref_sgrproj_info) {
-  int bits = SGRPROJ_PARAMS_BITS;
-  const sgr_params_type *params = &sgr_params[sgrproj_info->ep];
-  if (params->r[0] > 0)
-    bits += aom_count_primitive_refsubexpfin(
-        SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
-        ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0,
-        sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0);
-  if (params->r[1] > 0)
-    bits += aom_count_primitive_refsubexpfin(
-        SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
-        ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1,
-        sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1);
-  return bits;
-}
-
-static void search_sgrproj(const RestorationTileLimits *limits,
-                           const AV1PixelRect *tile, int rest_unit_idx,
-                           void *priv, int32_t *tmpbuf,
-                           RestorationLineBuffers *rlbs) {
-  (void)rlbs;
-  RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
-  RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
-
-  const MACROBLOCK *const x = rsc->x;
-  const AV1_COMMON *const cm = rsc->cm;
-  const int highbd = cm->seq_params.use_highbitdepth;
-  const int bit_depth = cm->seq_params.bit_depth;
-
-  uint8_t *dgd_start =
-      rsc->dgd_buffer + limits->v_start * rsc->dgd_stride + limits->h_start;
-  const uint8_t *src_start =
-      rsc->src_buffer + limits->v_start * rsc->src_stride + limits->h_start;
-
-  const int is_uv = rsc->plane > 0;
-  const int ss_x = is_uv && cm->seq_params.subsampling_x;
-  const int ss_y = is_uv && cm->seq_params.subsampling_y;
-  const int procunit_width = RESTORATION_PROC_UNIT_SIZE >> ss_x;
-  const int procunit_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
-
-  rusi->sgrproj = search_selfguided_restoration(
-      dgd_start, limits->h_end - limits->h_start,
-      limits->v_end - limits->v_start, rsc->dgd_stride, src_start,
-      rsc->src_stride, highbd, bit_depth, procunit_width, procunit_height,
-      tmpbuf);
-
-  RestorationUnitInfo rui;
-  rui.restoration_type = RESTORE_SGRPROJ;
-  rui.sgrproj_info = rusi->sgrproj;
-
-  rusi->sse[RESTORE_SGRPROJ] = try_restoration_unit(rsc, limits, tile, &rui);
-
-  const int64_t bits_none = x->sgrproj_restore_cost[0];
-  const int64_t bits_sgr = x->sgrproj_restore_cost[1] +
-                           (count_sgrproj_bits(&rusi->sgrproj, &rsc->sgrproj)
-                            << AV1_PROB_COST_SHIFT);
-
-  double cost_none =
-      RDCOST_DBL(x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE]);
-  double cost_sgr =
-      RDCOST_DBL(x->rdmult, bits_sgr >> 4, rusi->sse[RESTORE_SGRPROJ]);
-  if (rusi->sgrproj.ep < 10)
-    cost_sgr *= (1 + DUAL_SGR_PENALTY_MULT * rsc->sf->dual_sgr_penalty_level);
-
-  RestorationType rtype =
-      (cost_sgr < cost_none) ? RESTORE_SGRPROJ : RESTORE_NONE;
-  rusi->best_rtype[RESTORE_SGRPROJ - 1] = rtype;
-
-  rsc->sse += rusi->sse[rtype];
-  rsc->bits += (cost_sgr < cost_none) ? bits_sgr : bits_none;
-  if (cost_sgr < cost_none) rsc->sgrproj = rusi->sgrproj;
-}
-
-void av1_compute_stats_c(int wiener_win, const uint8_t *dgd, const uint8_t *src,
-                         int h_start, int h_end, int v_start, int v_end,
-                         int dgd_stride, int src_stride, double *M, double *H) {
-  int i, j, k, l;
-  double Y[WIENER_WIN2];
-  const int wiener_win2 = wiener_win * wiener_win;
-  const int wiener_halfwin = (wiener_win >> 1);
-  const double avg =
-      find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
-
-  memset(M, 0, sizeof(*M) * wiener_win2);
-  memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
-  for (i = v_start; i < v_end; i++) {
-    for (j = h_start; j < h_end; j++) {
-      const double X = (double)src[i * src_stride + j] - avg;
-      int idx = 0;
-      for (k = -wiener_halfwin; k <= wiener_halfwin; k++) {
-        for (l = -wiener_halfwin; l <= wiener_halfwin; l++) {
-          Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg;
-          idx++;
-        }
-      }
-      assert(idx == wiener_win2);
-      for (k = 0; k < wiener_win2; ++k) {
-        M[k] += Y[k] * X;
-        for (l = k; l < wiener_win2; ++l) {
-          // H is a symmetric matrix, so we only need to fill out the upper
-          // triangle here. We can copy it down to the lower triangle outside
-          // the (i, j) loops.
-          H[k * wiener_win2 + l] += Y[k] * Y[l];
-        }
-      }
-    }
-  }
-  for (k = 0; k < wiener_win2; ++k) {
-    for (l = k + 1; l < wiener_win2; ++l) {
-      H[l * wiener_win2 + k] = H[k * wiener_win2 + l];
-    }
-  }
-}
-
-static double find_average_highbd(const uint16_t *src, int h_start, int h_end,
-                                  int v_start, int v_end, int stride) {
-  uint64_t sum = 0;
-  double avg = 0;
-  int i, j;
-  aom_clear_system_state();
-  for (i = v_start; i < v_end; i++)
-    for (j = h_start; j < h_end; j++) sum += src[i * stride + j];
-  avg = (double)sum / ((v_end - v_start) * (h_end - h_start));
-  return avg;
-}
-
-static AOM_FORCE_INLINE void compute_stats_highbd(
-    int wiener_win, const uint8_t *dgd8, const uint8_t *src8, int h_start,
-    int h_end, int v_start, int v_end, int dgd_stride, int src_stride,
-    double *M, double *H) {
-  int i, j, k, l;
-  double Y[WIENER_WIN2];
-  const int wiener_win2 = wiener_win * wiener_win;
-  const int wiener_halfwin = (wiener_win >> 1);
-  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-  const uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8);
-  const double avg =
-      find_average_highbd(dgd, h_start, h_end, v_start, v_end, dgd_stride);
-
-  memset(M, 0, sizeof(*M) * wiener_win2);
-  memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
-  for (i = v_start; i < v_end; i++) {
-    for (j = h_start; j < h_end; j++) {
-      const double X = (double)src[i * src_stride + j] - avg;
-      int idx = 0;
-      for (k = -wiener_halfwin; k <= wiener_halfwin; k++) {
-        for (l = -wiener_halfwin; l <= wiener_halfwin; l++) {
-          Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg;
-          idx++;
-        }
-      }
-      assert(idx == wiener_win2);
-      for (k = 0; k < wiener_win2; ++k) {
-        double Yk = Y[k];
-        M[k] += Yk * X;
-        double *H2 = &H[k * wiener_win2];
-        H2[k] += Yk * Yk;
-        for (l = k + 1; l < wiener_win2; ++l) {
-          // H is a symmetric matrix, so we only need to fill out the upper
-          // triangle here. We can copy it down to the lower triangle outside
-          // the (i, j) loops.
-          H2[l] += Yk * Y[l];
-        }
-      }
-    }
-  }
-  for (k = 0; k < wiener_win2; ++k) {
-    for (l = k + 1; l < wiener_win2; ++l) {
-      H[l * wiener_win2 + k] = H[k * wiener_win2 + l];
-    }
-  }
-}
-
-static INLINE int wrap_index(int i, int wiener_win) {
-  const int wiener_halfwin1 = (wiener_win >> 1) + 1;
-  return (i >= wiener_halfwin1 ? wiener_win - 1 - i : i);
-}
-
-// Fix vector b, update vector a
-static void update_a_sep_sym(int wiener_win, double **Mc, double **Hc,
-                             double *a, double *b) {
-  int i, j;
-  double S[WIENER_WIN];
-  double A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
-  const int wiener_win2 = wiener_win * wiener_win;
-  const int wiener_halfwin1 = (wiener_win >> 1) + 1;
-  memset(A, 0, sizeof(A));
-  memset(B, 0, sizeof(B));
-  for (i = 0; i < wiener_win; i++) {
-    for (j = 0; j < wiener_win; ++j) {
-      const int jj = wrap_index(j, wiener_win);
-      A[jj] += Mc[i][j] * b[i];
-    }
-  }
-  for (i = 0; i < wiener_win; i++) {
-    for (j = 0; j < wiener_win; j++) {
-      int k, l;
-      for (k = 0; k < wiener_win; ++k)
-        for (l = 0; l < wiener_win; ++l) {
-          const int kk = wrap_index(k, wiener_win);
-          const int ll = wrap_index(l, wiener_win);
-          B[ll * wiener_halfwin1 + kk] +=
-              Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] * b[j];
-        }
-    }
-  }
-  // Normalization enforcement in the system of equations itself
-  for (i = 0; i < wiener_halfwin1 - 1; ++i)
-    A[i] -=
-        A[wiener_halfwin1 - 1] * 2 +
-        B[i * wiener_halfwin1 + wiener_halfwin1 - 1] -
-        2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + (wiener_halfwin1 - 1)];
-  for (i = 0; i < wiener_halfwin1 - 1; ++i)
-    for (j = 0; j < wiener_halfwin1 - 1; ++j)
-      B[i * wiener_halfwin1 + j] -=
-          2 * (B[i * wiener_halfwin1 + (wiener_halfwin1 - 1)] +
-               B[(wiener_halfwin1 - 1) * wiener_halfwin1 + j] -
-               2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 +
-                     (wiener_halfwin1 - 1)]);
-  if (linsolve(wiener_halfwin1 - 1, B, wiener_halfwin1, A, S)) {
-    S[wiener_halfwin1 - 1] = 1.0;
-    for (i = wiener_halfwin1; i < wiener_win; ++i) {
-      S[i] = S[wiener_win - 1 - i];
-      S[wiener_halfwin1 - 1] -= 2 * S[i];
-    }
-    memcpy(a, S, wiener_win * sizeof(*a));
-  }
-}
-
-// Fix vector a, update vector b
-static void update_b_sep_sym(int wiener_win, double **Mc, double **Hc,
-                             double *a, double *b) {
-  int i, j;
-  double S[WIENER_WIN];
-  double A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
-  const int wiener_win2 = wiener_win * wiener_win;
-  const int wiener_halfwin1 = (wiener_win >> 1) + 1;
-  memset(A, 0, sizeof(A));
-  memset(B, 0, sizeof(B));
-  for (i = 0; i < wiener_win; i++) {
-    const int ii = wrap_index(i, wiener_win);
-    for (j = 0; j < wiener_win; j++) A[ii] += Mc[i][j] * a[j];
-  }
-
-  for (i = 0; i < wiener_win; i++) {
-    for (j = 0; j < wiener_win; j++) {
-      const int ii = wrap_index(i, wiener_win);
-      const int jj = wrap_index(j, wiener_win);
-      int k, l;
-      for (k = 0; k < wiener_win; ++k)
-        for (l = 0; l < wiener_win; ++l)
-          B[jj * wiener_halfwin1 + ii] +=
-              Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] * a[l];
-    }
-  }
-  // Normalization enforcement in the system of equations itself
-  for (i = 0; i < wiener_halfwin1 - 1; ++i)
-    A[i] -=
-        A[wiener_halfwin1 - 1] * 2 +
-        B[i * wiener_halfwin1 + wiener_halfwin1 - 1] -
-        2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + (wiener_halfwin1 - 1)];
-  for (i = 0; i < wiener_halfwin1 - 1; ++i)
-    for (j = 0; j < wiener_halfwin1 - 1; ++j)
-      B[i * wiener_halfwin1 + j] -=
-          2 * (B[i * wiener_halfwin1 + (wiener_halfwin1 - 1)] +
-               B[(wiener_halfwin1 - 1) * wiener_halfwin1 + j] -
-               2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 +
-                     (wiener_halfwin1 - 1)]);
-  if (linsolve(wiener_halfwin1 - 1, B, wiener_halfwin1, A, S)) {
-    S[wiener_halfwin1 - 1] = 1.0;
-    for (i = wiener_halfwin1; i < wiener_win; ++i) {
-      S[i] = S[wiener_win - 1 - i];
-      S[wiener_halfwin1 - 1] -= 2 * S[i];
-    }
-    memcpy(b, S, wiener_win * sizeof(*b));
-  }
-}
-
-static int wiener_decompose_sep_sym(int wiener_win, double *M, double *H,
-                                    double *a, double *b) {
-  static const int init_filt[WIENER_WIN] = {
-    WIENER_FILT_TAP0_MIDV, WIENER_FILT_TAP1_MIDV, WIENER_FILT_TAP2_MIDV,
-    WIENER_FILT_TAP3_MIDV, WIENER_FILT_TAP2_MIDV, WIENER_FILT_TAP1_MIDV,
-    WIENER_FILT_TAP0_MIDV,
-  };
-  double *Hc[WIENER_WIN2];
-  double *Mc[WIENER_WIN];
-  int i, j, iter;
-  const int plane_off = (WIENER_WIN - wiener_win) >> 1;
-  const int wiener_win2 = wiener_win * wiener_win;
-  for (i = 0; i < wiener_win; i++) {
-    a[i] = b[i] = (double)init_filt[i + plane_off] / WIENER_FILT_STEP;
-  }
-  for (i = 0; i < wiener_win; i++) {
-    Mc[i] = M + i * wiener_win;
-    for (j = 0; j < wiener_win; j++) {
-      Hc[i * wiener_win + j] =
-          H + i * wiener_win * wiener_win2 + j * wiener_win;
-    }
-  }
-
-  iter = 1;
-  while (iter < NUM_WIENER_ITERS) {
-    update_a_sep_sym(wiener_win, Mc, Hc, a, b);
-    update_b_sep_sym(wiener_win, Mc, Hc, a, b);
-    iter++;
-  }
-  return 1;
-}
-
-// Computes the function x'*H*x - x'*M for the learned 2D filter x, and compares
-// against identity filters; Final score is defined as the difference between
-// the function values
-static double compute_score(int wiener_win, double *M, double *H,
-                            InterpKernel vfilt, InterpKernel hfilt) {
-  double ab[WIENER_WIN * WIENER_WIN];
-  int i, k, l;
-  double P = 0, Q = 0;
-  double iP = 0, iQ = 0;
-  double Score, iScore;
-  double a[WIENER_WIN], b[WIENER_WIN];
-  const int plane_off = (WIENER_WIN - wiener_win) >> 1;
-  const int wiener_win2 = wiener_win * wiener_win;
-
-  aom_clear_system_state();
-
-  a[WIENER_HALFWIN] = b[WIENER_HALFWIN] = 1.0;
-  for (i = 0; i < WIENER_HALFWIN; ++i) {
-    a[i] = a[WIENER_WIN - i - 1] = (double)vfilt[i] / WIENER_FILT_STEP;
-    b[i] = b[WIENER_WIN - i - 1] = (double)hfilt[i] / WIENER_FILT_STEP;
-    a[WIENER_HALFWIN] -= 2 * a[i];
-    b[WIENER_HALFWIN] -= 2 * b[i];
-  }
-  memset(ab, 0, sizeof(ab));
-  for (k = 0; k < wiener_win; ++k) {
-    for (l = 0; l < wiener_win; ++l)
-      ab[k * wiener_win + l] = a[l + plane_off] * b[k + plane_off];
-  }
-  for (k = 0; k < wiener_win2; ++k) {
-    P += ab[k] * M[k];
-    for (l = 0; l < wiener_win2; ++l)
-      Q += ab[k] * H[k * wiener_win2 + l] * ab[l];
-  }
-  Score = Q - 2 * P;
-
-  iP = M[wiener_win2 >> 1];
-  iQ = H[(wiener_win2 >> 1) * wiener_win2 + (wiener_win2 >> 1)];
-  iScore = iQ - 2 * iP;
-
-  return Score - iScore;
-}
-
-static void quantize_sym_filter(int wiener_win, double *f, InterpKernel fi) {
-  int i;
-  const int wiener_halfwin = (wiener_win >> 1);
-  for (i = 0; i < wiener_halfwin; ++i) {
-    fi[i] = RINT(f[i] * WIENER_FILT_STEP);
-  }
-  // Specialize for 7-tap filter
-  if (wiener_win == WIENER_WIN) {
-    fi[0] = CLIP(fi[0], WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP0_MAXV);
-    fi[1] = CLIP(fi[1], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV);
-    fi[2] = CLIP(fi[2], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV);
-  } else {
-    fi[2] = CLIP(fi[1], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV);
-    fi[1] = CLIP(fi[0], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV);
-    fi[0] = 0;
-  }
-  // Satisfy filter constraints
-  fi[WIENER_WIN - 1] = fi[0];
-  fi[WIENER_WIN - 2] = fi[1];
-  fi[WIENER_WIN - 3] = fi[2];
-  // The central element has an implicit +WIENER_FILT_STEP
-  fi[3] = -2 * (fi[0] + fi[1] + fi[2]);
-}
-
-static int count_wiener_bits(int wiener_win, WienerInfo *wiener_info,
-                             WienerInfo *ref_wiener_info) {
-  int bits = 0;
-  if (wiener_win == WIENER_WIN)
-    bits += aom_count_primitive_refsubexpfin(
-        WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
-        WIENER_FILT_TAP0_SUBEXP_K,
-        ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV,
-        wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV);
-  bits += aom_count_primitive_refsubexpfin(
-      WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
-      WIENER_FILT_TAP1_SUBEXP_K,
-      ref_wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV,
-      wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV);
-  bits += aom_count_primitive_refsubexpfin(
-      WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1,
-      WIENER_FILT_TAP2_SUBEXP_K,
-      ref_wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV,
-      wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV);
-  if (wiener_win == WIENER_WIN)
-    bits += aom_count_primitive_refsubexpfin(
-        WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
-        WIENER_FILT_TAP0_SUBEXP_K,
-        ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV,
-        wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV);
-  bits += aom_count_primitive_refsubexpfin(
-      WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
-      WIENER_FILT_TAP1_SUBEXP_K,
-      ref_wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV,
-      wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV);
-  bits += aom_count_primitive_refsubexpfin(
-      WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1,
-      WIENER_FILT_TAP2_SUBEXP_K,
-      ref_wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV,
-      wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV);
-  return bits;
-}
-
-#define USE_WIENER_REFINEMENT_SEARCH 1
-static int64_t finer_tile_search_wiener(const RestSearchCtxt *rsc,
-                                        const RestorationTileLimits *limits,
-                                        const AV1PixelRect *tile,
-                                        RestorationUnitInfo *rui,
-                                        int wiener_win) {
-  const int plane_off = (WIENER_WIN - wiener_win) >> 1;
-  int64_t err = try_restoration_unit(rsc, limits, tile, rui);
-#if USE_WIENER_REFINEMENT_SEARCH
-  int64_t err2;
-  int tap_min[] = { WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP1_MINV,
-                    WIENER_FILT_TAP2_MINV };
-  int tap_max[] = { WIENER_FILT_TAP0_MAXV, WIENER_FILT_TAP1_MAXV,
-                    WIENER_FILT_TAP2_MAXV };
-
-  WienerInfo *plane_wiener = &rui->wiener_info;
-
-  // printf("err  pre = %"PRId64"\n", err);
-  const int start_step = 4;
-  for (int s = start_step; s >= 1; s >>= 1) {
-    for (int p = plane_off; p < WIENER_HALFWIN; ++p) {
-      int skip = 0;
-      do {
-        if (plane_wiener->hfilter[p] - s >= tap_min[p]) {
-          plane_wiener->hfilter[p] -= s;
-          plane_wiener->hfilter[WIENER_WIN - p - 1] -= s;
-          plane_wiener->hfilter[WIENER_HALFWIN] += 2 * s;
-          err2 = try_restoration_unit(rsc, limits, tile, rui);
-          if (err2 > err) {
-            plane_wiener->hfilter[p] += s;
-            plane_wiener->hfilter[WIENER_WIN - p - 1] += s;
-            plane_wiener->hfilter[WIENER_HALFWIN] -= 2 * s;
-          } else {
-            err = err2;
-            skip = 1;
-            // At the highest step size continue moving in the same direction
-            if (s == start_step) continue;
-          }
-        }
-        break;
-      } while (1);
-      if (skip) break;
-      do {
-        if (plane_wiener->hfilter[p] + s <= tap_max[p]) {
-          plane_wiener->hfilter[p] += s;
-          plane_wiener->hfilter[WIENER_WIN - p - 1] += s;
-          plane_wiener->hfilter[WIENER_HALFWIN] -= 2 * s;
-          err2 = try_restoration_unit(rsc, limits, tile, rui);
-          if (err2 > err) {
-            plane_wiener->hfilter[p] -= s;
-            plane_wiener->hfilter[WIENER_WIN - p - 1] -= s;
-            plane_wiener->hfilter[WIENER_HALFWIN] += 2 * s;
-          } else {
-            err = err2;
-            // At the highest step size continue moving in the same direction
-            if (s == start_step) continue;
-          }
-        }
-        break;
-      } while (1);
-    }
-    for (int p = plane_off; p < WIENER_HALFWIN; ++p) {
-      int skip = 0;
-      do {
-        if (plane_wiener->vfilter[p] - s >= tap_min[p]) {
-          plane_wiener->vfilter[p] -= s;
-          plane_wiener->vfilter[WIENER_WIN - p - 1] -= s;
-          plane_wiener->vfilter[WIENER_HALFWIN] += 2 * s;
-          err2 = try_restoration_unit(rsc, limits, tile, rui);
-          if (err2 > err) {
-            plane_wiener->vfilter[p] += s;
-            plane_wiener->vfilter[WIENER_WIN - p - 1] += s;
-            plane_wiener->vfilter[WIENER_HALFWIN] -= 2 * s;
-          } else {
-            err = err2;
-            skip = 1;
-            // At the highest step size continue moving in the same direction
-            if (s == start_step) continue;
-          }
-        }
-        break;
-      } while (1);
-      if (skip) break;
-      do {
-        if (plane_wiener->vfilter[p] + s <= tap_max[p]) {
-          plane_wiener->vfilter[p] += s;
-          plane_wiener->vfilter[WIENER_WIN - p - 1] += s;
-          plane_wiener->vfilter[WIENER_HALFWIN] -= 2 * s;
-          err2 = try_restoration_unit(rsc, limits, tile, rui);
-          if (err2 > err) {
-            plane_wiener->vfilter[p] -= s;
-            plane_wiener->vfilter[WIENER_WIN - p - 1] -= s;
-            plane_wiener->vfilter[WIENER_HALFWIN] += 2 * s;
-          } else {
-            err = err2;
-            // At the highest step size continue moving in the same direction
-            if (s == start_step) continue;
-          }
-        }
-        break;
-      } while (1);
-    }
-  }
-// printf("err post = %"PRId64"\n", err);
-#endif  // USE_WIENER_REFINEMENT_SEARCH
-  return err;
-}
-
-static void search_wiener(const RestorationTileLimits *limits,
-                          const AV1PixelRect *tile_rect, int rest_unit_idx,
-                          void *priv, int32_t *tmpbuf,
-                          RestorationLineBuffers *rlbs) {
-  (void)tmpbuf;
-  (void)rlbs;
-  RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
-  RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
-
-  const int wiener_win =
-      (rsc->plane == AOM_PLANE_Y) ? WIENER_WIN : WIENER_WIN_CHROMA;
-
-  double M[WIENER_WIN2];
-  double H[WIENER_WIN2 * WIENER_WIN2];
-  double vfilterd[WIENER_WIN], hfilterd[WIENER_WIN];
-
-  const AV1_COMMON *const cm = rsc->cm;
-  if (cm->seq_params.use_highbitdepth) {
-    compute_stats_highbd(wiener_win, rsc->dgd_buffer, rsc->src_buffer,
-                         limits->h_start, limits->h_end, limits->v_start,
-                         limits->v_end, rsc->dgd_stride, rsc->src_stride, M, H);
-  } else {
-    av1_compute_stats(wiener_win, rsc->dgd_buffer, rsc->src_buffer,
-                      limits->h_start, limits->h_end, limits->v_start,
-                      limits->v_end, rsc->dgd_stride, rsc->src_stride, M, H);
-  }
-
-  const MACROBLOCK *const x = rsc->x;
-  const int64_t bits_none = x->wiener_restore_cost[0];
-
-  if (!wiener_decompose_sep_sym(wiener_win, M, H, vfilterd, hfilterd)) {
-    rsc->bits += bits_none;
-    rsc->sse += rusi->sse[RESTORE_NONE];
-    rusi->best_rtype[RESTORE_WIENER - 1] = RESTORE_NONE;
-    rusi->sse[RESTORE_WIENER] = INT64_MAX;
-    return;
-  }
-
-  RestorationUnitInfo rui;
-  memset(&rui, 0, sizeof(rui));
-  rui.restoration_type = RESTORE_WIENER;
-  quantize_sym_filter(wiener_win, vfilterd, rui.wiener_info.vfilter);
-  quantize_sym_filter(wiener_win, hfilterd, rui.wiener_info.hfilter);
-
-  // Filter score computes the value of the function x'*A*x - x'*b for the
-  // learned filter and compares it against identity filer. If there is no
-  // reduction in the function, the filter is reverted back to identity
-  if (compute_score(wiener_win, M, H, rui.wiener_info.vfilter,
-                    rui.wiener_info.hfilter) > 0) {
-    rsc->bits += bits_none;
-    rsc->sse += rusi->sse[RESTORE_NONE];
-    rusi->best_rtype[RESTORE_WIENER - 1] = RESTORE_NONE;
-    rusi->sse[RESTORE_WIENER] = INT64_MAX;
-    return;
-  }
-
-  aom_clear_system_state();
-
-  rusi->sse[RESTORE_WIENER] =
-      finer_tile_search_wiener(rsc, limits, tile_rect, &rui, wiener_win);
-  rusi->wiener = rui.wiener_info;
-
-  if (wiener_win != WIENER_WIN) {
-    assert(rui.wiener_info.vfilter[0] == 0 &&
-           rui.wiener_info.vfilter[WIENER_WIN - 1] == 0);
-    assert(rui.wiener_info.hfilter[0] == 0 &&
-           rui.wiener_info.hfilter[WIENER_WIN - 1] == 0);
-  }
-
-  const int64_t bits_wiener =
-      x->wiener_restore_cost[1] +
-      (count_wiener_bits(wiener_win, &rusi->wiener, &rsc->wiener)
-       << AV1_PROB_COST_SHIFT);
-
-  double cost_none =
-      RDCOST_DBL(x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE]);
-  double cost_wiener =
-      RDCOST_DBL(x->rdmult, bits_wiener >> 4, rusi->sse[RESTORE_WIENER]);
-
-  RestorationType rtype =
-      (cost_wiener < cost_none) ? RESTORE_WIENER : RESTORE_NONE;
-  rusi->best_rtype[RESTORE_WIENER - 1] = rtype;
-
-  rsc->sse += rusi->sse[rtype];
-  rsc->bits += (cost_wiener < cost_none) ? bits_wiener : bits_none;
-  if (cost_wiener < cost_none) rsc->wiener = rusi->wiener;
-}
-
-static void search_norestore(const RestorationTileLimits *limits,
-                             const AV1PixelRect *tile_rect, int rest_unit_idx,
-                             void *priv, int32_t *tmpbuf,
-                             RestorationLineBuffers *rlbs) {
-  (void)tile_rect;
-  (void)tmpbuf;
-  (void)rlbs;
-
-  RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
-  RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
-
-  const int highbd = rsc->cm->seq_params.use_highbitdepth;
-  rusi->sse[RESTORE_NONE] = sse_restoration_unit(
-      limits, rsc->src, rsc->cm->frame_to_show, rsc->plane, highbd);
-
-  rsc->sse += rusi->sse[RESTORE_NONE];
-}
-
-static void search_switchable(const RestorationTileLimits *limits,
-                              const AV1PixelRect *tile_rect, int rest_unit_idx,
-                              void *priv, int32_t *tmpbuf,
-                              RestorationLineBuffers *rlbs) {
-  (void)limits;
-  (void)tile_rect;
-  (void)tmpbuf;
-  (void)rlbs;
-  RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
-  RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
-
-  const MACROBLOCK *const x = rsc->x;
-
-  const int wiener_win =
-      (rsc->plane == AOM_PLANE_Y) ? WIENER_WIN : WIENER_WIN_CHROMA;
-
-  double best_cost = 0;
-  int64_t best_bits = 0;
-  RestorationType best_rtype = RESTORE_NONE;
-
-  for (RestorationType r = 0; r < RESTORE_SWITCHABLE_TYPES; ++r) {
-    // Check for the condition that wiener or sgrproj search could not
-    // find a solution or the solution was worse than RESTORE_NONE.
-    // In either case the best_rtype will be set as RESTORE_NONE. These
-    // should be skipped from the test below.
-    if (r > RESTORE_NONE) {
-      if (rusi->best_rtype[r - 1] == RESTORE_NONE) continue;
-    }
-
-    const int64_t sse = rusi->sse[r];
-    int64_t coeff_pcost = 0;
-    switch (r) {
-      case RESTORE_NONE: coeff_pcost = 0; break;
-      case RESTORE_WIENER:
-        coeff_pcost =
-            count_wiener_bits(wiener_win, &rusi->wiener, &rsc->wiener);
-        break;
-      case RESTORE_SGRPROJ:
-        coeff_pcost = count_sgrproj_bits(&rusi->sgrproj, &rsc->sgrproj);
-        break;
-      default: assert(0); break;
-    }
-    const int64_t coeff_bits = coeff_pcost << AV1_PROB_COST_SHIFT;
-    const int64_t bits = x->switchable_restore_cost[r] + coeff_bits;
-    double cost = RDCOST_DBL(x->rdmult, bits >> 4, sse);
-    if (r == RESTORE_SGRPROJ && rusi->sgrproj.ep < 10)
-      cost *= (1 + DUAL_SGR_PENALTY_MULT * rsc->sf->dual_sgr_penalty_level);
-    if (r == 0 || cost < best_cost) {
-      best_cost = cost;
-      best_bits = bits;
-      best_rtype = r;
-    }
-  }
-
-  rusi->best_rtype[RESTORE_SWITCHABLE - 1] = best_rtype;
-
-  rsc->sse += rusi->sse[best_rtype];
-  rsc->bits += best_bits;
-  if (best_rtype == RESTORE_WIENER) rsc->wiener = rusi->wiener;
-  if (best_rtype == RESTORE_SGRPROJ) rsc->sgrproj = rusi->sgrproj;
-}
-
-static void copy_unit_info(RestorationType frame_rtype,
-                           const RestUnitSearchInfo *rusi,
-                           RestorationUnitInfo *rui) {
-  assert(frame_rtype > 0);
-  rui->restoration_type = rusi->best_rtype[frame_rtype - 1];
-  if (rui->restoration_type == RESTORE_WIENER)
-    rui->wiener_info = rusi->wiener;
-  else
-    rui->sgrproj_info = rusi->sgrproj;
-}
-
-static double search_rest_type(RestSearchCtxt *rsc, RestorationType rtype) {
-  static const rest_unit_visitor_t funs[RESTORE_TYPES] = {
-    search_norestore, search_wiener, search_sgrproj, search_switchable
-  };
-
-  reset_rsc(rsc);
-  rsc_on_tile(LR_TILE_ROW, LR_TILE_COL, rsc);
-  av1_foreach_rest_unit_in_plane(rsc->cm, rsc->plane, funs[rtype], rsc,
-                                 &rsc->tile_rect, rsc->cm->rst_tmpbuf, NULL);
-  return RDCOST_DBL(rsc->x->rdmult, rsc->bits >> 4, rsc->sse);
-}
-
-static int rest_tiles_in_plane(const AV1_COMMON *cm, int plane) {
-  const RestorationInfo *rsi = &cm->rst_info[plane];
-  return rsi->units_per_tile;
-}
-
-void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  assert(!cm->all_lossless);
-
-  int ntiles[2];
-  for (int is_uv = 0; is_uv < 2; ++is_uv)
-    ntiles[is_uv] = rest_tiles_in_plane(cm, is_uv);
-
-  assert(ntiles[1] <= ntiles[0]);
-  RestUnitSearchInfo *rusi =
-      (RestUnitSearchInfo *)aom_memalign(16, sizeof(*rusi) * ntiles[0]);
-
-  // If the restoration unit dimensions are not multiples of
-  // rsi->restoration_unit_size then some elements of the rusi array may be
-  // left uninitialised when we reach copy_unit_info(...). This is not a
-  // problem, as these elements are ignored later, but in order to quiet
-  // Valgrind's warnings we initialise the array below.
-  memset(rusi, 0, sizeof(*rusi) * ntiles[0]);
-  cpi->td.mb.rdmult = cpi->rd.RDMULT;
-
-  RestSearchCtxt rsc;
-  const int plane_start = AOM_PLANE_Y;
-  const int plane_end = num_planes > 1 ? AOM_PLANE_V : AOM_PLANE_Y;
-  for (int plane = plane_start; plane <= plane_end; ++plane) {
-    init_rsc(src, &cpi->common, &cpi->td.mb, &cpi->sf, plane, rusi,
-             &cpi->trial_frame_rst, &rsc);
-
-    const int plane_ntiles = ntiles[plane > 0];
-    const RestorationType num_rtypes =
-        (plane_ntiles > 1) ? RESTORE_TYPES : RESTORE_SWITCHABLE_TYPES;
-
-    double best_cost = 0;
-    RestorationType best_rtype = RESTORE_NONE;
-
-    const int highbd = rsc.cm->seq_params.use_highbitdepth;
-    extend_frame(rsc.dgd_buffer, rsc.plane_width, rsc.plane_height,
-                 rsc.dgd_stride, RESTORATION_BORDER, RESTORATION_BORDER,
-                 highbd);
-
-    for (RestorationType r = 0; r < num_rtypes; ++r) {
-      if ((force_restore_type != RESTORE_TYPES) && (r != RESTORE_NONE) &&
-          (r != force_restore_type))
-        continue;
-
-      double cost = search_rest_type(&rsc, r);
-
-      if (r == 0 || cost < best_cost) {
-        best_cost = cost;
-        best_rtype = r;
-      }
-    }
-
-    cm->rst_info[plane].frame_restoration_type = best_rtype;
-    if (force_restore_type != RESTORE_TYPES)
-      assert(best_rtype == force_restore_type || best_rtype == RESTORE_NONE);
-
-    if (best_rtype != RESTORE_NONE) {
-      for (int u = 0; u < plane_ntiles; ++u) {
-        copy_unit_info(best_rtype, &rusi[u], &cm->rst_info[plane].unit_info[u]);
-      }
-    }
-  }
-
-  aom_free(rusi);
-}
diff --git a/third_party/aom/av1/encoder/pickrst.h b/third_party/aom/av1/encoder/pickrst.h
deleted file mode 100644
index 3fec0c34b..000000000
--- a/third_party/aom/av1/encoder/pickrst.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_ENCODER_PICKRST_H_
-#define AOM_AV1_ENCODER_PICKRST_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "av1/encoder/encoder.h"
-#include "aom_ports/system_state.h"
-
-struct yv12_buffer_config;
-struct AV1_COMP;
-
-static const uint8_t g_shuffle_stats_data[16] = {
-  0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
-};
-
-static INLINE double find_average(const uint8_t *src, int h_start, int h_end,
-                                  int v_start, int v_end, int stride) {
-  uint64_t sum = 0;
-  double avg = 0;
-  int i, j;
-  aom_clear_system_state();
-  for (i = v_start; i < v_end; i++)
-    for (j = h_start; j < h_end; j++) sum += src[i * stride + j];
-  avg = (double)sum / ((v_end - v_start) * (h_end - h_start));
-  return avg;
-}
-
-void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_PICKRST_H_
diff --git a/third_party/aom/av1/encoder/pustats.h b/third_party/aom/av1/encoder/pustats.h
deleted file mode 100644
index 40dd46768..000000000
--- a/third_party/aom/av1/encoder/pustats.h
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_PUSTATS_H_
-#define AOM_AV1_ENCODER_PUSTATS_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "av1/encoder/ml.h"
-
-#define NUM_FEATURES_PUSTATS 8
-#define NUM_HIDDEN_LAYERS 2
-#define HIDDEN_LAYERS_0_NODES 12
-#define HIDDEN_LAYERS_1_NODES 10
-#define LOGITS_NODES 1
-
-static const float
-    av1_pustats_rate_hiddenlayer_0_kernel[NUM_FEATURES_PUSTATS *
-                                          HIDDEN_LAYERS_0_NODES] = {
-      -0.1758f, -0.0499f, -10.0069f, -2.2838f,  -0.3359f,  0.3459f,  -0.3285f,
-      -0.0515f, -0.5417f, 0.2357f,   -0.0575f,  -69.0782f, 0.5348f,  1.4068f,
-      0.2213f,  -1.0490f, -0.0636f,  0.1654f,   1.1002f,   33.4924f, 0.4358f,
-      1.2499f,  0.1143f,  0.0592f,   -1.6335f,  -0.0092f,  1.2207f,  -28.4543f,
-      -0.4973f, 0.4368f,  0.2341f,   -0.1623f,  -3.8986f,  0.1311f,  -1.8789f,
-      -3.9079f, -0.8158f, -0.8420f,  1.4295f,   -2.3629f,  -1.4825f, 0.6498f,
-      -5.3669f, 6.4434f,  1.8393f,   -35.0678f, 3.7459f,   -2.8504f, 2.0502f,
-      -0.1812f, -3.9011f, -1.0155f,  1.8375f,   -1.4517f,  1.3917f,  3.8664f,
-      0.8345f,  -0.3472f, 5.7740f,   -1.1196f,  -0.3264f,  -1.2481f, -0.9284f,
-      -4.9657f, 2.2831f,  0.7337f,   2.3176f,   0.6416f,   0.8804f,  1.9988f,
-      -1.3426f, 1.2728f,  1.2249f,   -0.1551f,  5.6045f,   0.2046f,  -2.1464f,
-      -2.4922f, -0.5334f, 12.1055f,  7.2467f,   -0.0070f,  0.0234f,  0.0021f,
-      0.0215f,  -0.0098f, -0.0682f,  -6.1494f,  -0.3176f,  -1.6069f, -0.2119f,
-      -1.0533f, -0.3566f, 0.5294f,   -0.4335f,  0.1626f,
-    };
-
-static const float av1_pustats_rate_hiddenlayer_0_bias[HIDDEN_LAYERS_0_NODES] =
-    {
-      10.5266f, 5.3268f, -1.0678f, 7.7411f,  8.7164f,  -0.3235f,
-      7.3028f,  9.0874f, -6.4594f, -1.0102f, -1.1146f, 10.8419f,
-    };
-
-static const float
-    av1_pustats_rate_hiddenlayer_1_kernel[HIDDEN_LAYERS_0_NODES *
-                                          HIDDEN_LAYERS_1_NODES] = {
-      10.5932f,  2.5192f,  -0.0015f, 5.9479f,   5.2426f,   -0.4091f, 5.3220f,
-      6.0469f,   0.7200f,  3.3241f,  5.5006f,   12.8290f,  -1.6396f, 0.5743f,
-      -0.8370f,  1.9956f,  -4.9270f, -1.5295f,  2.1350f,   -9.4415f, -0.7094f,
-      5.1822f,   19.7287f, -3.0444f, -0.3320f,  0.0031f,   -0.2709f, -0.5249f,
-      0.3281f,   -0.2240f, 0.2225f,  -0.2386f,  -0.4370f,  -0.2438f, -0.4928f,
-      -0.2842f,  -2.1772f, 9.2570f,  -17.6655f, 3.5448f,   -2.8394f, -1.0167f,
-      -0.5115f,  -1.9260f, -0.2111f, -0.7528f,  -1.2387f,  -0.0401f, 5.0716f,
-      -3.3763f,  -0.2898f, -0.4956f, -7.9993f,  0.1526f,   -0.0242f, 0.7354f,
-      6.0432f,   4.8043f,  7.4790f,  -0.6295f,  1.7565f,   3.7197f,  -2.3963f,
-      6.8945f,   2.9717f,  -3.1623f, 3.4241f,   4.4676f,   -1.8154f, -2.9401f,
-      -8.5657f,  -3.0240f, -1.4661f, 8.1145f,   -12.7858f, 3.3624f,  -1.0819f,
-      -4.2856f,  1.1801f,  -0.5587f, -1.6062f,  -1.1813f,  -3.5882f, -0.2490f,
-      -24.9566f, -0.4140f, -0.1113f, 3.5537f,   4.4112f,   0.1367f,  -1.5876f,
-      1.6605f,   1.3903f,  -0.0253f, -2.1419f,  -2.2197f,  -0.7659f, -0.4249f,
-      -0.0424f,  0.1486f,  0.4643f,  -0.9068f,  -0.3619f,  -0.7624f, -0.9132f,
-      -0.4947f,  -0.3527f, -0.5445f, -0.4768f,  -1.7761f,  -1.0686f, 0.5462f,
-      1.3371f,   4.3116f,  0.0777f,  -2.7216f,  -1.8908f,  3.4989f,  7.7269f,
-      -2.7566f,
-    };
-
-static const float av1_pustats_rate_hiddenlayer_1_bias[HIDDEN_LAYERS_1_NODES] =
-    {
-      13.2435f, -8.5477f, -0.0998f, -1.5131f, -12.0187f,
-      6.1715f,  0.5094f,  7.6433f,  -0.3992f, -1.3555f,
-    };
-
-static const float
-    av1_pustats_rate_logits_kernel[HIDDEN_LAYERS_1_NODES * LOGITS_NODES] = {
-      4.3078f, -17.3497f, 0.0195f,  34.6032f, -5.0127f,
-      5.3079f, 10.0077f,  -13.129f, 0.0087f,  -8.4009f,
-    };
-
-static const float av1_pustats_rate_logits_bias[LOGITS_NODES] = {
-  4.5103f,
-};
-
-static const NN_CONFIG av1_pustats_rate_nnconfig = {
-  NUM_FEATURES_PUSTATS,                              // num_inputs
-  LOGITS_NODES,                                      // num_outputs
-  NUM_HIDDEN_LAYERS,                                 // num_hidden_layers
-  { HIDDEN_LAYERS_0_NODES, HIDDEN_LAYERS_1_NODES },  // num_hidden_nodes
-  {
-      av1_pustats_rate_hiddenlayer_0_kernel,
-      av1_pustats_rate_hiddenlayer_1_kernel,
-      av1_pustats_rate_logits_kernel,
-  },
-  {
-      av1_pustats_rate_hiddenlayer_0_bias,
-      av1_pustats_rate_hiddenlayer_1_bias,
-      av1_pustats_rate_logits_bias,
-  },
-};
-
-static const float
-    av1_pustats_dist_hiddenlayer_0_kernel[NUM_FEATURES_PUSTATS *
-                                          HIDDEN_LAYERS_0_NODES] = {
-      -0.2560f, 0.1105f,  -0.8434f, -0.0132f, -8.9371f, -1.1176f, -0.3655f,
-      0.4885f,  1.7518f,  0.4985f,  0.5582f,  -0.3739f, 0.9403f,  0.3874f,
-      0.3265f,  1.7383f,  3.1747f,  0.0285f,  3.3942f,  -0.0123f, 0.5057f,
-      0.1584f,  0.2697f,  4.6151f,  3.6251f,  -0.0121f, -1.0047f, -0.0037f,
-      0.0127f,  0.1935f,  -0.5277f, -2.7144f, 0.0729f,  -0.1457f, -0.0816f,
-      -0.5462f, 0.4738f,  0.3599f,  -0.0564f, 0.0910f,  0.0126f,  -0.0310f,
-      -2.1311f, -0.4666f, -0.0074f, -0.0765f, 0.0287f,  -0.2662f, -0.0999f,
-      -0.2983f, -0.4899f, -0.2314f, 0.2873f,  -0.3614f, 0.1783f,  -0.1210f,
-      0.3569f,  0.5436f,  -8.0536f, -0.0044f, -1.5255f, -0.8247f, -0.4556f,
-      1.9045f,  0.5463f,  0.1102f,  -0.9293f, -0.0185f, -0.8302f, -0.4378f,
-      -0.3531f, -1.3095f, 0.6099f,  0.7977f,  4.1950f,  -0.0067f, -0.2762f,
-      -0.1574f, -0.2149f, 0.6104f,  -1.7053f, 0.1904f,  4.2402f,  -0.2671f,
-      0.8940f,  0.6820f,  0.2241f,  -0.9459f, 1.4571f,  0.5255f,  2.3352f,
-      -0.0806f, 0.5231f,  0.3928f,  0.4146f,  2.0956f,
-    };
-
-static const float av1_pustats_dist_hiddenlayer_0_bias[HIDDEN_LAYERS_0_NODES] =
-    {
-      1.1597f, 0.0836f, -0.7471f, -0.2439f, -0.0438f, 2.4626f,
-      0.f,     1.1485f, 2.7085f,  -4.7897f, 1.4093f,  -1.657f,
-    };
-
-static const float
-    av1_pustats_dist_hiddenlayer_1_kernel[HIDDEN_LAYERS_0_NODES *
-                                          HIDDEN_LAYERS_1_NODES] = {
-      -0.5203f, -1.3468f, 0.3865f,  -0.6859f, 0.0058f,  4.0682f,  0.4807f,
-      -0.1380f, 0.6050f,  0.8958f,  0.7748f,  -0.1311f, 1.7317f,  1.1265f,
-      0.0827f,  0.1407f,  -0.3605f, 0.5429f,  0.1880f,  -0.1439f, 0.2837f,
-      1.6477f,  0.0832f,  0.0593f,  -1.8464f, -0.7241f, -1.0672f, -0.3546f,
-      -0.3842f, -2.3637f, 0.2514f,  0.8263f,  -0.1872f, 0.5774f,  -0.3610f,
-      -0.0205f, 1.3977f,  -0.1083f, 0.6923f,  1.3039f,  -0.2870f, 1.0622f,
-      -0.0566f, 0.2697f,  -0.5429f, -0.6193f, 1.7559f,  0.3246f,  1.9159f,
-      0.3744f,  0.0686f,  1.0191f,  -0.4212f, 1.9591f,  -0.0691f, -0.1085f,
-      -1.2034f, 0.0606f,  1.0116f,  0.5565f,  -0.1874f, -0.7898f, 0.4796f,
-      0.2290f,  0.4334f,  -0.5817f, -0.2949f, 0.1367f,  -0.2932f, -1.1265f,
-      0.0133f,  -0.5309f, -3.3191f, 0.0939f,  0.3895f,  -2.5812f, -0.0066f,
-      -3.0063f, -0.2982f, 0.7309f,  -0.2422f, -0.2770f, -0.7152f, 0.1700f,
-      1.9630f,  0.1988f,  0.4194f,  0.8762f,  0.3402f,  0.1051f,  -0.1598f,
-      0.2405f,  0.0392f,  1.1256f,  1.5245f,  0.0950f,  0.2160f,  -0.5023f,
-      0.2584f,  0.2074f,  0.2218f,  0.3966f,  -0.0921f, -0.2435f, -0.4560f,
-      -1.1923f, -0.3716f, -0.3286f, -1.3225f, 0.1896f,  -0.3342f, -0.7888f,
-      -0.4488f, -1.7168f, 0.3341f,  0.1146f,  0.5226f,  0.2610f,  -0.4574f,
-      -0.4164f,
-    };
-
-static const float av1_pustats_dist_hiddenlayer_1_bias[HIDDEN_LAYERS_1_NODES] =
-    {
-      -2.3014f, -2.4292f, 1.3317f, -3.2361f, -1.918f,
-      2.7149f,  -2.5649f, 2.7765f, 2.9617f,  2.7684f,
-    };
-
-static const float
-    av1_pustats_dist_logits_kernel[HIDDEN_LAYERS_1_NODES * LOGITS_NODES] = {
-      -0.6868f, -0.6715f, 0.449f,  -1.293f, 0.6214f,
-      0.9894f,  -0.4342f, 0.7002f, 1.4363f, 0.6951f,
-    };
-
-static const float av1_pustats_dist_logits_bias[LOGITS_NODES] = {
-  2.3371f,
-};
-
-static const NN_CONFIG av1_pustats_dist_nnconfig = {
-  NUM_FEATURES_PUSTATS,                              // num_inputs
-  LOGITS_NODES,                                      // num_outputs
-  NUM_HIDDEN_LAYERS,                                 // num_hidden_layers
-  { HIDDEN_LAYERS_0_NODES, HIDDEN_LAYERS_1_NODES },  // num_hidden_nodes
-  {
-      av1_pustats_dist_hiddenlayer_0_kernel,
-      av1_pustats_dist_hiddenlayer_1_kernel,
-      av1_pustats_dist_logits_kernel,
-  },
-  {
-      av1_pustats_dist_hiddenlayer_0_bias,
-      av1_pustats_dist_hiddenlayer_1_bias,
-      av1_pustats_dist_logits_bias,
-  },
-};
-
-#undef NUM_HIDDEN_LAYERS
-#undef HIDDEN_LAYERS_0_NODES
-#undef HIDDEN_LAYERS_1_NODES
-#undef LOGITS_NODES
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_PUSTATS_H_
diff --git a/third_party/aom/av1/encoder/random.h b/third_party/aom/av1/encoder/random.h
deleted file mode 100644
index 0bca39102..000000000
--- a/third_party/aom/av1/encoder/random.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_RANDOM_H_
-#define AOM_AV1_ENCODER_RANDOM_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Generate a random number in the range [0, 32768).
-static INLINE unsigned int lcg_rand16(unsigned int *state) {
-  *state = (unsigned int)(*state * 1103515245ULL + 12345);
-  return *state / 65536 % 32768;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_RANDOM_H_
diff --git a/third_party/aom/av1/encoder/ransac.c b/third_party/aom/av1/encoder/ransac.c
deleted file mode 100644
index 781f528eb..000000000
--- a/third_party/aom/av1/encoder/ransac.c
+++ /dev/null
@@ -1,603 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <memory.h>
-#include <math.h>
-#include <time.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-
-#include "av1/encoder/ransac.h"
-#include "av1/encoder/mathutils.h"
-#include "av1/encoder/random.h"
-
-#define MAX_MINPTS 4
-#define MAX_DEGENERATE_ITER 10
-#define MINPTS_MULTIPLIER 5
-
-#define INLIER_THRESHOLD 1.0
-#define MIN_TRIALS 20
-
-////////////////////////////////////////////////////////////////////////////////
-// ransac
-typedef int (*IsDegenerateFunc)(double *p);
-typedef void (*NormalizeFunc)(double *p, int np, double *T);
-typedef void (*DenormalizeFunc)(double *params, double *T1, double *T2);
-typedef int (*FindTransformationFunc)(int points, double *points1,
-                                      double *points2, double *params);
-typedef void (*ProjectPointsDoubleFunc)(double *mat, double *points,
-                                        double *proj, const int n,
-                                        const int stride_points,
-                                        const int stride_proj);
-
-static void project_points_double_translation(double *mat, double *points,
-                                              double *proj, const int n,
-                                              const int stride_points,
-                                              const int stride_proj) {
-  int i;
-  for (i = 0; i < n; ++i) {
-    const double x = *(points++), y = *(points++);
-    *(proj++) = x + mat[0];
-    *(proj++) = y + mat[1];
-    points += stride_points - 2;
-    proj += stride_proj - 2;
-  }
-}
-
-static void project_points_double_rotzoom(double *mat, double *points,
-                                          double *proj, const int n,
-                                          const int stride_points,
-                                          const int stride_proj) {
-  int i;
-  for (i = 0; i < n; ++i) {
-    const double x = *(points++), y = *(points++);
-    *(proj++) = mat[2] * x + mat[3] * y + mat[0];
-    *(proj++) = -mat[3] * x + mat[2] * y + mat[1];
-    points += stride_points - 2;
-    proj += stride_proj - 2;
-  }
-}
-
-static void project_points_double_affine(double *mat, double *points,
-                                         double *proj, const int n,
-                                         const int stride_points,
-                                         const int stride_proj) {
-  int i;
-  for (i = 0; i < n; ++i) {
-    const double x = *(points++), y = *(points++);
-    *(proj++) = mat[2] * x + mat[3] * y + mat[0];
-    *(proj++) = mat[4] * x + mat[5] * y + mat[1];
-    points += stride_points - 2;
-    proj += stride_proj - 2;
-  }
-}
-
-static void normalize_homography(double *pts, int n, double *T) {
-  double *p = pts;
-  double mean[2] = { 0, 0 };
-  double msqe = 0;
-  double scale;
-  int i;
-
-  assert(n > 0);
-  for (i = 0; i < n; ++i, p += 2) {
-    mean[0] += p[0];
-    mean[1] += p[1];
-  }
-  mean[0] /= n;
-  mean[1] /= n;
-  for (p = pts, i = 0; i < n; ++i, p += 2) {
-    p[0] -= mean[0];
-    p[1] -= mean[1];
-    msqe += sqrt(p[0] * p[0] + p[1] * p[1]);
-  }
-  msqe /= n;
-  scale = (msqe == 0 ? 1.0 : sqrt(2) / msqe);
-  T[0] = scale;
-  T[1] = 0;
-  T[2] = -scale * mean[0];
-  T[3] = 0;
-  T[4] = scale;
-  T[5] = -scale * mean[1];
-  T[6] = 0;
-  T[7] = 0;
-  T[8] = 1;
-  for (p = pts, i = 0; i < n; ++i, p += 2) {
-    p[0] *= scale;
-    p[1] *= scale;
-  }
-}
-
-static void invnormalize_mat(double *T, double *iT) {
-  double is = 1.0 / T[0];
-  double m0 = -T[2] * is;
-  double m1 = -T[5] * is;
-  iT[0] = is;
-  iT[1] = 0;
-  iT[2] = m0;
-  iT[3] = 0;
-  iT[4] = is;
-  iT[5] = m1;
-  iT[6] = 0;
-  iT[7] = 0;
-  iT[8] = 1;
-}
-
-static void denormalize_homography(double *params, double *T1, double *T2) {
-  double iT2[9];
-  double params2[9];
-  invnormalize_mat(T2, iT2);
-  multiply_mat(params, T1, params2, 3, 3, 3);
-  multiply_mat(iT2, params2, params, 3, 3, 3);
-}
-
-static void denormalize_affine_reorder(double *params, double *T1, double *T2) {
-  double params_denorm[MAX_PARAMDIM];
-  params_denorm[0] = params[0];
-  params_denorm[1] = params[1];
-  params_denorm[2] = params[4];
-  params_denorm[3] = params[2];
-  params_denorm[4] = params[3];
-  params_denorm[5] = params[5];
-  params_denorm[6] = params_denorm[7] = 0;
-  params_denorm[8] = 1;
-  denormalize_homography(params_denorm, T1, T2);
-  params[0] = params_denorm[2];
-  params[1] = params_denorm[5];
-  params[2] = params_denorm[0];
-  params[3] = params_denorm[1];
-  params[4] = params_denorm[3];
-  params[5] = params_denorm[4];
-  params[6] = params[7] = 0;
-}
-
-static void denormalize_rotzoom_reorder(double *params, double *T1,
-                                        double *T2) {
-  double params_denorm[MAX_PARAMDIM];
-  params_denorm[0] = params[0];
-  params_denorm[1] = params[1];
-  params_denorm[2] = params[2];
-  params_denorm[3] = -params[1];
-  params_denorm[4] = params[0];
-  params_denorm[5] = params[3];
-  params_denorm[6] = params_denorm[7] = 0;
-  params_denorm[8] = 1;
-  denormalize_homography(params_denorm, T1, T2);
-  params[0] = params_denorm[2];
-  params[1] = params_denorm[5];
-  params[2] = params_denorm[0];
-  params[3] = params_denorm[1];
-  params[4] = -params[3];
-  params[5] = params[2];
-  params[6] = params[7] = 0;
-}
-
-static void denormalize_translation_reorder(double *params, double *T1,
-                                            double *T2) {
-  double params_denorm[MAX_PARAMDIM];
-  params_denorm[0] = 1;
-  params_denorm[1] = 0;
-  params_denorm[2] = params[0];
-  params_denorm[3] = 0;
-  params_denorm[4] = 1;
-  params_denorm[5] = params[1];
-  params_denorm[6] = params_denorm[7] = 0;
-  params_denorm[8] = 1;
-  denormalize_homography(params_denorm, T1, T2);
-  params[0] = params_denorm[2];
-  params[1] = params_denorm[5];
-  params[2] = params[5] = 1;
-  params[3] = params[4] = 0;
-  params[6] = params[7] = 0;
-}
-
-static int find_translation(int np, double *pts1, double *pts2, double *mat) {
-  int i;
-  double sx, sy, dx, dy;
-  double sumx, sumy;
-
-  double T1[9], T2[9];
-  normalize_homography(pts1, np, T1);
-  normalize_homography(pts2, np, T2);
-
-  sumx = 0;
-  sumy = 0;
-  for (i = 0; i < np; ++i) {
-    dx = *(pts2++);
-    dy = *(pts2++);
-    sx = *(pts1++);
-    sy = *(pts1++);
-
-    sumx += dx - sx;
-    sumy += dy - sy;
-  }
-  mat[0] = sumx / np;
-  mat[1] = sumy / np;
-  denormalize_translation_reorder(mat, T1, T2);
-  return 0;
-}
-
-static int find_rotzoom(int np, double *pts1, double *pts2, double *mat) {
-  const int np2 = np * 2;
-  double *a = (double *)aom_malloc(sizeof(*a) * (np2 * 5 + 20));
-  double *b = a + np2 * 4;
-  double *temp = b + np2;
-  int i;
-  double sx, sy, dx, dy;
-
-  double T1[9], T2[9];
-  normalize_homography(pts1, np, T1);
-  normalize_homography(pts2, np, T2);
-
-  for (i = 0; i < np; ++i) {
-    dx = *(pts2++);
-    dy = *(pts2++);
-    sx = *(pts1++);
-    sy = *(pts1++);
-
-    a[i * 2 * 4 + 0] = sx;
-    a[i * 2 * 4 + 1] = sy;
-    a[i * 2 * 4 + 2] = 1;
-    a[i * 2 * 4 + 3] = 0;
-    a[(i * 2 + 1) * 4 + 0] = sy;
-    a[(i * 2 + 1) * 4 + 1] = -sx;
-    a[(i * 2 + 1) * 4 + 2] = 0;
-    a[(i * 2 + 1) * 4 + 3] = 1;
-
-    b[2 * i] = dx;
-    b[2 * i + 1] = dy;
-  }
-  if (!least_squares(4, a, np2, 4, b, temp, mat)) {
-    aom_free(a);
-    return 1;
-  }
-  denormalize_rotzoom_reorder(mat, T1, T2);
-  aom_free(a);
-  return 0;
-}
-
-static int find_affine(int np, double *pts1, double *pts2, double *mat) {
-  const int np2 = np * 2;
-  double *a = (double *)aom_malloc(sizeof(*a) * (np2 * 7 + 42));
-  double *b = a + np2 * 6;
-  double *temp = b + np2;
-  int i;
-  double sx, sy, dx, dy;
-
-  double T1[9], T2[9];
-  normalize_homography(pts1, np, T1);
-  normalize_homography(pts2, np, T2);
-
-  for (i = 0; i < np; ++i) {
-    dx = *(pts2++);
-    dy = *(pts2++);
-    sx = *(pts1++);
-    sy = *(pts1++);
-
-    a[i * 2 * 6 + 0] = sx;
-    a[i * 2 * 6 + 1] = sy;
-    a[i * 2 * 6 + 2] = 0;
-    a[i * 2 * 6 + 3] = 0;
-    a[i * 2 * 6 + 4] = 1;
-    a[i * 2 * 6 + 5] = 0;
-    a[(i * 2 + 1) * 6 + 0] = 0;
-    a[(i * 2 + 1) * 6 + 1] = 0;
-    a[(i * 2 + 1) * 6 + 2] = sx;
-    a[(i * 2 + 1) * 6 + 3] = sy;
-    a[(i * 2 + 1) * 6 + 4] = 0;
-    a[(i * 2 + 1) * 6 + 5] = 1;
-
-    b[2 * i] = dx;
-    b[2 * i + 1] = dy;
-  }
-  if (!least_squares(6, a, np2, 6, b, temp, mat)) {
-    aom_free(a);
-    return 1;
-  }
-  denormalize_affine_reorder(mat, T1, T2);
-  aom_free(a);
-  return 0;
-}
-
-static int get_rand_indices(int npoints, int minpts, int *indices,
-                            unsigned int *seed) {
-  int i, j;
-  int ptr = lcg_rand16(seed) % npoints;
-  if (minpts > npoints) return 0;
-  indices[0] = ptr;
-  ptr = (ptr == npoints - 1 ? 0 : ptr + 1);
-  i = 1;
-  while (i < minpts) {
-    int index = lcg_rand16(seed) % npoints;
-    while (index) {
-      ptr = (ptr == npoints - 1 ? 0 : ptr + 1);
-      for (j = 0; j < i; ++j) {
-        if (indices[j] == ptr) break;
-      }
-      if (j == i) index--;
-    }
-    indices[i++] = ptr;
-  }
-  return 1;
-}
-
-typedef struct {
-  int num_inliers;
-  double variance;
-  int *inlier_indices;
-} RANSAC_MOTION;
-
-// Return -1 if 'a' is a better motion, 1 if 'b' is better, 0 otherwise.
-static int compare_motions(const void *arg_a, const void *arg_b) {
-  const RANSAC_MOTION *motion_a = (RANSAC_MOTION *)arg_a;
-  const RANSAC_MOTION *motion_b = (RANSAC_MOTION *)arg_b;
-
-  if (motion_a->num_inliers > motion_b->num_inliers) return -1;
-  if (motion_a->num_inliers < motion_b->num_inliers) return 1;
-  if (motion_a->variance < motion_b->variance) return -1;
-  if (motion_a->variance > motion_b->variance) return 1;
-  return 0;
-}
-
-static int is_better_motion(const RANSAC_MOTION *motion_a,
-                            const RANSAC_MOTION *motion_b) {
-  return compare_motions(motion_a, motion_b) < 0;
-}
-
-static void copy_points_at_indices(double *dest, const double *src,
-                                   const int *indices, int num_points) {
-  for (int i = 0; i < num_points; ++i) {
-    const int index = indices[i];
-    dest[i * 2] = src[index * 2];
-    dest[i * 2 + 1] = src[index * 2 + 1];
-  }
-}
-
-static const double kInfiniteVariance = 1e12;
-
-static void clear_motion(RANSAC_MOTION *motion, int num_points) {
-  motion->num_inliers = 0;
-  motion->variance = kInfiniteVariance;
-  memset(motion->inlier_indices, 0,
-         sizeof(*motion->inlier_indices * num_points));
-}
-
-static int ransac(const int *matched_points, int npoints,
-                  int *num_inliers_by_motion, double *params_by_motion,
-                  int num_desired_motions, const int minpts,
-                  IsDegenerateFunc is_degenerate,
-                  FindTransformationFunc find_transformation,
-                  ProjectPointsDoubleFunc projectpoints) {
-  static const double PROBABILITY_REQUIRED = 0.9;
-  static const double EPS = 1e-12;
-
-  int N = 10000, trial_count = 0;
-  int i = 0;
-  int ret_val = 0;
-
-  unsigned int seed = (unsigned int)npoints;
-
-  int indices[MAX_MINPTS] = { 0 };
-
-  double *points1, *points2;
-  double *corners1, *corners2;
-  double *image1_coord;
-
-  // Store information for the num_desired_motions best transformations found
-  // and the worst motion among them, as well as the motion currently under
-  // consideration.
-  RANSAC_MOTION *motions, *worst_kept_motion = NULL;
-  RANSAC_MOTION current_motion;
-
-  // Store the parameters and the indices of the inlier points for the motion
-  // currently under consideration.
-  double params_this_motion[MAX_PARAMDIM];
-
-  double *cnp1, *cnp2;
-
-  for (i = 0; i < num_desired_motions; ++i) {
-    num_inliers_by_motion[i] = 0;
-  }
-  if (npoints < minpts * MINPTS_MULTIPLIER || npoints == 0) {
-    return 1;
-  }
-
-  points1 = (double *)aom_malloc(sizeof(*points1) * npoints * 2);
-  points2 = (double *)aom_malloc(sizeof(*points2) * npoints * 2);
-  corners1 = (double *)aom_malloc(sizeof(*corners1) * npoints * 2);
-  corners2 = (double *)aom_malloc(sizeof(*corners2) * npoints * 2);
-  image1_coord = (double *)aom_malloc(sizeof(*image1_coord) * npoints * 2);
-
-  motions =
-      (RANSAC_MOTION *)aom_malloc(sizeof(RANSAC_MOTION) * num_desired_motions);
-  for (i = 0; i < num_desired_motions; ++i) {
-    motions[i].inlier_indices =
-        (int *)aom_malloc(sizeof(*motions->inlier_indices) * npoints);
-    clear_motion(motions + i, npoints);
-  }
-  current_motion.inlier_indices =
-      (int *)aom_malloc(sizeof(*current_motion.inlier_indices) * npoints);
-  clear_motion(&current_motion, npoints);
-
-  worst_kept_motion = motions;
-
-  if (!(points1 && points2 && corners1 && corners2 && image1_coord && motions &&
-        current_motion.inlier_indices)) {
-    ret_val = 1;
-    goto finish_ransac;
-  }
-
-  cnp1 = corners1;
-  cnp2 = corners2;
-  for (i = 0; i < npoints; ++i) {
-    *(cnp1++) = *(matched_points++);
-    *(cnp1++) = *(matched_points++);
-    *(cnp2++) = *(matched_points++);
-    *(cnp2++) = *(matched_points++);
-  }
-
-  while (N > trial_count) {
-    double sum_distance = 0.0;
-    double sum_distance_squared = 0.0;
-
-    clear_motion(&current_motion, npoints);
-
-    int degenerate = 1;
-    int num_degenerate_iter = 0;
-
-    while (degenerate) {
-      num_degenerate_iter++;
-      if (!get_rand_indices(npoints, minpts, indices, &seed)) {
-        ret_val = 1;
-        goto finish_ransac;
-      }
-
-      copy_points_at_indices(points1, corners1, indices, minpts);
-      copy_points_at_indices(points2, corners2, indices, minpts);
-
-      degenerate = is_degenerate(points1);
-      if (num_degenerate_iter > MAX_DEGENERATE_ITER) {
-        ret_val = 1;
-        goto finish_ransac;
-      }
-    }
-
-    if (find_transformation(minpts, points1, points2, params_this_motion)) {
-      trial_count++;
-      continue;
-    }
-
-    projectpoints(params_this_motion, corners1, image1_coord, npoints, 2, 2);
-
-    for (i = 0; i < npoints; ++i) {
-      double dx = image1_coord[i * 2] - corners2[i * 2];
-      double dy = image1_coord[i * 2 + 1] - corners2[i * 2 + 1];
-      double distance = sqrt(dx * dx + dy * dy);
-
-      if (distance < INLIER_THRESHOLD) {
-        current_motion.inlier_indices[current_motion.num_inliers++] = i;
-        sum_distance += distance;
-        sum_distance_squared += distance * distance;
-      }
-    }
-
-    if (current_motion.num_inliers >= worst_kept_motion->num_inliers &&
-        current_motion.num_inliers > 1) {
-      int temp;
-      double fracinliers, pNoOutliers, mean_distance, dtemp;
-      mean_distance = sum_distance / ((double)current_motion.num_inliers);
-      current_motion.variance =
-          sum_distance_squared / ((double)current_motion.num_inliers - 1.0) -
-          mean_distance * mean_distance * ((double)current_motion.num_inliers) /
-              ((double)current_motion.num_inliers - 1.0);
-      if (is_better_motion(&current_motion, worst_kept_motion)) {
-        // This motion is better than the worst currently kept motion. Remember
-        // the inlier points and variance. The parameters for each kept motion
-        // will be recomputed later using only the inliers.
-        worst_kept_motion->num_inliers = current_motion.num_inliers;
-        worst_kept_motion->variance = current_motion.variance;
-        memcpy(worst_kept_motion->inlier_indices, current_motion.inlier_indices,
-               sizeof(*current_motion.inlier_indices) * npoints);
-
-        assert(npoints > 0);
-        fracinliers = (double)current_motion.num_inliers / (double)npoints;
-        pNoOutliers = 1 - pow(fracinliers, minpts);
-        pNoOutliers = fmax(EPS, pNoOutliers);
-        pNoOutliers = fmin(1 - EPS, pNoOutliers);
-        dtemp = log(1.0 - PROBABILITY_REQUIRED) / log(pNoOutliers);
-        temp = (dtemp > (double)INT32_MAX)
-                   ? INT32_MAX
-                   : dtemp < (double)INT32_MIN ? INT32_MIN : (int)dtemp;
-
-        if (temp > 0 && temp < N) {
-          N = AOMMAX(temp, MIN_TRIALS);
-        }
-
-        // Determine the new worst kept motion and its num_inliers and variance.
-        for (i = 0; i < num_desired_motions; ++i) {
-          if (is_better_motion(worst_kept_motion, &motions[i])) {
-            worst_kept_motion = &motions[i];
-          }
-        }
-      }
-    }
-    trial_count++;
-  }
-
-  // Sort the motions, best first.
-  qsort(motions, num_desired_motions, sizeof(RANSAC_MOTION), compare_motions);
-
-  // Recompute the motions using only the inliers.
-  for (i = 0; i < num_desired_motions; ++i) {
-    if (motions[i].num_inliers >= minpts) {
-      copy_points_at_indices(points1, corners1, motions[i].inlier_indices,
-                             motions[i].num_inliers);
-      copy_points_at_indices(points2, corners2, motions[i].inlier_indices,
-                             motions[i].num_inliers);
-
-      find_transformation(motions[i].num_inliers, points1, points2,
-                          params_by_motion + (MAX_PARAMDIM - 1) * i);
-    }
-    num_inliers_by_motion[i] = motions[i].num_inliers;
-  }
-
-finish_ransac:
-  aom_free(points1);
-  aom_free(points2);
-  aom_free(corners1);
-  aom_free(corners2);
-  aom_free(image1_coord);
-  aom_free(current_motion.inlier_indices);
-  for (i = 0; i < num_desired_motions; ++i) {
-    aom_free(motions[i].inlier_indices);
-  }
-  aom_free(motions);
-
-  return ret_val;
-}
-
-static int is_collinear3(double *p1, double *p2, double *p3) {
-  static const double collinear_eps = 1e-3;
-  const double v =
-      (p2[0] - p1[0]) * (p3[1] - p1[1]) - (p2[1] - p1[1]) * (p3[0] - p1[0]);
-  return fabs(v) < collinear_eps;
-}
-
-static int is_degenerate_translation(double *p) {
-  return (p[0] - p[2]) * (p[0] - p[2]) + (p[1] - p[3]) * (p[1] - p[3]) <= 2;
-}
-
-static int is_degenerate_affine(double *p) {
-  return is_collinear3(p, p + 2, p + 4);
-}
-
-int ransac_translation(int *matched_points, int npoints,
-                       int *num_inliers_by_motion, double *params_by_motion,
-                       int num_desired_motions) {
-  return ransac(matched_points, npoints, num_inliers_by_motion,
-                params_by_motion, num_desired_motions, 3,
-                is_degenerate_translation, find_translation,
-                project_points_double_translation);
-}
-
-int ransac_rotzoom(int *matched_points, int npoints, int *num_inliers_by_motion,
-                   double *params_by_motion, int num_desired_motions) {
-  return ransac(matched_points, npoints, num_inliers_by_motion,
-                params_by_motion, num_desired_motions, 3, is_degenerate_affine,
-                find_rotzoom, project_points_double_rotzoom);
-}
-
-int ransac_affine(int *matched_points, int npoints, int *num_inliers_by_motion,
-                  double *params_by_motion, int num_desired_motions) {
-  return ransac(matched_points, npoints, num_inliers_by_motion,
-                params_by_motion, num_desired_motions, 3, is_degenerate_affine,
-                find_affine, project_points_double_affine);
-}
diff --git a/third_party/aom/av1/encoder/ransac.h b/third_party/aom/av1/encoder/ransac.h
deleted file mode 100644
index c429f2ce5..000000000
--- a/third_party/aom/av1/encoder/ransac.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_RANSAC_H_
-#define AOM_AV1_ENCODER_RANSAC_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <memory.h>
-
-#include "av1/common/warped_motion.h"
-
-typedef int (*RansacFunc)(int *matched_points, int npoints,
-                          int *num_inliers_by_motion, double *params_by_motion,
-                          int num_motions);
-
-/* Each of these functions fits a motion model from a set of
-   corresponding points in 2 frames using RANSAC. */
-int ransac_affine(int *matched_points, int npoints, int *num_inliers_by_motion,
-                  double *params_by_motion, int num_motions);
-int ransac_rotzoom(int *matched_points, int npoints, int *num_inliers_by_motion,
-                   double *params_by_motion, int num_motions);
-int ransac_translation(int *matched_points, int npoints,
-                       int *num_inliers_by_motion, double *params_by_motion,
-                       int num_motions);
-#endif  // AOM_AV1_ENCODER_RANSAC_H_
diff --git a/third_party/aom/av1/encoder/rate_distortion_model_params.h b/third_party/aom/av1/encoder/rate_distortion_model_params.h
deleted file mode 100644
index 7cd0962c5..000000000
--- a/third_party/aom/av1/encoder/rate_distortion_model_params.h
+++ /dev/null
@@ -1,591 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_RATE_DISTORTION_MODEL_PARAMS_H_
-#define AOM_AV1_ENCODER_RATE_DISTORTION_MODEL_PARAMS_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "av1/encoder/ml.h"
-
-// 22 float features +
-// 2 categorical features with 4 possible values, converted to one-hot vectors.
-// So, total 22 + 2 * 4 = 30 features.
-#define NUM_FEATURES 30
-#define NUM_HIDDEN_LAYERS 1
-#define NUM_HIDDEN_NODES 96
-#define NUM_OUTPUTS 1
-
-//------------------------------------------------------------------------------
-// RDCost model
-
-static const float
-    av1_rdcost_model_nn_weights_layer0[NUM_FEATURES * NUM_HIDDEN_NODES] = {
-      -0.0699f,   0.2790f,    0.1915f,    0.2669f,    0.4637f,    0.4095f,
-      0.2129f,    0.0634f,    0.2306f,    -0.2232f,   -0.5711f,   -0.6493f,
-      -0.7406f,   -0.8440f,   0.4105f,    0.1392f,    0.5218f,    -0.1618f,
-      -0.1719f,   0.3409f,    0.1111f,    -0.3609f,   -0.2929f,   0.3869f,
-      -0.5373f,   0.0700f,    0.2572f,    0.2483f,    -0.0314f,   0.5228f,
-      0.0169f,    -0.1357f,   0.0419f,    -0.1722f,   0.1303f,    0.1198f,
-      -0.0013f,   0.1309f,    0.0293f,    -0.1941f,   0.0668f,    -0.0643f,
-      -0.0381f,   0.1249f,    -0.0731f,   -0.1649f,   0.0964f,    0.0270f,
-      0.1354f,    0.0538f,    -0.2064f,   -0.2067f,   -0.0569f,   0.0449f,
-      0.1680f,    -0.0732f,   -0.0785f,   0.1884f,    -0.2137f,   -0.0189f,
-      0.2976f,    0.2818f,    -0.0222f,   0.2658f,    0.0488f,    0.2778f,
-      -0.1110f,   0.2069f,    -0.0072f,   -0.0095f,   -0.1105f,   -0.1365f,
-      -0.4245f,   -0.4751f,   -0.0736f,   0.2333f,    0.0653f,    -0.0249f,
-      0.0055f,    -0.0838f,   -0.0489f,   -0.2597f,   0.2621f,    -0.0251f,
-      -0.0545f,   0.0816f,    -0.0816f,   0.3396f,    -0.1047f,   0.3678f,
-      0.1487f,    -0.0270f,   0.2574f,    0.1018f,    0.2560f,    -0.0598f,
-      -0.0446f,   -0.1792f,   0.5336f,    -0.1590f,   -0.9820f,   -0.6514f,
-      -0.6304f,   -0.8359f,   -0.0699f,   0.0295f,    -0.0057f,   -0.3088f,
-      -0.1466f,   0.2220f,    -0.1980f,   -0.3400f,   -0.1228f,   0.2667f,
-      -0.4816f,   0.0155f,    -0.0194f,   0.2051f,    0.0513f,    0.1575f,
-      -121.4240f, -126.6840f, -124.1106f, -127.6184f, -85.0333f,  -26.6396f,
-      2.7020f,    102.0452f,  -85.5128f,  0.0076f,    122.2206f,  107.5265f,
-      108.3773f,  93.4847f,   20.3705f,   -89.6993f,  -176.9070f, -41.7543f,
-      -123.0293f, -91.6437f,  -205.7099f, -62.5346f,  -83.2987f,  21.3830f,
-      56.6341f,   -120.8647f, -127.7562f, -121.6688f, -127.4225f, -74.8045f,
-      -15.9247f,  -14.6468f,  -14.7788f,  -15.4498f,  -18.5514f,  -11.1579f,
-      -5.8164f,   -3.4318f,   0.8100f,    0.0642f,    203.5111f,  189.6872f,
-      190.4776f,  176.4784f,  -4.9427f,   -12.5324f,  -7.6861f,   21.9182f,
-      -6.7864f,   -7.1906f,   -8.1292f,   21.4780f,   -7.8016f,   -5.2653f,
-      61.8526f,   -15.5105f,  -14.6900f,  -14.1459f,  -15.4350f,  -19.1379f,
-      -0.7876f,   -1.8558f,   -4.6035f,   -6.8405f,   -0.2904f,   2.3202f,
-      1.8127f,    -2.9397f,   -0.8187f,   -0.6098f,   22.6173f,   10.3668f,
-      12.9363f,   2.4541f,    6.6700f,    0.3804f,    -3.3117f,   8.5464f,
-      -25.8348f,  1.8698f,    -9.5753f,   8.5558f,    -16.3576f,  7.2217f,
-      35.3115f,   -1.1447f,   -2.6530f,   -4.7027f,   -5.7024f,   -0.9513f,
-      0.8393f,    0.7085f,    0.7879f,    0.3728f,    3.0574f,    1.1360f,
-      26.0531f,   4.1084f,    -1.7340f,   0.1683f,    -450.7927f, -444.5818f,
-      -442.5239f, -438.1168f, 2.4924f,    -0.0147f,   -0.0797f,   -47.5322f,
-      -1.7638f,   -0.8608f,   -0.6500f,   -44.4326f,  -0.9027f,   2.5560f,
-      -267.6517f, 0.2642f,    0.9457f,    0.7944f,    0.3609f,    3.2742f,
-      -74.3400f,  -81.6894f,  -76.2162f,  -69.2979f,  -90.2476f,  -39.7389f,
-      2.2545f,    36.5095f,   -60.1129f,  -1.0383f,   87.0348f,   83.9940f,
-      83.7199f,   80.8609f,   14.9075f,   -78.7405f,  -74.3549f,  -4.2382f,
-      -23.9739f,  -91.8469f,  -67.2654f,  -21.5293f,  -9.9857f,   11.8391f,
-      35.8223f,   -74.2551f,  -81.0729f,  -73.8347f,  -70.3798f,  -86.8052f,
-      0.1701f,    -0.1136f,   0.0060f,    -0.0496f,   -0.1727f,   0.0195f,
-      -0.1040f,   0.1027f,    0.0467f,    -0.2538f,   -0.1322f,   0.0860f,
-      0.0093f,    -0.2801f,   -0.0958f,   0.0497f,    -0.0582f,   -0.0311f,
-      0.1840f,    0.0752f,    0.0282f,    0.0297f,    0.0607f,    0.0650f,
-      0.0893f,    0.1297f,    0.0373f,    0.0040f,    -0.0973f,   0.0248f,
-      -0.1419f,   0.0322f,    -0.0712f,   0.0860f,    -0.0426f,   -0.1989f,
-      0.1393f,    -0.1183f,   0.0735f,    -0.1895f,   0.1447f,    -0.0056f,
-      -0.1833f,   0.0884f,    0.0949f,    0.0476f,    0.0551f,    0.2125f,
-      -0.1537f,   -0.0141f,   -0.2182f,   0.1567f,    0.0457f,    -0.1485f,
-      -0.1177f,   0.0391f,    0.1982f,    -0.1288f,   0.1165f,    -0.2019f,
-      0.4550f,    0.5179f,    0.4311f,    0.1861f,    0.6199f,    0.4542f,
-      0.2034f,    0.1128f,    1.3489f,    -0.2525f,   -2.1139f,   -2.2444f,
-      -2.3679f,   -2.3378f,   0.5682f,    0.1348f,    0.3032f,    -1.5835f,
-      0.2883f,    0.1693f,    0.0439f,    -1.4556f,   0.3818f,    0.4875f,
-      -1.8899f,   0.2510f,    0.6450f,    0.6082f,    0.5962f,    0.8131f,
-      12.0281f,   13.3899f,   13.6249f,   15.8068f,   -1.5453f,   6.7456f,
-      -6.0877f,   26.2596f,   6.2223f,    -0.5922f,   134.1428f,  128.8985f,
-      128.7538f,  123.0920f,  1.3207f,    18.3069f,   15.7436f,   46.5230f,
-      24.7455f,   15.0688f,   19.9965f,   34.7236f,   19.7171f,   1.2018f,
-      49.7274f,   11.8957f,   13.1578f,   14.0451f,   15.3544f,   -3.5601f,
-      1.0048f,    0.9479f,    1.1832f,    2.0635f,    -2.9808f,   2.0803f,
-      -7.5815f,   8.4733f,    -4.2008f,   0.1217f,    226.5257f,  210.7018f,
-      211.6235f,  195.2605f,  0.8283f,    1.0977f,    1.4858f,    41.1242f,
-      1.5822f,    0.8742f,    2.0440f,    33.6213f,   1.6177f,    0.9661f,
-      65.0014f,   1.4197f,    1.0109f,    1.3153f,    1.5470f,    -3.2833f,
-      2.0858f,    2.0012f,    2.1088f,    2.5593f,    -0.9422f,   1.8554f,
-      -6.5378f,   0.6780f,    2.3186f,    0.0506f,    218.3285f,  203.4055f,
-      204.0362f,  188.7854f,  0.3701f,    2.5257f,    3.5172f,    28.8144f,
-      2.1511f,    3.4676f,    2.6337f,    28.5113f,   2.4254f,    -0.0548f,
-      59.4511f,   2.0757f,    2.1551f,    2.2271f,    2.5300f,    -1.4173f,
-      91.9240f,   88.2142f,   83.6155f,   82.2482f,   -9.2566f,   10.9654f,
-      -2.6974f,   62.6750f,   -3.6298f,   -0.1245f,   69.6721f,   67.1340f,
-      66.9162f,   64.1994f,   -83.6778f,  76.8107f,   69.7832f,   64.9261f,
-      68.4901f,   76.3615f,   70.8108f,   63.5435f,   69.1973f,   -83.6034f,
-      24.8275f,   90.1923f,   87.6831f,   82.9783f,   81.8558f,   -7.1010f,
-      95.1656f,   88.3853f,   80.5835f,   79.5990f,   -3.0720f,   8.1290f,
-      -0.6151f,   63.6425f,   -4.5833f,   -0.0063f,   70.1861f,   66.6250f,
-      66.6148f,   63.0886f,   -89.2863f,  74.7684f,   64.8897f,   60.4134f,
-      62.5241f,   78.7076f,   61.7234f,   60.1688f,   61.9509f,   -89.4098f,
-      30.3361f,   92.9144f,   88.5954f,   79.6336f,   79.2453f,   -0.4101f,
-      0.6287f,    0.8050f,    0.4417f,    0.5419f,    0.5972f,    1.3037f,
-      0.4316f,    -0.0013f,   -0.3673f,   -0.4952f,   6.1773f,    5.7825f,
-      6.1705f,    5.3848f,    1.7607f,    -0.0152f,   -0.2924f,   0.8199f,
-      1.3326f,    0.7197f,    -0.6332f,   1.1127f,    1.0472f,    1.8468f,
-      3.4419f,    0.8233f,    0.7175f,    0.8514f,    0.6372f,    0.9472f,
-      -0.0813f,   -0.0197f,   -0.0096f,   -0.2015f,   0.1133f,    -0.0305f,
-      0.0578f,    0.1375f,    -0.0750f,   -0.1702f,   0.1246f,    -0.1782f,
-      0.2017f,    0.0425f,    -0.0602f,   0.1837f,    0.1044f,    -0.1273f,
-      -0.1431f,   0.0672f,    -0.1807f,   -0.1045f,   -0.1355f,   -0.0497f,
-      -0.0561f,   -0.0633f,   0.1907f,    -0.0777f,   0.1203f,    0.0754f,
-      0.4079f,    0.2001f,    0.0558f,    0.0622f,    0.2951f,    0.6541f,
-      -0.0068f,   0.1070f,    0.4469f,    -0.1266f,   -1.3035f,   -1.3324f,
-      -1.3612f,   -0.9966f,   0.7986f,    0.3192f,    -0.5028f,   -0.3844f,
-      -0.4079f,   0.6690f,    -0.5109f,   -0.2719f,   -0.4958f,   1.0310f,
-      -0.8044f,   0.1447f,    0.4221f,    0.3194f,    0.3063f,    0.5520f,
-      0.4667f,    -5.7238f,   -0.5602f,   12.6339f,   -15.1865f,  -14.9035f,
-      -3.0726f,   9.5347f,    -24.6225f,  -2.7086f,   89.8557f,   95.0657f,
-      93.8693f,   99.1085f,   -35.9483f,  -18.0363f,  -1.6298f,   25.3484f,
-      39.3975f,   -15.3199f,  5.7664f,    17.2367f,   25.2788f,   -36.5648f,
-      29.1426f,   0.3857f,    -5.2117f,   0.0533f,    12.1707f,   -11.1735f,
-      0.2673f,    0.0090f,    0.1574f,    0.0904f,    0.0281f,    0.1144f,
-      0.1123f,    -0.0061f,   0.0954f,    -0.0094f,   -0.4387f,   -0.5006f,
-      -0.2560f,   -0.2326f,   -0.1769f,   0.0465f,    0.1273f,    -0.1627f,
-      0.2987f,    -0.3041f,   0.1131f,    -0.3620f,   0.0932f,    -0.0649f,
-      -0.4597f,   0.2535f,    -0.0994f,   0.1390f,    0.1279f,    0.4207f,
-      -39.1159f,  -42.6382f,  -38.4225f,  -31.2301f,  -28.2382f,  -28.1176f,
-      -9.5822f,   1.1886f,    -1.2964f,   -0.7908f,   154.9819f,  147.1914f,
-      147.0482f,  138.7535f,  -21.7014f,  -35.7117f,  -28.8802f,  -3.8968f,
-      -21.5007f,  -28.2213f,  -28.4878f,  -3.7558f,   -26.8317f,  -22.8491f,
-      50.9464f,   -37.0918f,  -42.8811f,  -39.3079f,  -32.1904f,  -26.6354f,
-      -72.5346f,  -75.5751f,  -72.6896f,  -71.3671f,  -35.3279f,  -21.6077f,
-      -5.8259f,   38.7516f,   -6.8012f,   0.0172f,    170.0685f,  157.4452f,
-      158.2334f,  145.0102f,  10.0653f,   -45.1775f,  -56.4571f,  -5.1165f,
-      -75.8980f,  -46.8672f,  -55.3642f,  -6.5631f,   -81.0258f,  10.1348f,
-      55.9786f,   -70.8124f,  -75.7040f,  -73.9831f,  -70.8786f,  -34.9723f,
-      88.6239f,   86.5330f,   80.9333f,   79.6833f,   -10.0096f,  10.6312f,
-      -4.2350f,   62.6230f,   -3.2991f,   -0.0843f,   75.8659f,   72.7886f,
-      72.5301f,   68.8265f,   -81.8276f,  70.3025f,   62.9511f,   62.5706f,
-      69.1842f,   69.3637f,   65.4820f,   65.4357f,   71.5347f,   -82.1064f,
-      24.1925f,   86.2418f,   85.4985f,   80.4091f,   79.5378f,   -9.3877f,
-      -7.6594f,   -4.9581f,   -10.6385f,  -20.2307f,  -44.2261f,  -13.7557f,
-      -4.5344f,   18.1793f,   -10.5522f,  -1.5878f,   110.3187f,  102.4945f,
-      102.3305f,  94.1324f,   -25.2665f,  9.8172f,    -4.4791f,   69.4972f,
-      -6.7571f,   5.8378f,    -11.6101f,  70.7066f,   -4.9327f,   -24.0513f,
-      41.4598f,   -7.0600f,   -7.0940f,   -10.2478f,  -18.9616f,  -46.7505f,
-      90.9365f,   86.0260f,   73.2934f,   69.3406f,   3.3863f,    3.8524f,
-      0.6536f,    63.2150f,   -10.6304f,  0.0291f,    73.0071f,   69.7660f,
-      69.0457f,   65.5611f,   -92.3379f,  74.2756f,   54.5025f,   84.3183f,
-      53.7481f,   73.5624f,   55.3827f,   82.3242f,   53.5432f,   -92.5355f,
-      25.3457f,   89.1858f,   84.4763f,   72.9840f,   69.1889f,   4.6719f,
-      -0.0129f,   0.1995f,    0.2069f,    0.0358f,    0.1209f,    -0.1185f,
-      -0.1217f,   -0.1456f,   0.0125f,    -0.1354f,   0.0510f,    -0.0572f,
-      0.1397f,    0.1453f,    -0.0086f,   0.0107f,    0.0232f,    0.1508f,
-      0.0884f,    -0.0967f,   -0.1786f,   0.1361f,    -0.1399f,   -0.2021f,
-      -0.0242f,   -0.2169f,   0.0133f,    0.0116f,    -0.1489f,   -0.0093f,
-      -0.0796f,   0.1507f,    0.0906f,    0.0228f,    -0.0166f,   -0.1875f,
-      0.0471f,    0.1184f,    -0.0007f,   -0.2732f,   -0.1386f,   -0.2057f,
-      -0.0213f,   -0.1699f,   0.0996f,    0.1562f,    0.1850f,    -0.0362f,
-      -0.2059f,   0.0258f,    -0.0135f,   -0.1276f,   0.0034f,    0.2023f,
-      0.0857f,    -0.0085f,   -0.1955f,   -0.1666f,   -0.0920f,   0.0971f,
-      -0.0292f,   -0.0512f,   -0.0753f,   -0.0739f,   -0.0873f,   -0.1200f,
-      0.0220f,    -0.1359f,   0.2013f,    -0.0445f,   0.1143f,    -0.1484f,
-      -0.1556f,   -0.0003f,   0.1711f,    -0.0724f,   -0.0531f,   0.1126f,
-      0.0476f,    -0.0057f,   0.0088f,    0.0792f,    -0.0438f,   -0.1118f,
-      -0.0244f,   0.0712f,    0.0930f,    -0.0203f,   0.1662f,    -0.0695f,
-      -12.3872f,  -18.7022f,  -13.4237f,  -1.4731f,   -18.6843f,  -14.1515f,
-      -7.5057f,   40.2090f,   -2.7774f,   -1.8433f,   123.6006f,  119.0557f,
-      118.2758f,  113.6423f,  -32.6216f,  -19.5865f,  -16.2897f,  17.2068f,
-      6.3559f,    -17.8742f,  0.7098f,    11.5970f,   -10.1104f,  -33.1830f,
-      39.5617f,   -10.5499f,  -17.8137f,  -14.7185f,  -2.6172f,   -14.6004f,
-      0.3893f,    0.4443f,    0.5305f,    0.3049f,    0.8316f,    0.8679f,
-      0.2265f,    0.2393f,    1.1970f,    -0.2891f,   -1.8666f,   -1.8266f,
-      -1.6984f,   -1.8787f,   0.8706f,    0.4208f,    0.5076f,    -0.8436f,
-      -0.1623f,   0.8008f,    0.1512f,    -1.0839f,   -0.3002f,   0.9263f,
-      -1.3031f,   0.5964f,    0.3413f,    0.5551f,    0.2618f,    0.7018f,
-      -0.1320f,   -0.1944f,   -0.0209f,   -0.0877f,   0.0721f,    -0.0840f,
-      0.0589f,    0.1019f,    0.1927f,    -0.2011f,   -0.1117f,   0.1575f,
-      0.1080f,    -0.0516f,   0.2154f,    -0.1231f,   0.0426f,    -0.0522f,
-      -0.1824f,   -0.1923f,   -0.1206f,   -0.1724f,   -0.0798f,   0.0401f,
-      -0.2170f,   0.0293f,    -0.0853f,   0.1517f,    0.2128f,    -0.1934f,
-      0.0406f,    0.0517f,    0.0822f,    -0.0150f,   0.0943f,    -0.0989f,
-      -0.1802f,   -0.1453f,   -0.1967f,   -0.1797f,   0.1545f,    -0.1217f,
-      0.1755f,    -0.1604f,   -0.0515f,   0.0509f,    0.0310f,    -0.1220f,
-      -0.1770f,   -0.0157f,   0.1989f,    -0.0069f,   0.1766f,    0.1267f,
-      -0.0517f,   -0.0396f,   0.0346f,    0.1946f,    0.1162f,    -0.1345f,
-      -106.6179f, -110.5917f, -107.5476f, -108.0601f, -61.1687f,  -22.4247f,
-      2.6632f,    109.5208f,  -66.1177f,  0.0062f,    159.9339f,  144.7755f,
-      145.5032f,  128.9872f,  18.9180f,   -75.3569f,  -105.0866f, -52.0704f,
-      -119.1299f, -74.7543f,  -109.9468f, -59.0682f,  -104.5754f, 19.2878f,
-      67.2573f,   -104.8061f, -111.8610f, -106.6751f, -107.3537f, -56.4758f,
-      -0.6967f,   -0.8495f,   -0.9586f,   -1.0461f,   1.4522f,    -0.2762f,
-      28.2828f,   2.9157f,    -2.1062f,   0.1566f,    -467.2388f, -461.0685f,
-      -459.0092f, -453.8370f, 1.5422f,    -0.8186f,   -0.4884f,   -53.0399f,
-      -2.0255f,   -1.1348f,   -1.1039f,   -50.2489f,  -1.4821f,   1.8021f,
-      -258.0319f, -1.0865f,   -0.5542f,   -1.0443f,   -1.2732f,   1.8413f,
-      0.2377f,    0.1937f,    -0.0116f,   0.0935f,    -0.0599f,   0.0118f,
-      -0.0875f,   0.0455f,    -0.1301f,   -0.1081f,   -0.2622f,   -0.1960f,
-      0.0393f,    -0.1490f,   0.1852f,    -0.0964f,   -0.0741f,   0.0419f,
-      0.1162f,    -0.0274f,   0.1200f,    -0.0333f,   -0.1337f,   0.2141f,
-      0.0664f,    0.1044f,    -0.1744f,   0.1060f,    -0.1468f,   0.0679f,
-      0.0218f,    0.0494f,    0.1064f,    0.1363f,    0.0013f,    0.1331f,
-      -0.2095f,   0.2088f,    -0.0399f,   -0.1811f,   0.0678f,    -0.1974f,
-      0.1855f,    -0.0968f,   -0.2008f,   0.0162f,    -0.0096f,   -0.1493f,
-      0.2170f,    -0.1248f,   -0.2055f,   0.1276f,    -0.0269f,   -0.1697f,
-      -0.0662f,   0.1073f,    -0.0029f,   -0.1051f,   -0.1573f,   0.2106f,
-      -0.2020f,   -0.1565f,   0.0335f,    -0.1818f,   -0.1665f,   0.2169f,
-      0.1974f,    -0.1470f,   -0.1738f,   -0.2038f,   0.0558f,    -0.0441f,
-      0.0065f,    -0.1485f,   -0.1366f,   -0.2131f,   0.1042f,    0.0349f,
-      -0.1804f,   -0.1361f,   -0.0116f,   -0.1012f,   -0.0860f,   0.0606f,
-      -0.2077f,   0.1826f,    -0.1014f,   -0.0721f,   -0.1517f,   0.1022f,
-      -0.1110f,   -0.0186f,   0.1505f,    0.1797f,    0.0911f,    0.0340f,
-      0.1702f,    -0.1404f,   -0.0566f,   -0.2744f,   -0.1943f,   -0.1871f,
-      0.0046f,    0.0306f,    -0.0436f,   0.1625f,    -0.1302f,   0.0175f,
-      0.1570f,    -0.1425f,   0.0779f,    0.1398f,    0.0929f,    0.0897f,
-      0.0458f,    -0.0936f,   0.1321f,    -0.1355f,   0.0974f,    0.0457f,
-      -73.3516f,  -75.0655f,  -72.1062f,  -72.4624f,  -34.8640f,  -14.3727f,
-      -4.4720f,   66.4982f,   -18.8358f,  0.0397f,    174.2172f,  160.4959f,
-      161.1034f,  147.3250f,  9.5507f,    -45.0180f,  -73.1609f,  -1.5230f,
-      -74.8677f,  -43.8559f,  -68.7622f,  -4.8971f,   -82.1922f,  9.6490f,
-      64.7115f,   -71.8566f,  -75.3879f,  -72.5479f,  -71.7161f,  -34.8056f,
-      0.1442f,    0.1558f,    0.1267f,    -0.1261f,   -0.0506f,   -0.0823f,
-      -0.1807f,   -0.0889f,   -0.2098f,   -0.1295f,   -0.2046f,   -0.1749f,
-      -0.1197f,   -0.1380f,   0.0799f,    -0.0889f,   -0.1209f,   0.1919f,
-      0.1947f,    -0.2086f,   -0.1042f,   -0.0468f,   0.0232f,    0.1052f,
-      -0.0535f,   0.1398f,    0.1713f,    -0.1522f,   0.1453f,    0.0286f,
-      -64.8503f,  -67.6746f,  -63.6497f,  -60.4614f,  -35.6091f,  -20.1605f,
-      -3.6082f,   84.2801f,   -37.8552f,  -2.2371f,   132.4947f,  123.5057f,
-      123.5776f,  113.9060f,  -14.8772f,  -40.7130f,  -79.1391f,  -10.7024f,
-      -65.7831f,  -43.6078f,  -79.6847f,  -13.0743f,  -69.2533f,  -16.0171f,
-      50.4868f,   -64.3678f,  -68.7061f,  -64.0823f,  -59.3413f,  -28.9405f,
-      77.1601f,   75.4899f,   69.8696f,   67.8764f,   -22.7548f,  5.9814f,
-      -3.2826f,   57.9754f,   -5.9500f,   -0.0014f,   77.2251f,   74.0737f,
-      73.7004f,   70.5072f,   -80.9661f,  69.3065f,   55.8337f,   76.8831f,
-      57.9902f,   63.4765f,   56.4748f,   70.0282f,   61.0874f,   -81.3960f,
-      26.2594f,   76.0367f,   74.9115f,   69.2361f,   66.9262f,   -20.1637f,
-      0.1886f,    -0.1108f,   0.1262f,    0.0189f,    0.1382f,    0.0859f,
-      -0.1874f,   -0.1986f,   -0.0171f,   -0.1400f,   -0.2944f,   -0.0750f,
-      -0.0395f,   -0.2092f,   -0.0878f,   0.1216f,    -0.0870f,   -0.1613f,
-      0.2495f,    0.0754f,    0.0244f,    -0.1205f,   -0.0196f,   -0.1729f,
-      0.1170f,    0.1585f,    0.1482f,    -0.1705f,   -0.1337f,   0.0199f,
-      13.0897f,   9.1111f,    6.7413f,    6.3907f,    -28.1187f,  0.4556f,
-      -5.3116f,   30.7293f,   -16.3644f,  -0.0365f,   118.9118f,  111.6125f,
-      111.3227f,  103.4680f,  -30.1883f,  8.9328f,    -4.1876f,   79.3936f,
-      -9.0522f,   12.7861f,   -1.2736f,   78.0446f,   -5.9485f,   -30.5716f,
-      27.8951f,   13.9613f,   6.7173f,    5.2345f,    8.3271f,    -27.3705f,
-      1.0488f,    1.0864f,    1.0710f,    1.7332f,    -3.0561f,   1.1622f,
-      -7.6688f,   3.0491f,    -1.3865f,   0.0769f,    222.5451f,  207.8170f,
-      208.1767f,  193.1396f,  0.4447f,    2.1654f,    1.8929f,    35.1469f,
-      1.1783f,    2.6199f,    1.1611f,    26.2989f,   3.4446f,    0.1551f,
-      65.6529f,   1.2229f,    0.9851f,    1.0241f,    1.4373f,    -3.3421f,
-      0.1388f,    0.0756f,    0.2047f,    0.1140f,    0.0945f,    0.2038f,
-      0.1038f,    -0.2068f,   -0.0626f,   -0.1937f,   0.1347f,    -0.0464f,
-      -0.0866f,   0.0250f,    0.0264f,    -0.1556f,   -0.1625f,   0.1028f,
-      -0.1255f,   -0.0854f,   0.1033f,    0.0008f,    -0.2133f,   -0.0317f,
-      0.1725f,    -0.1054f,   -0.1900f,   0.0383f,    0.0440f,    -0.1900f,
-      -30.0811f,  -30.9929f,  -29.3194f,  -26.8347f,  -20.5957f,  -4.1595f,
-      -1.9066f,   42.4707f,   -9.0435f,   0.0064f,    175.7328f,  163.1350f,
-      163.5085f,  151.1648f,  4.4620f,    -20.6011f,  -19.3402f,  1.5468f,
-      -32.0920f,  -25.4581f,  -12.3706f,  -2.1636f,   -32.4569f,  3.9365f,
-      61.0117f,   -28.4195f,  -31.0837f,  -30.2749f,  -27.5522f,  -22.8688f,
-      -0.3000f,   0.0092f,    -0.3675f,   -0.4113f,   0.0033f,    0.1138f,
-      0.2182f,    -0.5803f,   0.7507f,    -0.2529f,   -1.7724f,   -1.4702f,
-      -1.5805f,   -1.4294f,   0.1435f,    -0.0168f,   0.2356f,    -0.4373f,
-      -0.4500f,   -0.4803f,   -0.0041f,   -0.3878f,   0.1321f,    0.2761f,
-      -1.1975f,   -0.3509f,   -0.0465f,   -0.4050f,   -0.1110f,   0.2233f,
-      0.0950f,    0.0974f,    -0.1600f,   -0.1753f,   -0.0328f,   0.0741f,
-      -0.0706f,   0.1839f,    -0.0833f,   -0.1367f,   -0.1094f,   -0.1739f,
-      -0.1069f,   0.0370f,    -0.1404f,   0.1631f,    -0.1570f,   0.2117f,
-      -0.1891f,   0.0395f,    0.1081f,    0.1760f,    0.0997f,    0.0853f,
-      -0.1018f,   0.1306f,    -0.0924f,   -0.2078f,   0.0801f,    -0.0949f,
-      0.5803f,    0.5578f,    0.4089f,    0.1912f,    0.6774f,    0.3145f,
-      0.3992f,    -0.1316f,   1.3142f,    -0.2457f,   -2.3536f,   -2.4939f,
-      -2.3165f,   -2.4879f,   0.2321f,    0.1901f,    0.1789f,    -1.5215f,
-      0.2645f,    0.2231f,    0.2411f,    -1.2361f,   0.2971f,    0.1421f,
-      -1.6715f,   0.3158f,    0.2476f,    0.3596f,    0.3029f,    0.9297f,
-      -88.8401f,  -89.5209f,  -86.1926f,  -87.4196f,  -39.6504f,  -17.9684f,
-      -4.2702f,   80.2017f,   -29.1676f,  -0.4190f,   150.2820f,  138.4751f,
-      139.1087f,  126.6569f,  13.7188f,   -57.0739f,  -80.3383f,  -18.8351f,
-      -87.4103f,  -56.0072f,  -82.7707f,  -23.1871f,  -93.6787f,  13.9287f,
-      59.6213f,   -87.4843f,  -90.4227f,  -86.2635f,  -86.6841f,  -37.9086f,
-      0.1184f,    -0.2169f,   -0.1915f,   0.0543f,    0.1253f,    -0.1370f,
-      0.0836f,    -0.1198f,   0.1544f,    -0.2004f,   -0.1118f,   -0.0786f,
-      0.1517f,    -0.1000f,   -0.1055f,   0.0936f,    -0.1579f,   0.1098f,
-      -0.0234f,   -0.0499f,   0.0951f,    -0.1711f,   0.0186f,    -0.2008f,
-      0.1777f,    0.1386f,    -0.1495f,   -0.0684f,   -0.2149f,   -0.1198f,
-      -0.6205f,   -0.7209f,   -0.5487f,   -0.9080f,   1.3400f,    0.0085f,
-      28.2837f,   3.2217f,    -1.8463f,   0.1620f,    -464.3599f, -458.4327f,
-      -455.9967f, -451.0393f, 1.6619f,    -0.6944f,   -0.3167f,   -52.3630f,
-      -1.6971f,   -0.7340f,   -0.8923f,   -49.2771f,  -1.1177f,   1.8810f,
-      -258.9386f, -1.0765f,   -0.7279f,   -0.5208f,   -0.8839f,   1.8175f,
-      -78.8510f,  -80.5740f,  -77.8843f,  -77.9798f,  -36.5560f,  -16.0818f,
-      -5.5362f,   66.4228f,   -16.8150f,  0.0036f,    181.8365f,  167.7181f,
-      168.2344f,  153.9725f,  11.2659f,   -47.5786f,  -92.6978f,  6.7573f,
-      -68.7704f,  -48.3850f,  -95.3637f,  8.8888f,    -76.9497f,  11.2243f,
-      60.9020f,   -77.6515f,  -80.7610f,  -78.4537f,  -77.4659f,  -36.2872f,
-      -0.0936f,   0.1966f,    -0.2121f,   0.0193f,    0.0489f,    -0.1445f,
-      0.0060f,    0.0358f,    -0.0783f,   -0.0985f,   -0.2072f,   -0.0802f,
-      -0.0185f,   0.1868f,    -0.0631f,   0.1260f,    -0.0675f,   0.2167f,
-      -0.2174f,   -0.1085f,   0.1483f,    -0.1655f,   -0.1040f,   0.1605f,
-      -0.1673f,   -0.0148f,   -0.1856f,   -0.1454f,   0.1603f,    -0.1620f,
-      -0.9205f,   -1.2716f,   -3.6561f,   -5.0834f,   -0.7934f,   1.8710f,
-      2.2999f,    -2.9516f,   -1.7631f,   -0.3804f,   41.2998f,   26.2358f,
-      28.9763f,   15.7315f,   5.2164f,    3.2963f,    -5.4457f,   18.6310f,
-      -25.0076f,  5.4368f,    -12.0085f,  17.1462f,   -14.6992f,  5.6365f,
-      48.6207f,   -1.0921f,   -1.8723f,   -3.5354f,   -5.1774f,   -1.0200f,
-      -0.1065f,   -0.2021f,   0.0332f,    0.1692f,    -0.1239f,   0.1325f,
-      -0.0660f,   -0.0567f,   0.2107f,    -0.2084f,   -0.0263f,   0.1411f,
-      0.0178f,    0.0451f,    0.2024f,    -0.1756f,   -0.0771f,   -0.1690f,
-      -0.2097f,   -0.2130f,   0.0714f,    0.0172f,    -0.0310f,   0.0649f,
-      -0.1550f,   0.0701f,    0.0306f,    -0.1750f,   -0.1988f,   -0.2060f,
-      0.0005f,    -0.1325f,   -0.1823f,   -0.0900f,   -0.1291f,   -0.1817f,
-      0.0144f,    0.0951f,    -0.1954f,   -0.0171f,   -0.1985f,   0.0875f,
-      0.0901f,    -0.0857f,   0.1681f,    0.0465f,    0.1023f,    0.0985f,
-      -0.2152f,   -0.1723f,   -0.0825f,   0.0203f,    -0.1206f,   -0.1431f,
-      -0.1552f,   0.1344f,    0.0398f,    0.0169f,    0.2180f,    -0.1530f,
-      2.7964f,    2.7312f,    2.8831f,    3.4729f,    -3.1366f,   2.4043f,
-      -7.2004f,   1.4128f,    2.8648f,    0.0578f,    225.5640f,  210.3712f,
-      210.6907f,  195.0339f,  0.3140f,    1.8060f,    2.7355f,    33.6917f,
-      3.3542f,    3.3682f,    1.7371f,    31.2424f,   3.4094f,    -0.1192f,
-      63.0864f,   3.0562f,    2.8633f,    2.6777f,    3.5495f,    -4.2616f,
-      -1.4034f,   0.3930f,    -4.6756f,   -9.9870f,   -27.8511f,  5.6071f,
-      -1.0862f,   34.4907f,   -10.4831f,  -0.0281f,   117.2617f,  104.9590f,
-      106.1515f,  93.9707f,   -16.8801f,  5.3036f,    -21.7458f,  98.5306f,
-      -20.7596f,  6.4733f,    -17.6440f,  98.3097f,   -31.9540f,  -17.0600f,
-      27.4543f,   -0.6140f,   -1.6182f,   -4.9167f,   -8.9017f,   -26.2485f,
-      -0.1952f,   -0.0462f,   -0.1958f,   0.1679f,    -0.1592f,   -0.1634f,
-      -0.0507f,   -0.0542f,   0.0038f,    -0.0343f,   0.0567f,    -0.1983f,
-      0.0250f,    -0.0762f,   0.0902f,    -0.0343f,   0.1240f,    0.1161f,
-      0.1237f,    0.1870f,    0.0346f,    0.0340f,    0.0625f,    -0.0355f,
-      0.0278f,    -0.1043f,   0.1755f,    0.0253f,    0.1750f,    -0.2070f,
-      -5.5531f,   -5.3122f,   -4.9348f,   -4.4782f,   -7.5686f,   -1.5478f,
-      -5.4341f,   0.5087f,    -2.1382f,   0.0798f,    208.3677f,  194.0083f,
-      194.4168f,  179.3082f,  1.4443f,    -1.5038f,   -1.4021f,   25.9363f,
-      -4.0635f,   -2.6785f,   -1.6640f,   22.2589f,   -1.4910f,   1.4715f,
-      59.1972f,   -4.9638f,   -5.1920f,   -4.9193f,   -5.2649f,   -8.0556f,
-      20.1226f,   12.0195f,   9.7385f,    10.7058f,   -27.4201f,  8.4869f,
-      -5.0826f,   32.9212f,   -2.0674f,   -0.0290f,   120.5002f,  112.3222f,
-      112.3287f,  104.1107f,  -20.6293f,  14.8534f,   -0.8748f,   103.1141f,
-      -1.1368f,   15.3716f,   2.7653f,    91.7285f,   -0.5991f,   -20.7338f,
-      35.9363f,   20.5104f,   11.1988f,   9.0368f,    10.6355f,   -26.5309f,
-      -0.2058f,   -0.2176f,   0.1331f,    -0.1415f,   -0.0825f,   -0.0470f,
-      -0.0615f,   0.1274f,    0.0076f,    -0.0575f,   -0.2065f,   0.0866f,
-      0.2166f,    -0.1942f,   -0.1952f,   0.1323f,    -0.1016f,   0.1803f,
-      -0.0424f,   0.1555f,    0.1118f,    0.1559f,    0.0337f,    -0.0341f,
-      -0.0430f,   0.1988f,    -0.0553f,   -0.0255f,   0.1817f,    0.0608f,
-      0.1431f,    0.0686f,    -0.0245f,   -0.2107f,   0.2001f,    -0.0964f,
-      -0.0090f,   0.1151f,    -0.0365f,   -0.1986f,   0.1740f,    -0.2098f,
-      0.0013f,    0.1369f,    0.1910f,    0.1801f,    -0.2019f,   0.0348f,
-      -0.1175f,   0.0627f,    -0.1929f,   -0.0099f,   0.1349f,    0.1804f,
-      -0.1071f,   -0.1651f,   -0.1146f,   -0.0259f,   0.1626f,    -0.0271f,
-      0.1393f,    0.1304f,    -0.0200f,   0.0924f,    -0.0839f,   -0.0031f,
-      -0.1311f,   0.0350f,    -0.1330f,   -0.0911f,   0.1949f,    -0.0209f,
-      -0.1883f,   0.0269f,    0.2040f,    0.1552f,    0.1532f,    0.1157f,
-      -0.1102f,   -0.1220f,   -0.0808f,   -0.1050f,   0.1716f,    0.0846f,
-      -0.0180f,   -0.1037f,   0.2063f,    0.1237f,    0.1253f,    -0.0496f,
-      -0.0183f,   0.0491f,    0.1703f,    -0.0824f,   -0.0702f,   -0.1100f,
-      -0.0965f,   0.0130f,    -0.1222f,   -0.1081f,   0.0329f,    0.2115f,
-      -0.1438f,   0.0799f,    -0.1602f,   -0.0330f,   0.0501f,    0.1072f,
-      -0.0744f,   -0.1783f,   -0.0240f,   0.0777f,    -0.1944f,   0.0438f,
-      -0.0033f,   -0.1873f,   0.0984f,    -0.0318f,   0.0773f,    0.1489f,
-      0.3966f,    0.4711f,    0.3972f,    0.0623f,    0.5970f,    0.1018f,
-      0.1375f,    -0.1881f,   0.8921f,    -0.1854f,   -2.1138f,   -2.1178f,
-      -1.8295f,   -2.1703f,   0.5784f,    -0.1937f,   -0.0728f,   -0.9953f,
-      0.2442f,    -0.4074f,   -0.1591f,   -1.1660f,   0.4832f,    0.2203f,
-      -1.4957f,   0.1544f,    0.1810f,    0.2275f,    0.4075f,    0.8153f,
-      0.0715f,    0.0222f,    0.0463f,    -0.0201f,   0.0396f,    0.5951f,
-      -0.2779f,   -0.0306f,   0.7532f,    -0.1596f,   -4.1080f,   -3.7925f,
-      -3.8522f,   -3.2468f,   0.7728f,    0.0188f,    -0.1448f,   0.4084f,
-      -0.4666f,   -0.1036f,   -1.1469f,   0.4243f,    0.2778f,    0.9023f,
-      -3.0216f,   0.0384f,    -0.3348f,   -0.0314f,   -0.2788f,   0.0479f,
-      139.0773f,  131.6164f,  115.0392f,  111.1817f,  41.7596f,   9.5379f,
-      1.8542f,    46.9890f,   -12.8221f,  0.0241f,    52.9779f,   51.5268f,
-      50.8060f,   48.7028f,   -132.9665f, 118.3478f,  101.1239f,  81.4608f,
-      75.4251f,   121.0643f,  97.8947f,   86.8911f,   74.5576f,   -133.7606f,
-      29.2657f,   135.8916f,  131.3661f,  114.1687f,  111.0784f,  31.3790f,
-      -0.0807f,   -0.0657f,   -0.0027f,   0.0410f,    0.0765f,    0.1194f,
-      0.0953f,    -0.0060f,   0.1531f,    -0.2339f,   0.1488f,    -0.0615f,
-      -0.0579f,   0.0761f,    0.1250f,    -0.0469f,   0.1480f,    0.0683f,
-      -0.0049f,   0.1558f,    0.2168f,    -0.0736f,   0.1135f,    -0.1244f,
-      0.0725f,    -0.1297f,   -0.0215f,   -0.0412f,   -0.1632f,   -0.0200f,
-      -0.1346f,   -0.1954f,   0.0053f,    0.0151f,    0.1379f,    -0.1497f,
-      -0.0102f,   -0.0336f,   0.0900f,    -0.1706f,   -0.0932f,   -0.2084f,
-      0.1242f,    -0.2027f,   0.0849f,    -0.2139f,   -0.2015f,   0.0944f,
-      -0.0984f,   0.2082f,    0.1625f,    -0.0227f,   -0.1676f,   0.1021f,
-      0.1516f,    0.0245f,    0.0955f,    -0.1488f,   -0.0057f,   0.1783f,
-      -0.8568f,   -0.8175f,   -0.6282f,   -1.3107f,   1.5712f,    0.1044f,
-      28.2289f,   3.0885f,    -1.9829f,   0.1600f,    -465.9583f, -459.5893f,
-      -457.5055f, -452.7600f, 1.7229f,    -0.6620f,   -0.1065f,   -52.8017f,
-      -2.0293f,   -0.8224f,   -1.0389f,   -49.9049f,  -1.2250f,   1.7647f,
-      -259.2465f, -1.0978f,   -0.5169f,   -0.8721f,   -0.8197f,   1.9158f,
-      16.2234f,   15.8523f,   13.8343f,   9.8509f,    -21.4326f,  15.7650f,
-      -6.4451f,   34.8575f,   1.1387f,    -0.0223f,   117.7213f,  109.8494f,
-      109.7624f,  101.8532f,  -20.3275f,  16.0812f,   4.9165f,    92.4919f,
-      4.1615f,    13.8451f,   9.2112f,    97.1580f,   -8.7037f,   -20.4420f,
-      27.1105f,   17.4922f,   13.9998f,   12.3888f,   11.4705f,   -20.9568f,
-      0.5457f,    0.5322f,    0.2823f,    0.3581f,    0.5359f,    0.1576f,
-      0.1969f,    -0.0136f,   -0.2748f,   -0.3168f,   -0.3918f,   -0.2167f,
-      -0.1797f,   -0.1869f,   0.2986f,    -0.2116f,   -0.4226f,   -0.2022f,
-      0.9452f,    0.5474f,    -0.1218f,   0.2067f,    -0.1600f,   0.1937f,
-      0.0808f,    0.4877f,    0.5106f,    0.2626f,    0.5076f,    0.6228f,
-      0.5124f,    0.4044f,    0.4023f,    0.1222f,    2.5446f,    0.9623f,
-      24.9875f,   4.7442f,    -2.0551f,   0.1642f,    -449.9478f, -444.1841f,
-      -442.0153f, -437.1498f, 2.3209f,    -0.6986f,   -0.3456f,   -47.4074f,
-      -1.2374f,   -1.0939f,   -0.9112f,   -41.1851f,  -0.5064f,   2.4209f,
-      -263.4446f, -0.0433f,   0.3460f,    0.1475f,    0.3770f,    2.9154f,
-      0.2032f,    0.1527f,    0.2161f,    -0.1981f,   0.1893f,    -0.2003f,
-      0.1734f,    0.1713f,    0.1207f,    -0.2073f,   -0.1018f,   0.0770f,
-      0.0728f,    0.1665f,    0.0689f,    0.1884f,    -0.1399f,   -0.1326f,
-      -0.0518f,   -0.1948f,   0.1576f,    -0.1835f,   0.1436f,    0.0497f,
-      0.0883f,    -0.1253f,   -0.0417f,   -0.0507f,   -0.1555f,   0.2076f,
-      -2.4080f,   6.1616f,    -0.8564f,   -13.6773f,  -32.7238f,  -16.3144f,
-      -1.9828f,   20.5110f,   -17.0191f,  -1.7154f,   103.6642f,  95.3675f,
-      95.5662f,   86.9504f,   -35.5340f,  19.6681f,   -2.4900f,   65.0847f,
-      -15.8119f,  13.7256f,   -4.6753f,   63.4713f,   -6.5992f,   -34.2369f,
-      41.3959f,   -1.5528f,   3.8106f,    -0.7762f,   -12.3204f,  -35.1734f,
-      -83.9509f,  -87.4861f,  -83.5925f,  -81.5047f,  -54.1256f,  -45.7506f,
-      -13.5325f,  -6.0331f,   -8.5062f,   0.0261f,    189.9450f,  177.7870f,
-      178.6945f,  164.9762f,  9.8521f,    -68.0619f,  -68.6145f,  6.5056f,
-      -55.9651f,  -66.9540f,  -65.3349f,  -2.1954f,   -57.2408f,  8.6577f,
-      60.6966f,   -82.1056f,  -88.5245f,  -83.3057f,  -80.7283f,  -50.5285f,
-      -0.1397f,   0.1862f,    -0.0691f,   -0.0906f,   0.1560f,    0.1377f,
-      -0.0066f,   -0.0213f,   0.0708f,    -0.0386f,   -0.0015f,   -0.0020f,
-      -0.2122f,   0.0747f,    0.0795f,    0.0229f,    0.1923f,    -0.1661f,
-      0.0895f,    0.1176f,    0.1398f,    -0.0443f,   0.0934f,    0.0638f,
-      -0.1924f,   0.0602f,    0.0404f,    0.1597f,    0.1387f,    -0.0601f,
-      -28.3967f,  -21.8483f,  -25.5175f,  -29.9252f,  2.0161f,    -3.0092f,
-      7.7435f,    28.2367f,   -35.0188f,  -0.1578f,   105.0164f,  93.4495f,
-      94.9134f,   81.0315f,   4.3602f,    8.1303f,    -37.7665f,  -16.6986f,
-      -40.8902f,  8.2542f,    -33.3215f,  -2.0457f,   -69.0245f,  4.1016f,
-      47.2770f,   -25.8268f,  -23.6034f,  -26.4339f,  -27.8305f,  8.4468f,
-      13.8742f,   8.3874f,    4.2044f,    1.4619f,    -40.2909f,  -0.6358f,
-      -0.7982f,   36.1931f,   -17.3147f,  -0.3348f,   106.8135f,  96.5298f,
-      97.8829f,   86.9994f,   -25.8170f,  15.0652f,   -0.9181f,   85.8544f,
-      2.5475f,    9.8009f,    -3.5931f,   89.2017f,   -3.7252f,   -25.2986f,
-      22.5505f,   14.0434f,   7.0708f,    4.6646f,    1.5807f,    -39.4024f,
-      -0.1436f,   0.0256f,    0.0274f,    -0.2126f,   0.0401f,    0.0745f,
-      -0.0379f,   -0.0357f,   0.0777f,    -0.0709f,   -0.1093f,   -0.2047f,
-      -0.0713f,   -0.0478f,   -0.0908f,   0.1963f,    0.1282f,    0.0977f,
-      0.1304f,    0.2058f,    0.0700f,    0.0518f,    0.0239f,    0.0686f,
-      -0.1909f,   0.0828f,    -0.1243f,   -0.1920f,   0.1908f,    -0.0808f,
-      90.8028f,   89.2894f,   84.5339f,   83.3491f,   -13.3838f,  12.0240f,
-      -3.9443f,   63.0867f,   -2.5321f,   -0.0099f,   68.9140f,   66.3206f,
-      66.0278f,   63.1498f,   -83.7261f,  74.3448f,   73.4998f,   64.8477f,
-      69.7701f,   74.5878f,   71.0331f,   63.2116f,   74.3162f,   -83.9282f,
-      20.8163f,   89.6818f,   88.6452f,   83.7338f,   82.9360f,   -13.2357f,
-      0.1299f,    -0.1765f,   -0.0168f,   -0.1372f,   -0.1183f,   0.0472f,
-      0.1312f,    0.0267f,    0.0194f,    -0.1593f,   0.0059f,    0.1775f,
-      0.0668f,    -0.1239f,   -0.1982f,   -0.1415f,   -0.1659f,   -0.1148f,
-      0.0136f,    0.0913f,    -0.1254f,   -0.0357f,   0.0892f,    0.0835f,
-      -0.0554f,   0.1969f,    -0.0888f,   -0.0623f,   -0.0236f,   -0.1492f,
-      0.4196f,    0.3218f,    0.2287f,    0.5095f,    0.7210f,    0.2279f,
-      0.4523f,    -0.1832f,   1.3095f,    -0.2041f,   -2.1443f,   -2.1947f,
-      -1.9292f,   -2.1142f,   0.5840f,    0.1018f,    0.1011f,    -1.6565f,
-      0.4325f,    0.0424f,    0.2836f,    -1.7183f,   0.2595f,    0.2686f,
-      -1.8784f,   0.3891f,    0.3050f,    0.6195f,    0.2896f,    0.5905f,
-      -5.3024f,   -3.2518f,   -12.5192f,  -29.1732f,  1.6538f,    -1.8315f,
-      9.9788f,    10.5155f,   6.3234f,    -0.3460f,   76.9925f,   51.3785f,
-      55.7120f,   29.0432f,   5.5901f,    25.6578f,   -3.9565f,   13.0509f,
-      -106.0371f, 23.2124f,   -18.2004f,  8.4618f,    -69.3585f,  5.5651f,
-      80.0565f,   -6.4941f,   -5.3742f,   -14.4209f,  -24.1565f,  6.6801f,
-      -22.0585f,  -20.9909f,  -26.7939f,  -29.6890f,  -14.5085f,  2.1866f,
-      -4.2608f,   17.3977f,   -30.8824f,  -0.4017f,   135.6957f,  126.9320f,
-      127.0044f,  118.1835f,  -1.8768f,   -0.8629f,   -32.0882f,  44.7862f,
-      -23.9174f,  1.6485f,    -27.9940f,  51.9078f,   -48.5279f,  -1.7550f,
-      49.9230f,   -19.9785f,  -22.4647f,  -27.6911f,  -27.3197f,  -10.6545f,
-      -0.1922f,   -0.1999f,   -0.1396f,   0.1065f,    0.0085f,    -0.1940f,
-      0.0351f,    0.1285f,    -0.0292f,   -0.1296f,   0.1543f,    -0.2082f,
-      -0.1758f,   0.0719f,    0.0764f,    0.1394f,    -0.0255f,   -0.0370f,
-      0.1615f,    -0.0568f,   0.1920f,    -0.1631f,   0.0199f,    0.1884f,
-      0.0693f,    0.1074f,    -0.0273f,   0.1540f,    0.0098f,    0.2111f,
-      0.1805f,    -0.0555f,   0.1159f,    0.0469f,    0.1789f,    -0.1711f,
-      -0.1304f,   0.1912f,    -0.0737f,   -0.1408f,   0.1804f,    -0.2023f,
-      -0.0467f,   -0.1019f,   -0.0136f,   0.0691f,    0.1454f,    -0.0213f,
-      0.0929f,    -0.0958f,   0.1299f,    0.1137f,    0.1175f,    0.1042f,
-      -0.2081f,   -0.0737f,   0.0582f,    0.1640f,    0.2120f,    -0.0646f,
-      -0.0326f,   0.1976f,    0.1182f,    -0.1365f,   -0.1784f,   0.2113f,
-      0.0469f,    0.0763f,    -0.0197f,   -0.1902f,   0.1259f,    0.1598f,
-      -0.0180f,   -0.1339f,   -0.1675f,   -0.1884f,   -0.1973f,   0.1529f,
-      0.1160f,    0.2154f,    -0.1446f,   -0.1395f,   0.0355f,    0.1513f,
-      -0.2086f,   -0.1135f,   -0.1502f,   -0.0018f,   0.0486f,    -0.0110f,
-      -0.0843f,   -0.0716f,   -0.1367f,   0.0753f,    0.0114f,    0.0475f,
-      -0.0632f,   0.2045f,    -0.0512f,   -0.0906f,   -0.1071f,   -0.1957f,
-      0.1361f,    0.1821f,    -0.1684f,   -0.1383f,   0.1059f,    0.1579f,
-      -0.0064f,   -0.1205f,   -0.0718f,   -0.1323f,   -0.0174f,   -0.1092f,
-      -0.1915f,   0.1978f,    -0.1245f,   0.1297f,    -0.1542f,   0.1556f,
-      -0.1752f,   0.0718f,    -0.1020f,   -0.1970f,   0.0518f,    -0.0888f,
-      0.0541f,    -0.1922f,   -0.1467f,   -0.0653f,   -0.1940f,   -0.0800f,
-      -0.1096f,   -0.0796f,   -0.1310f,   0.0191f,    -0.1077f,   -0.0973f,
-      0.1566f,    0.0074f,    0.0500f,    -0.0415f,   -0.2116f,   0.0227f,
-      0.0895f,    0.1528f,    0.1404f,    0.0467f,    0.0462f,    -0.0973f,
-      -0.1669f,   0.0551f,    0.1167f,    -0.1470f,   -0.0542f,   -0.1006f,
-      0.2104f,    0.1039f,    -0.0211f,   -0.1726f,   -0.0694f,   -0.0270f,
-      0.0277f,    -0.0715f,   -0.2055f,   -0.1502f,   -0.1718f,   -0.0043f,
-      0.0174f,    0.1019f,    -0.0233f,   -0.1518f,   -0.1331f,   -0.0001f,
-      -0.1483f,   -0.2115f,   0.0666f,    0.0014f,    0.1601f,    -0.0690f,
-    };
-
-static const float av1_rdcost_model_nn_biases_layer0[NUM_HIDDEN_NODES] = {
-  0.156824f,   0.f,         0.130013f,   0.084482f,  -129.058197f, -15.090252f,
-  -3.859116f,  0.736356f,   -81.361557f, -0.001922f, -0.000713f,   0.440181f,
-  14.982646f,  1.282223f,   2.23122f,    94.26635f,  93.920929f,   0.614672f,
-  0.f,         0.315858f,   4.746014f,   0.116901f,  -35.661354f,  -75.148285f,
-  92.006989f,  -14.112332f, 86.673157f,  -0.000307f, -0.000544f,   0.f,
-  -7.851313f,  0.505186f,   0.f,         0.f,        -111.681091f, -0.937782f,
-  0.035789f,   0.f,         0.f,         -0.00102f,  -75.180527f,  0.f,
-  -63.821148f, 79.592392f,  0.085068f,   11.184906f, 1.25406f,     0.f,
-  -29.779242f, -0.181732f,  0.f,         0.425554f,  -90.78405f,   0.f,
-  -0.828326f,  -81.132179f, 0.f,         -2.757063f, 0.f,          0.f,
-  2.967951f,   -4.440599f,  0.f,         -5.105355f, 14.734543f,   0.f,
-  0.f,         0.f,         0.f,         0.295342f,  -0.026907f,   133.375412f,
-  -0.000855f,  0.f,         -0.875029f,  15.665165f, 0.437296f,    0.321257f,
-  -0.001932f,  -4.235782f,  -87.187782f, 0.f,        -28.84696f,   7.055514f,
-  0.f,         95.548302f,  -0.000425f,  0.38969f,   -13.88008f,   -27.347931f,
-  0.f,         0.f,         0.f,         -0.000026f, 0.f,          0.f,
-};
-
-static const float
-    av1_rdcost_model_nn_weights_layer1[NUM_HIDDEN_NODES * NUM_OUTPUTS] = {
-      -0.101706f,   -0.14411f,    -0.139118f,   -0.132945f,   118.811302f,
-      3.137232f,    -32.969776f,  -4.150725f,   26.263071f,   0.092841f,
-      0.174125f,    -0.028195f,   15.712872f,   17.722702f,   5.666006f,
-      -121.143929f, -131.933731f, -3.000318f,   -0.032063f,   -0.380065f,
-      -1.660653f,   -0.164802f,   7.177527f,    87.759155f,   -119.564224f,
-      -98.051651f,  -110.581116f, -0.069982f,   0.023906f,    0.183792f,
-      40.606274f,   -0.080804f,   -0.053744f,   -0.187848f,   157.44313f,
-      -4.820149f,   0.089499f,    0.070232f,    -0.043038f,   0.072996f,
-      93.347313f,   0.225259f,    103.223228f,  -110.682541f, 0.14314f,
-      -89.827538f,  6.505952f,    -0.076949f,   73.816132f,   -0.063416f,
-      -0.23736f,    -0.066059f,   116.049599f,  0.120871f,    -4.708246f,
-      107.501671f,  -0.206708f,   -32.688675f,  0.047608f,    -0.105907f,
-      6.505825f,    -75.461891f,  -0.160341f,   6.532121f,    -84.868111f,
-      -0.065622f,   0.044756f,    0.008672f,    0.017155f,    0.046108f,
-      -0.218818f,   -126.507957f, 0.028271f,    0.180625f,    -4.707376f,
-      -121.524307f, -0.03853f,    -4.103166f,   -0.018947f,   -95.768463f,
-      15.941695f,   0.147154f,    -102.863029f, -72.521698f,  -0.037133f,
-      -138.1492f,   0.210016f,    -0.084692f,   -68.693665f,  -52.523472f,
-      -0.133385f,   -0.17438f,    0.008654f,    -0.035642f,   -0.145202f,
-      0.211135f,
-    };
-
-static const float av1_rdcost_model_nn_biases_layer1[NUM_OUTPUTS] = {
-  0.251909f
-};
-
-static const NN_CONFIG av1_rdcost_model_nnconfig = {
-  NUM_FEATURES,
-  NUM_OUTPUTS,
-  NUM_HIDDEN_LAYERS,
-  {
-      NUM_HIDDEN_NODES,
-  },
-  {
-      av1_rdcost_model_nn_weights_layer0,
-      av1_rdcost_model_nn_weights_layer1,
-  },
-  {
-      av1_rdcost_model_nn_biases_layer0,
-      av1_rdcost_model_nn_biases_layer1,
-  },
-};
-
-//------------------------------------------------------------------------------
-
-#undef NUM_FEATURES
-#undef NUM_HIDDEN_LAYERS
-#undef NUM_HIDDEN_NODES
-#undef NUM_OUTPUTS
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_RATE_DISTORTION_MODEL_PARAMS_H_
diff --git a/third_party/aom/av1/encoder/ratectrl.c b/third_party/aom/av1/encoder/ratectrl.c
deleted file mode 100644
index 2597fb990..000000000
--- a/third_party/aom/av1/encoder/ratectrl.c
+++ /dev/null
@@ -1,1776 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <limits.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/system_state.h"
-
-#include "av1/common/alloccommon.h"
-#include "av1/encoder/aq_cyclicrefresh.h"
-#include "av1/common/common.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/seg_common.h"
-
-#include "av1/encoder/encodemv.h"
-#include "av1/encoder/random.h"
-#include "av1/encoder/ratectrl.h"
-
-// Max rate target for 1080P and below encodes under normal circumstances
-// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB
-#define MAX_MB_RATE 250
-#define MAXRATE_1080P 2025000
-
-#define DEFAULT_KF_BOOST 2000
-#define DEFAULT_GF_BOOST 2000
-
-#define MIN_BPB_FACTOR 0.005
-#define MAX_BPB_FACTOR 50
-
-#define FRAME_OVERHEAD_BITS 200
-#define ASSIGN_MINQ_TABLE(bit_depth, name)                   \
-  do {                                                       \
-    switch (bit_depth) {                                     \
-      case AOM_BITS_8: name = name##_8; break;               \
-      case AOM_BITS_10: name = name##_10; break;             \
-      case AOM_BITS_12: name = name##_12; break;             \
-      default:                                               \
-        assert(0 &&                                          \
-               "bit_depth should be AOM_BITS_8, AOM_BITS_10" \
-               " or AOM_BITS_12");                           \
-        name = NULL;                                         \
-    }                                                        \
-  } while (0)
-
-// Tables relating active max Q to active min Q
-static int kf_low_motion_minq_8[QINDEX_RANGE];
-static int kf_high_motion_minq_8[QINDEX_RANGE];
-static int arfgf_low_motion_minq_8[QINDEX_RANGE];
-static int arfgf_high_motion_minq_8[QINDEX_RANGE];
-static int inter_minq_8[QINDEX_RANGE];
-static int rtc_minq_8[QINDEX_RANGE];
-
-static int kf_low_motion_minq_10[QINDEX_RANGE];
-static int kf_high_motion_minq_10[QINDEX_RANGE];
-static int arfgf_low_motion_minq_10[QINDEX_RANGE];
-static int arfgf_high_motion_minq_10[QINDEX_RANGE];
-static int inter_minq_10[QINDEX_RANGE];
-static int rtc_minq_10[QINDEX_RANGE];
-static int kf_low_motion_minq_12[QINDEX_RANGE];
-static int kf_high_motion_minq_12[QINDEX_RANGE];
-static int arfgf_low_motion_minq_12[QINDEX_RANGE];
-static int arfgf_high_motion_minq_12[QINDEX_RANGE];
-static int inter_minq_12[QINDEX_RANGE];
-static int rtc_minq_12[QINDEX_RANGE];
-
-static int gf_high = 2000;
-static int gf_low = 400;
-static int kf_high = 5000;
-static int kf_low = 400;
-
-// How many times less pixels there are to encode given the current scaling.
-// Temporary replacement for rcf_mult and rate_thresh_mult.
-static double resize_rate_factor(const AV1_COMP *cpi, int width, int height) {
-  return (double)(cpi->oxcf.width * cpi->oxcf.height) / (width * height);
-}
-
-// Functions to compute the active minq lookup table entries based on a
-// formulaic approach to facilitate easier adjustment of the Q tables.
-// The formulae were derived from computing a 3rd order polynomial best
-// fit to the original data (after plotting real maxq vs minq (not q index))
-static int get_minq_index(double maxq, double x3, double x2, double x1,
-                          aom_bit_depth_t bit_depth) {
-  int i;
-  const double minqtarget = AOMMIN(((x3 * maxq + x2) * maxq + x1) * maxq, maxq);
-
-  // Special case handling to deal with the step from q2.0
-  // down to lossless mode represented by q 1.0.
-  if (minqtarget <= 2.0) return 0;
-
-  for (i = 0; i < QINDEX_RANGE; i++) {
-    if (minqtarget <= av1_convert_qindex_to_q(i, bit_depth)) return i;
-  }
-
-  return QINDEX_RANGE - 1;
-}
-
-static void init_minq_luts(int *kf_low_m, int *kf_high_m, int *arfgf_low,
-                           int *arfgf_high, int *inter, int *rtc,
-                           aom_bit_depth_t bit_depth) {
-  int i;
-  for (i = 0; i < QINDEX_RANGE; i++) {
-    const double maxq = av1_convert_qindex_to_q(i, bit_depth);
-    kf_low_m[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.150, bit_depth);
-    kf_high_m[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.45, bit_depth);
-    arfgf_low[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.30, bit_depth);
-    arfgf_high[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55, bit_depth);
-    inter[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.90, bit_depth);
-    rtc[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.70, bit_depth);
-  }
-}
-
-void av1_rc_init_minq_luts(void) {
-  init_minq_luts(kf_low_motion_minq_8, kf_high_motion_minq_8,
-                 arfgf_low_motion_minq_8, arfgf_high_motion_minq_8,
-                 inter_minq_8, rtc_minq_8, AOM_BITS_8);
-  init_minq_luts(kf_low_motion_minq_10, kf_high_motion_minq_10,
-                 arfgf_low_motion_minq_10, arfgf_high_motion_minq_10,
-                 inter_minq_10, rtc_minq_10, AOM_BITS_10);
-  init_minq_luts(kf_low_motion_minq_12, kf_high_motion_minq_12,
-                 arfgf_low_motion_minq_12, arfgf_high_motion_minq_12,
-                 inter_minq_12, rtc_minq_12, AOM_BITS_12);
-}
-
-// These functions use formulaic calculations to make playing with the
-// quantizer tables easier. If necessary they can be replaced by lookup
-// tables if and when things settle down in the experimental bitstream
-double av1_convert_qindex_to_q(int qindex, aom_bit_depth_t bit_depth) {
-  // Convert the index to a real Q value (scaled down to match old Q values)
-  switch (bit_depth) {
-    case AOM_BITS_8: return av1_ac_quant_Q3(qindex, 0, bit_depth) / 4.0;
-    case AOM_BITS_10: return av1_ac_quant_Q3(qindex, 0, bit_depth) / 16.0;
-    case AOM_BITS_12: return av1_ac_quant_Q3(qindex, 0, bit_depth) / 64.0;
-    default:
-      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
-      return -1.0;
-  }
-}
-
-int av1_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
-                       double correction_factor, aom_bit_depth_t bit_depth) {
-  const double q = av1_convert_qindex_to_q(qindex, bit_depth);
-  int enumerator = frame_type == KEY_FRAME ? 2700000 : 1800000;
-
-  assert(correction_factor <= MAX_BPB_FACTOR &&
-         correction_factor >= MIN_BPB_FACTOR);
-
-  // q based adjustment to baseline enumerator
-  enumerator += (int)(enumerator * q) >> 12;
-  return (int)(enumerator * correction_factor / q);
-}
-
-int av1_estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs,
-                           double correction_factor,
-                           aom_bit_depth_t bit_depth) {
-  const int bpm =
-      (int)(av1_rc_bits_per_mb(frame_type, q, correction_factor, bit_depth));
-  return AOMMAX(FRAME_OVERHEAD_BITS,
-                (int)((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS);
-}
-
-int av1_rc_clamp_pframe_target_size(const AV1_COMP *const cpi, int target) {
-  const RATE_CONTROL *rc = &cpi->rc;
-  const AV1EncoderConfig *oxcf = &cpi->oxcf;
-  const int min_frame_target =
-      AOMMAX(rc->min_frame_bandwidth, rc->avg_frame_bandwidth >> 5);
-  // Clip the frame target to the minimum setup value.
-  if (cpi->rc.is_src_frame_alt_ref) {
-    // If there is an active ARF at this location use the minimum
-    // bits on this frame even if it is a constructed arf.
-    // The active maximum quantizer insures that an appropriate
-    // number of bits will be spent if needed for constructed ARFs.
-    target = min_frame_target;
-  } else if (target < min_frame_target) {
-    target = min_frame_target;
-  }
-
-  // Clip the frame target to the maximum allowed value.
-  if (target > rc->max_frame_bandwidth) target = rc->max_frame_bandwidth;
-  if (oxcf->rc_max_inter_bitrate_pct) {
-    const int max_rate =
-        rc->avg_frame_bandwidth * oxcf->rc_max_inter_bitrate_pct / 100;
-    target = AOMMIN(target, max_rate);
-  }
-
-  return target;
-}
-
-int av1_rc_clamp_iframe_target_size(const AV1_COMP *const cpi, int target) {
-  const RATE_CONTROL *rc = &cpi->rc;
-  const AV1EncoderConfig *oxcf = &cpi->oxcf;
-  if (oxcf->rc_max_intra_bitrate_pct) {
-    const int max_rate =
-        rc->avg_frame_bandwidth * oxcf->rc_max_intra_bitrate_pct / 100;
-    target = AOMMIN(target, max_rate);
-  }
-  if (target > rc->max_frame_bandwidth) target = rc->max_frame_bandwidth;
-  return target;
-}
-
-// Update the buffer level: leaky bucket model.
-static void update_buffer_level(AV1_COMP *cpi, int encoded_frame_size) {
-  const AV1_COMMON *const cm = &cpi->common;
-  RATE_CONTROL *const rc = &cpi->rc;
-
-  // Non-viewable frames are a special case and are treated as pure overhead.
-  // TODO(zoeliu): To further explore whether we should treat BWDREF_FRAME
-  //               differently, since it is a no-show frame.
-  if (!cm->show_frame && !rc->is_bwd_ref_frame)
-    rc->bits_off_target -= encoded_frame_size;
-  else
-    rc->bits_off_target += rc->avg_frame_bandwidth - encoded_frame_size;
-
-  // Clip the buffer level to the maximum specified buffer size.
-  rc->bits_off_target = AOMMIN(rc->bits_off_target, rc->maximum_buffer_size);
-  rc->buffer_level = rc->bits_off_target;
-}
-
-int av1_rc_get_default_min_gf_interval(int width, int height,
-                                       double framerate) {
-  // Assume we do not need any constraint lower than 4K 20 fps
-  static const double factor_safe = 3840 * 2160 * 20.0;
-  const double factor = width * height * framerate;
-  const int default_interval =
-      clamp((int)(framerate * 0.125), MIN_GF_INTERVAL, MAX_GF_INTERVAL);
-
-  if (factor <= factor_safe)
-    return default_interval;
-  else
-    return AOMMAX(default_interval,
-                  (int)(MIN_GF_INTERVAL * factor / factor_safe + 0.5));
-  // Note this logic makes:
-  // 4K24: 5
-  // 4K30: 6
-  // 4K60: 12
-}
-
-int av1_rc_get_default_max_gf_interval(double framerate, int min_gf_interval) {
-  int interval = AOMMIN(MAX_GF_INTERVAL, (int)(framerate * 0.75));
-  interval += (interval & 0x01);  // Round to even value
-#if CONFIG_FIX_GF_LENGTH
-  interval = AOMMAX(FIXED_GF_LENGTH, interval);
-#endif
-  return AOMMAX(interval, min_gf_interval);
-}
-
-void av1_rc_init(const AV1EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
-  int i;
-
-  if (pass == 0 && oxcf->rc_mode == AOM_CBR) {
-    rc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q;
-    rc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q;
-  } else {
-    rc->avg_frame_qindex[KEY_FRAME] =
-        (oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2;
-    rc->avg_frame_qindex[INTER_FRAME] =
-        (oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2;
-  }
-
-  rc->last_q[KEY_FRAME] = oxcf->best_allowed_q;
-  rc->last_q[INTER_FRAME] = oxcf->worst_allowed_q;
-
-  rc->buffer_level = rc->starting_buffer_level;
-  rc->bits_off_target = rc->starting_buffer_level;
-
-  rc->rolling_target_bits = rc->avg_frame_bandwidth;
-  rc->rolling_actual_bits = rc->avg_frame_bandwidth;
-  rc->long_rolling_target_bits = rc->avg_frame_bandwidth;
-  rc->long_rolling_actual_bits = rc->avg_frame_bandwidth;
-
-  rc->total_actual_bits = 0;
-  rc->total_target_bits = 0;
-  rc->total_target_vs_actual = 0;
-
-  rc->frames_since_key = 8;  // Sensible default for first frame.
-  rc->this_key_frame_forced = 0;
-  rc->next_key_frame_forced = 0;
-  rc->source_alt_ref_pending = 0;
-  rc->source_alt_ref_active = 0;
-
-  rc->frames_till_gf_update_due = 0;
-  rc->ni_av_qi = oxcf->worst_allowed_q;
-  rc->ni_tot_qi = 0;
-  rc->ni_frames = 0;
-
-  rc->tot_q = 0.0;
-  rc->avg_q = av1_convert_qindex_to_q(oxcf->worst_allowed_q, oxcf->bit_depth);
-
-  for (i = 0; i < RATE_FACTOR_LEVELS; ++i) {
-    rc->rate_correction_factors[i] = 0.7;
-  }
-  rc->rate_correction_factors[KF_STD] = 1.0;
-  rc->min_gf_interval = oxcf->min_gf_interval;
-  rc->max_gf_interval = oxcf->max_gf_interval;
-  if (rc->min_gf_interval == 0)
-    rc->min_gf_interval = av1_rc_get_default_min_gf_interval(
-        oxcf->width, oxcf->height, oxcf->init_framerate);
-  if (rc->max_gf_interval == 0)
-    rc->max_gf_interval = av1_rc_get_default_max_gf_interval(
-        oxcf->init_framerate, rc->min_gf_interval);
-  rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2;
-}
-
-int av1_rc_drop_frame(AV1_COMP *cpi) {
-  const AV1EncoderConfig *oxcf = &cpi->oxcf;
-  RATE_CONTROL *const rc = &cpi->rc;
-
-  if (!oxcf->drop_frames_water_mark) {
-    return 0;
-  } else {
-    if (rc->buffer_level < 0) {
-      // Always drop if buffer is below 0.
-      return 1;
-    } else {
-      // If buffer is below drop_mark, for now just drop every other frame
-      // (starting with the next frame) until it increases back over drop_mark.
-      int drop_mark =
-          (int)(oxcf->drop_frames_water_mark * rc->optimal_buffer_level / 100);
-      if ((rc->buffer_level > drop_mark) && (rc->decimation_factor > 0)) {
-        --rc->decimation_factor;
-      } else if (rc->buffer_level <= drop_mark && rc->decimation_factor == 0) {
-        rc->decimation_factor = 1;
-      }
-      if (rc->decimation_factor > 0) {
-        if (rc->decimation_count > 0) {
-          --rc->decimation_count;
-          return 1;
-        } else {
-          rc->decimation_count = rc->decimation_factor;
-          return 0;
-        }
-      } else {
-        rc->decimation_count = 0;
-        return 0;
-      }
-    }
-  }
-}
-
-static double get_rate_correction_factor(const AV1_COMP *cpi, int width,
-                                         int height) {
-  const RATE_CONTROL *const rc = &cpi->rc;
-  double rcf;
-
-  if (cpi->common.frame_type == KEY_FRAME) {
-    rcf = rc->rate_correction_factors[KF_STD];
-  } else if (cpi->oxcf.pass == 2) {
-    RATE_FACTOR_LEVEL rf_lvl =
-        cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index];
-    rcf = rc->rate_correction_factors[rf_lvl];
-  } else {
-    if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
-        !rc->is_src_frame_alt_ref &&
-        (cpi->oxcf.rc_mode != AOM_CBR || cpi->oxcf.gf_cbr_boost_pct > 20))
-      rcf = rc->rate_correction_factors[GF_ARF_STD];
-    else
-      rcf = rc->rate_correction_factors[INTER_NORMAL];
-  }
-  rcf *= resize_rate_factor(cpi, width, height);
-  return fclamp(rcf, MIN_BPB_FACTOR, MAX_BPB_FACTOR);
-}
-
-static void set_rate_correction_factor(AV1_COMP *cpi, double factor, int width,
-                                       int height) {
-  RATE_CONTROL *const rc = &cpi->rc;
-
-  // Normalize RCF to account for the size-dependent scaling factor.
-  factor /= resize_rate_factor(cpi, width, height);
-
-  factor = fclamp(factor, MIN_BPB_FACTOR, MAX_BPB_FACTOR);
-
-  if (cpi->common.frame_type == KEY_FRAME) {
-    rc->rate_correction_factors[KF_STD] = factor;
-  } else if (cpi->oxcf.pass == 2) {
-    RATE_FACTOR_LEVEL rf_lvl =
-        cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index];
-    rc->rate_correction_factors[rf_lvl] = factor;
-  } else {
-    if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
-        !rc->is_src_frame_alt_ref &&
-        (cpi->oxcf.rc_mode != AOM_CBR || cpi->oxcf.gf_cbr_boost_pct > 20))
-      rc->rate_correction_factors[GF_ARF_STD] = factor;
-    else
-      rc->rate_correction_factors[INTER_NORMAL] = factor;
-  }
-}
-
-void av1_rc_update_rate_correction_factors(AV1_COMP *cpi, int width,
-                                           int height) {
-  const AV1_COMMON *const cm = &cpi->common;
-  int correction_factor = 100;
-  double rate_correction_factor =
-      get_rate_correction_factor(cpi, width, height);
-  double adjustment_limit;
-  const int MBs = av1_get_MBs(width, height);
-
-  int projected_size_based_on_q = 0;
-
-  // Do not update the rate factors for arf overlay frames.
-  if (cpi->rc.is_src_frame_alt_ref) return;
-
-  // Clear down mmx registers to allow floating point in what follows
-  aom_clear_system_state();
-
-  // Work out how big we would have expected the frame to be at this Q given
-  // the current correction factor.
-  // Stay in double to avoid int overflow when values are large
-  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->common.seg.enabled) {
-    projected_size_based_on_q =
-        av1_cyclic_refresh_estimate_bits_at_q(cpi, rate_correction_factor);
-  } else {
-    projected_size_based_on_q = av1_estimate_bits_at_q(
-        cpi->common.frame_type, cm->base_qindex, MBs, rate_correction_factor,
-        cm->seq_params.bit_depth);
-  }
-  // Work out a size correction factor.
-  if (projected_size_based_on_q > FRAME_OVERHEAD_BITS)
-    correction_factor = (int)((100 * (int64_t)cpi->rc.projected_frame_size) /
-                              projected_size_based_on_q);
-
-  // More heavily damped adjustment used if we have been oscillating either side
-  // of target.
-  if (correction_factor > 0) {
-    adjustment_limit =
-        0.25 + 0.5 * AOMMIN(1, fabs(log10(0.01 * correction_factor)));
-  } else {
-    adjustment_limit = 0.75;
-  }
-
-  cpi->rc.q_2_frame = cpi->rc.q_1_frame;
-  cpi->rc.q_1_frame = cm->base_qindex;
-  cpi->rc.rc_2_frame = cpi->rc.rc_1_frame;
-  if (correction_factor > 110)
-    cpi->rc.rc_1_frame = -1;
-  else if (correction_factor < 90)
-    cpi->rc.rc_1_frame = 1;
-  else
-    cpi->rc.rc_1_frame = 0;
-
-  if (correction_factor > 102) {
-    // We are not already at the worst allowable quality
-    correction_factor =
-        (int)(100 + ((correction_factor - 100) * adjustment_limit));
-    rate_correction_factor = (rate_correction_factor * correction_factor) / 100;
-    // Keep rate_correction_factor within limits
-    if (rate_correction_factor > MAX_BPB_FACTOR)
-      rate_correction_factor = MAX_BPB_FACTOR;
-  } else if (correction_factor < 99) {
-    // We are not already at the best allowable quality
-    correction_factor =
-        (int)(100 - ((100 - correction_factor) * adjustment_limit));
-    rate_correction_factor = (rate_correction_factor * correction_factor) / 100;
-
-    // Keep rate_correction_factor within limits
-    if (rate_correction_factor < MIN_BPB_FACTOR)
-      rate_correction_factor = MIN_BPB_FACTOR;
-  }
-
-  set_rate_correction_factor(cpi, rate_correction_factor, width, height);
-}
-
-int av1_rc_regulate_q(const AV1_COMP *cpi, int target_bits_per_frame,
-                      int active_best_quality, int active_worst_quality,
-                      int width, int height) {
-  const AV1_COMMON *const cm = &cpi->common;
-  int q = active_worst_quality;
-  int last_error = INT_MAX;
-  int i, target_bits_per_mb, bits_per_mb_at_this_q;
-  const int MBs = av1_get_MBs(width, height);
-  const double correction_factor =
-      get_rate_correction_factor(cpi, width, height);
-
-  // Calculate required scaling factor based on target frame size and size of
-  // frame produced using previous Q.
-  target_bits_per_mb =
-      (int)((uint64_t)(target_bits_per_frame) << BPER_MB_NORMBITS) / MBs;
-
-  i = active_best_quality;
-
-  do {
-    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
-      bits_per_mb_at_this_q =
-          (int)av1_cyclic_refresh_rc_bits_per_mb(cpi, i, correction_factor);
-    } else {
-      bits_per_mb_at_this_q = (int)av1_rc_bits_per_mb(
-          cm->frame_type, i, correction_factor, cm->seq_params.bit_depth);
-    }
-
-    if (bits_per_mb_at_this_q <= target_bits_per_mb) {
-      if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error)
-        q = i;
-      else
-        q = i - 1;
-
-      break;
-    } else {
-      last_error = bits_per_mb_at_this_q - target_bits_per_mb;
-    }
-  } while (++i <= active_worst_quality);
-
-  // In CBR mode, this makes sure q is between oscillating Qs to prevent
-  // resonance.
-  if (cpi->oxcf.rc_mode == AOM_CBR &&
-      (cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) &&
-      cpi->rc.q_1_frame != cpi->rc.q_2_frame) {
-    q = clamp(q, AOMMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame),
-              AOMMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame));
-  }
-  return q;
-}
-
-static int get_active_quality(int q, int gfu_boost, int low, int high,
-                              int *low_motion_minq, int *high_motion_minq) {
-  if (gfu_boost > high) {
-    return low_motion_minq[q];
-  } else if (gfu_boost < low) {
-    return high_motion_minq[q];
-  } else {
-    const int gap = high - low;
-    const int offset = high - gfu_boost;
-    const int qdiff = high_motion_minq[q] - low_motion_minq[q];
-    const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap;
-    return low_motion_minq[q] + adjustment;
-  }
-}
-
-static int get_kf_active_quality(const RATE_CONTROL *const rc, int q,
-                                 aom_bit_depth_t bit_depth) {
-  int *kf_low_motion_minq;
-  int *kf_high_motion_minq;
-  ASSIGN_MINQ_TABLE(bit_depth, kf_low_motion_minq);
-  ASSIGN_MINQ_TABLE(bit_depth, kf_high_motion_minq);
-  return get_active_quality(q, rc->kf_boost, kf_low, kf_high,
-                            kf_low_motion_minq, kf_high_motion_minq);
-}
-
-static int get_gf_active_quality(const RATE_CONTROL *const rc, int q,
-                                 aom_bit_depth_t bit_depth) {
-  int *arfgf_low_motion_minq;
-  int *arfgf_high_motion_minq;
-  ASSIGN_MINQ_TABLE(bit_depth, arfgf_low_motion_minq);
-  ASSIGN_MINQ_TABLE(bit_depth, arfgf_high_motion_minq);
-  return get_active_quality(q, rc->gfu_boost, gf_low, gf_high,
-                            arfgf_low_motion_minq, arfgf_high_motion_minq);
-}
-
-#if REDUCE_LAST_ALT_BOOST
-static int get_gf_high_motion_quality(int q, aom_bit_depth_t bit_depth) {
-  int *arfgf_high_motion_minq;
-  ASSIGN_MINQ_TABLE(bit_depth, arfgf_high_motion_minq);
-  return arfgf_high_motion_minq[q];
-}
-#endif
-
-static int calc_active_worst_quality_one_pass_vbr(const AV1_COMP *cpi) {
-  const RATE_CONTROL *const rc = &cpi->rc;
-  const unsigned int curr_frame = cpi->common.current_video_frame;
-  int active_worst_quality;
-
-  if (cpi->common.frame_type == KEY_FRAME) {
-    active_worst_quality =
-        curr_frame == 0 ? rc->worst_quality : rc->last_q[KEY_FRAME] * 2;
-  } else {
-    if (!rc->is_src_frame_alt_ref &&
-        (cpi->refresh_golden_frame || cpi->refresh_alt2_ref_frame ||
-         cpi->refresh_alt_ref_frame)) {
-      active_worst_quality = curr_frame == 1 ? rc->last_q[KEY_FRAME] * 5 / 4
-                                             : rc->last_q[INTER_FRAME];
-    } else {
-      active_worst_quality = curr_frame == 1 ? rc->last_q[KEY_FRAME] * 2
-                                             : rc->last_q[INTER_FRAME] * 2;
-    }
-  }
-  return AOMMIN(active_worst_quality, rc->worst_quality);
-}
-
-// Adjust active_worst_quality level based on buffer level.
-static int calc_active_worst_quality_one_pass_cbr(const AV1_COMP *cpi) {
-  // Adjust active_worst_quality: If buffer is above the optimal/target level,
-  // bring active_worst_quality down depending on fullness of buffer.
-  // If buffer is below the optimal level, let the active_worst_quality go from
-  // ambient Q (at buffer = optimal level) to worst_quality level
-  // (at buffer = critical level).
-  const AV1_COMMON *const cm = &cpi->common;
-  const RATE_CONTROL *rc = &cpi->rc;
-  // Buffer level below which we push active_worst to worst_quality.
-  int64_t critical_level = rc->optimal_buffer_level >> 3;
-  int64_t buff_lvl_step = 0;
-  int adjustment = 0;
-  int active_worst_quality;
-  int ambient_qp;
-  if (cm->frame_type == KEY_FRAME) return rc->worst_quality;
-  // For ambient_qp we use minimum of avg_frame_qindex[KEY_FRAME/INTER_FRAME]
-  // for the first few frames following key frame. These are both initialized
-  // to worst_quality and updated with (3/4, 1/4) average in postencode_update.
-  // So for first few frames following key, the qp of that key frame is weighted
-  // into the active_worst_quality setting.
-  ambient_qp = (cm->current_video_frame < 5)
-                   ? AOMMIN(rc->avg_frame_qindex[INTER_FRAME],
-                            rc->avg_frame_qindex[KEY_FRAME])
-                   : rc->avg_frame_qindex[INTER_FRAME];
-  active_worst_quality = AOMMIN(rc->worst_quality, ambient_qp * 5 / 4);
-  if (rc->buffer_level > rc->optimal_buffer_level) {
-    // Adjust down.
-    // Maximum limit for down adjustment, ~30%.
-    int max_adjustment_down = active_worst_quality / 3;
-    if (max_adjustment_down) {
-      buff_lvl_step = ((rc->maximum_buffer_size - rc->optimal_buffer_level) /
-                       max_adjustment_down);
-      if (buff_lvl_step)
-        adjustment = (int)((rc->buffer_level - rc->optimal_buffer_level) /
-                           buff_lvl_step);
-      active_worst_quality -= adjustment;
-    }
-  } else if (rc->buffer_level > critical_level) {
-    // Adjust up from ambient Q.
-    if (critical_level) {
-      buff_lvl_step = (rc->optimal_buffer_level - critical_level);
-      if (buff_lvl_step) {
-        adjustment = (int)((rc->worst_quality - ambient_qp) *
-                           (rc->optimal_buffer_level - rc->buffer_level) /
-                           buff_lvl_step);
-      }
-      active_worst_quality = ambient_qp + adjustment;
-    }
-  } else {
-    // Set to worst_quality if buffer is below critical level.
-    active_worst_quality = rc->worst_quality;
-  }
-  return active_worst_quality;
-}
-
-static int rc_pick_q_and_bounds_one_pass_cbr(const AV1_COMP *cpi, int width,
-                                             int height, int *bottom_index,
-                                             int *top_index) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const RATE_CONTROL *const rc = &cpi->rc;
-  int active_best_quality;
-  int active_worst_quality = calc_active_worst_quality_one_pass_cbr(cpi);
-  int q;
-  int *rtc_minq;
-  const int bit_depth = cm->seq_params.bit_depth;
-  ASSIGN_MINQ_TABLE(bit_depth, rtc_minq);
-
-  if (frame_is_intra_only(cm)) {
-    active_best_quality = rc->best_quality;
-    // Handle the special case for key frames forced when we have reached
-    // the maximum key frame interval. Here force the Q to a range
-    // based on the ambient Q to reduce the risk of popping.
-    if (rc->this_key_frame_forced) {
-      int qindex = rc->last_boosted_qindex;
-      double last_boosted_q = av1_convert_qindex_to_q(qindex, bit_depth);
-      int delta_qindex = av1_compute_qdelta(rc, last_boosted_q,
-                                            (last_boosted_q * 0.75), bit_depth);
-      active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
-    } else if (cm->current_video_frame > 0) {
-      // not first frame of one pass and kf_boost is set
-      double q_adj_factor = 1.0;
-      double q_val;
-
-      active_best_quality =
-          get_kf_active_quality(rc, rc->avg_frame_qindex[KEY_FRAME], bit_depth);
-
-      // Allow somewhat lower kf minq with small image formats.
-      if ((width * height) <= (352 * 288)) {
-        q_adj_factor -= 0.25;
-      }
-
-      // Convert the adjustment factor to a qindex delta
-      // on active_best_quality.
-      q_val = av1_convert_qindex_to_q(active_best_quality, bit_depth);
-      active_best_quality +=
-          av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, bit_depth);
-    }
-  } else if (!rc->is_src_frame_alt_ref &&
-             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
-    // Use the lower of active_worst_quality and recent
-    // average Q as basis for GF/ARF best Q limit unless last frame was
-    // a key frame.
-    if (rc->frames_since_key > 1 &&
-        rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
-      q = rc->avg_frame_qindex[INTER_FRAME];
-    } else {
-      q = active_worst_quality;
-    }
-    active_best_quality = get_gf_active_quality(rc, q, bit_depth);
-  } else {
-    // Use the lower of active_worst_quality and recent/average Q.
-    if (cm->current_video_frame > 1) {
-      if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
-        active_best_quality = rtc_minq[rc->avg_frame_qindex[INTER_FRAME]];
-      else
-        active_best_quality = rtc_minq[active_worst_quality];
-    } else {
-      if (rc->avg_frame_qindex[KEY_FRAME] < active_worst_quality)
-        active_best_quality = rtc_minq[rc->avg_frame_qindex[KEY_FRAME]];
-      else
-        active_best_quality = rtc_minq[active_worst_quality];
-    }
-  }
-
-  // Clip the active best and worst quality values to limits
-  active_best_quality =
-      clamp(active_best_quality, rc->best_quality, rc->worst_quality);
-  active_worst_quality =
-      clamp(active_worst_quality, active_best_quality, rc->worst_quality);
-
-  *top_index = active_worst_quality;
-  *bottom_index = active_best_quality;
-
-  // Limit Q range for the adaptive loop.
-  if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced &&
-      !(cm->current_video_frame == 0)) {
-    int qdelta = 0;
-    aom_clear_system_state();
-    qdelta = av1_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
-                                        active_worst_quality, 2.0, bit_depth);
-    *top_index = active_worst_quality + qdelta;
-    *top_index = AOMMAX(*top_index, *bottom_index);
-  }
-
-  // Special case code to try and match quality with forced key frames
-  if (cm->frame_type == KEY_FRAME && rc->this_key_frame_forced) {
-    q = rc->last_boosted_qindex;
-  } else {
-    q = av1_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality,
-                          active_worst_quality, width, height);
-    if (q > *top_index) {
-      // Special case when we are targeting the max allowed rate
-      if (rc->this_frame_target >= rc->max_frame_bandwidth)
-        *top_index = q;
-      else
-        q = *top_index;
-    }
-  }
-
-  assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality);
-  assert(*bottom_index <= rc->worst_quality &&
-         *bottom_index >= rc->best_quality);
-  assert(q <= rc->worst_quality && q >= rc->best_quality);
-  return q;
-}
-
-static int get_active_cq_level(const RATE_CONTROL *rc,
-                               const AV1EncoderConfig *const oxcf) {
-  static const double cq_adjust_threshold = 0.1;
-  int active_cq_level = oxcf->cq_level;
-  if (oxcf->rc_mode == AOM_CQ && rc->total_target_bits > 0) {
-    const double x = (double)rc->total_actual_bits / rc->total_target_bits;
-    if (x < cq_adjust_threshold) {
-      active_cq_level = (int)(active_cq_level * x / cq_adjust_threshold);
-    }
-  }
-  return active_cq_level;
-}
-
-static int rc_pick_q_and_bounds_one_pass_vbr(const AV1_COMP *cpi, int width,
-                                             int height, int *bottom_index,
-                                             int *top_index) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const RATE_CONTROL *const rc = &cpi->rc;
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-  const int cq_level = get_active_cq_level(rc, oxcf);
-  int active_best_quality;
-  int active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi);
-  int q;
-  int *inter_minq;
-  const int bit_depth = cm->seq_params.bit_depth;
-  ASSIGN_MINQ_TABLE(bit_depth, inter_minq);
-
-  if (frame_is_intra_only(cm)) {
-    if (oxcf->rc_mode == AOM_Q) {
-      const int qindex = cq_level;
-      const double q_val = av1_convert_qindex_to_q(qindex, bit_depth);
-      const int delta_qindex =
-          av1_compute_qdelta(rc, q_val, q_val * 0.25, bit_depth);
-      active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
-    } else if (rc->this_key_frame_forced) {
-      const int qindex = rc->last_boosted_qindex;
-      const double last_boosted_q = av1_convert_qindex_to_q(qindex, bit_depth);
-      const int delta_qindex = av1_compute_qdelta(
-          rc, last_boosted_q, last_boosted_q * 0.75, bit_depth);
-      active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
-    } else {  // not first frame of one pass and kf_boost is set
-      double q_adj_factor = 1.0;
-
-      active_best_quality =
-          get_kf_active_quality(rc, rc->avg_frame_qindex[KEY_FRAME], bit_depth);
-
-      // Allow somewhat lower kf minq with small image formats.
-      if ((width * height) <= (352 * 288)) {
-        q_adj_factor -= 0.25;
-      }
-
-      // Convert the adjustment factor to a qindex delta on active_best_quality.
-      {
-        const double q_val =
-            av1_convert_qindex_to_q(active_best_quality, bit_depth);
-        active_best_quality +=
-            av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, bit_depth);
-      }
-    }
-  } else if (!rc->is_src_frame_alt_ref &&
-             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
-    // Use the lower of active_worst_quality and recent
-    // average Q as basis for GF/ARF best Q limit unless last frame was
-    // a key frame.
-    q = (rc->frames_since_key > 1 &&
-         rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
-            ? rc->avg_frame_qindex[INTER_FRAME]
-            : rc->avg_frame_qindex[KEY_FRAME];
-    // For constrained quality dont allow Q less than the cq level
-    if (oxcf->rc_mode == AOM_CQ) {
-      if (q < cq_level) q = cq_level;
-      active_best_quality = get_gf_active_quality(rc, q, bit_depth);
-      // Constrained quality use slightly lower active best.
-      active_best_quality = active_best_quality * 15 / 16;
-    } else if (oxcf->rc_mode == AOM_Q) {
-      const int qindex = cq_level;
-      const double q_val = av1_convert_qindex_to_q(qindex, bit_depth);
-      const int delta_qindex =
-          (cpi->refresh_alt_ref_frame)
-              ? av1_compute_qdelta(rc, q_val, q_val * 0.40, bit_depth)
-              : av1_compute_qdelta(rc, q_val, q_val * 0.50, bit_depth);
-      active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
-    } else {
-      active_best_quality = get_gf_active_quality(rc, q, bit_depth);
-    }
-  } else {
-    if (oxcf->rc_mode == AOM_Q) {
-      const int qindex = cq_level;
-      const double q_val = av1_convert_qindex_to_q(qindex, bit_depth);
-      const double delta_rate[FIXED_GF_INTERVAL] = { 0.50, 1.0, 0.85, 1.0,
-                                                     0.70, 1.0, 0.85, 1.0 };
-      const int delta_qindex = av1_compute_qdelta(
-          rc, q_val,
-          q_val * delta_rate[cm->current_video_frame % FIXED_GF_INTERVAL],
-          bit_depth);
-      active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
-    } else {
-      // Use the lower of active_worst_quality and recent/average Q.
-      active_best_quality = (cm->current_video_frame > 1)
-                                ? inter_minq[rc->avg_frame_qindex[INTER_FRAME]]
-                                : inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
-      // For the constrained quality mode we don't want
-      // q to fall below the cq level.
-      if ((oxcf->rc_mode == AOM_CQ) && (active_best_quality < cq_level)) {
-        active_best_quality = cq_level;
-      }
-    }
-  }
-
-  // Clip the active best and worst quality values to limits
-  active_best_quality =
-      clamp(active_best_quality, rc->best_quality, rc->worst_quality);
-  active_worst_quality =
-      clamp(active_worst_quality, active_best_quality, rc->worst_quality);
-
-  *top_index = active_worst_quality;
-  *bottom_index = active_best_quality;
-
-  // Limit Q range for the adaptive loop.
-  {
-    int qdelta = 0;
-    aom_clear_system_state();
-    if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced &&
-        !(cm->current_video_frame == 0)) {
-      qdelta = av1_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
-                                          active_worst_quality, 2.0, bit_depth);
-    } else if (!rc->is_src_frame_alt_ref &&
-               (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
-      qdelta = av1_compute_qdelta_by_rate(
-          &cpi->rc, cm->frame_type, active_worst_quality, 1.75, bit_depth);
-    }
-    *top_index = active_worst_quality + qdelta;
-    *top_index = AOMMAX(*top_index, *bottom_index);
-  }
-
-  if (oxcf->rc_mode == AOM_Q) {
-    q = active_best_quality;
-    // Special case code to try and match quality with forced key frames
-  } else if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced) {
-    q = rc->last_boosted_qindex;
-  } else {
-    q = av1_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality,
-                          active_worst_quality, width, height);
-    if (q > *top_index) {
-      // Special case when we are targeting the max allowed rate
-      if (rc->this_frame_target >= rc->max_frame_bandwidth)
-        *top_index = q;
-      else
-        q = *top_index;
-    }
-  }
-
-  assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality);
-  assert(*bottom_index <= rc->worst_quality &&
-         *bottom_index >= rc->best_quality);
-  assert(q <= rc->worst_quality && q >= rc->best_quality);
-  return q;
-}
-
-int av1_frame_type_qdelta(const AV1_COMP *cpi, int rf_level, int q) {
-  static const FRAME_TYPE frame_type[RATE_FACTOR_LEVELS] = {
-    INTER_FRAME, INTER_FRAME, INTER_FRAME, INTER_FRAME, INTER_FRAME, KEY_FRAME
-  };
-  const AV1_COMMON *const cm = &cpi->common;
-  int qdelta = av1_compute_qdelta_by_rate(&cpi->rc, frame_type[rf_level], q,
-                                          rate_factor_deltas[rf_level],
-                                          cm->seq_params.bit_depth);
-  return qdelta;
-}
-
-#define STATIC_MOTION_THRESH 95
-static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int width,
-                                         int height, int *bottom_index,
-                                         int *top_index, int *arf_q) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const RATE_CONTROL *const rc = &cpi->rc;
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-  const GF_GROUP *gf_group = &cpi->twopass.gf_group;
-  const int cq_level = get_active_cq_level(rc, oxcf);
-  int active_best_quality;
-  int active_worst_quality = cpi->twopass.active_worst_quality;
-  int q;
-  int *inter_minq;
-  const int bit_depth = cm->seq_params.bit_depth;
-  ASSIGN_MINQ_TABLE(bit_depth, inter_minq);
-
-#if CUSTOMIZED_GF
-  const int is_intrl_arf_boost =
-      gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE;
-#else
-  const int is_intrl_arf_boost = cpi->refresh_alt2_ref_frame;
-#endif  // CUSTOMIZED_GF
-
-  if (frame_is_intra_only(cm)) {
-    // Handle the special case for key frames forced when we have reached
-    // the maximum key frame interval. Here force the Q to a range
-    // based on the ambient Q to reduce the risk of popping.
-    if (rc->this_key_frame_forced) {
-      double last_boosted_q;
-      int delta_qindex;
-      int qindex;
-
-      if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
-        qindex = AOMMIN(rc->last_kf_qindex, rc->last_boosted_qindex);
-        active_best_quality = qindex;
-        last_boosted_q = av1_convert_qindex_to_q(qindex, bit_depth);
-        delta_qindex = av1_compute_qdelta(rc, last_boosted_q,
-                                          last_boosted_q * 1.25, bit_depth);
-        active_worst_quality =
-            AOMMIN(qindex + delta_qindex, active_worst_quality);
-      } else {
-        qindex = rc->last_boosted_qindex;
-        last_boosted_q = av1_convert_qindex_to_q(qindex, bit_depth);
-        delta_qindex = av1_compute_qdelta(rc, last_boosted_q,
-                                          last_boosted_q * 0.5, bit_depth);
-        active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
-      }
-    } else {
-      // Not forced keyframe.
-      double q_adj_factor = 1.0;
-      double q_val;
-
-      // Baseline value derived from cpi->active_worst_quality and kf boost.
-      active_best_quality =
-          get_kf_active_quality(rc, active_worst_quality, bit_depth);
-
-      // Allow somewhat lower kf minq with small image formats.
-      if ((width * height) <= (352 * 288)) {
-        q_adj_factor -= 0.25;
-      }
-
-      // Make a further adjustment based on the kf zero motion measure.
-      q_adj_factor += 0.05 - (0.001 * (double)cpi->twopass.kf_zeromotion_pct);
-
-      // Convert the adjustment factor to a qindex delta
-      // on active_best_quality.
-      q_val = av1_convert_qindex_to_q(active_best_quality, bit_depth);
-      active_best_quality +=
-          av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, bit_depth);
-    }
-  } else if (!rc->is_src_frame_alt_ref &&
-             (cpi->refresh_golden_frame || is_intrl_arf_boost ||
-              cpi->refresh_alt_ref_frame)) {
-    // Use the lower of active_worst_quality and recent
-    // average Q as basis for GF/ARF best Q limit unless last frame was
-    // a key frame.
-    if (rc->frames_since_key > 1 &&
-        rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
-      q = rc->avg_frame_qindex[INTER_FRAME];
-    } else {
-      q = active_worst_quality;
-    }
-    // For constrained quality dont allow Q less than the cq level
-    if (oxcf->rc_mode == AOM_CQ) {
-      if (q < cq_level) q = cq_level;
-#if USE_SYMM_MULTI_LAYER && MULTI_LVL_BOOST_VBR_CQ
-      if (gf_group->update_type[gf_group->index] == ARF_UPDATE ||
-          (is_intrl_arf_boost && !cpi->new_bwdref_update_rule)) {
-#endif  // USE_SYMM_MULTI_LAYER && MULTI_LVL_BOOST_VBR_CQ
-        active_best_quality = get_gf_active_quality(rc, q, bit_depth);
-
-        // Constrained quality use slightly lower active best.
-        active_best_quality = active_best_quality * 15 / 16;
-#if REDUCE_LAST_ALT_BOOST
-        if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
-          const int min_boost = get_gf_high_motion_quality(q, bit_depth);
-          const int boost = min_boost - active_best_quality;
-
-          active_best_quality = min_boost - (int)(boost * rc->arf_boost_factor);
-        }
-#endif
-        *arf_q = active_best_quality;
-#if USE_SYMM_MULTI_LAYER && MULTI_LVL_BOOST_VBR_CQ
-      } else {
-        active_best_quality = rc->arf_q;
-        int this_height = gf_group->pyramid_level[gf_group->index];
-        while (this_height < gf_group->pyramid_height) {
-          active_best_quality = (active_best_quality + cq_level + 1) / 2;
-          ++this_height;
-        }
-      }
-#endif  // USE_SYMM_MULTI_LAYER && MULTI_LVL_BOOST_VBR_CQ
-    } else if (oxcf->rc_mode == AOM_Q) {
-      if (!cpi->refresh_alt_ref_frame && !is_intrl_arf_boost) {
-        active_best_quality = cq_level;
-      } else {
-        if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
-          active_best_quality = get_gf_active_quality(rc, q, bit_depth);
-          *arf_q = active_best_quality;
-#if REDUCE_LAST_ALT_BOOST
-          const int min_boost = get_gf_high_motion_quality(q, bit_depth);
-          const int boost = min_boost - active_best_quality;
-
-          active_best_quality = min_boost - (int)(boost * rc->arf_boost_factor);
-#endif
-        } else {
-          active_best_quality = rc->arf_q;
-        }
-#if USE_SYMM_MULTI_LAYER
-        if (cpi->new_bwdref_update_rule && is_intrl_arf_boost) {
-          int this_height = gf_group->pyramid_level[gf_group->index];
-          while (this_height < gf_group->pyramid_height) {
-            active_best_quality = (active_best_quality + cq_level + 1) / 2;
-            ++this_height;
-          }
-        } else {
-#endif
-          // Modify best quality for second level arfs. For mode AOM_Q this
-          // becomes the baseline frame q.
-          if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW)
-            active_best_quality = (active_best_quality + cq_level + 1) / 2;
-#if USE_SYMM_MULTI_LAYER
-        }
-#endif
-      }
-    } else {
-      active_best_quality = get_gf_active_quality(rc, q, bit_depth);
-#if REDUCE_LAST_ALT_BOOST
-      const int min_boost = get_gf_high_motion_quality(q, bit_depth);
-      const int boost = min_boost - active_best_quality;
-
-      active_best_quality = min_boost - (int)(boost * rc->arf_boost_factor);
-#endif
-#if USE_SYMM_MULTI_LAYER
-      if (cpi->new_bwdref_update_rule && is_intrl_arf_boost) {
-        int this_height = gf_group->pyramid_level[gf_group->index];
-        while (this_height < gf_group->pyramid_height) {
-          active_best_quality =
-              (active_best_quality + active_worst_quality + 1) / 2;
-          ++this_height;
-        }
-      }
-#endif
-    }
-  } else {
-    if (oxcf->rc_mode == AOM_Q) {
-      active_best_quality = cq_level;
-    } else {
-      active_best_quality = inter_minq[active_worst_quality];
-
-      // For the constrained quality mode we don't want
-      // q to fall below the cq level.
-      if ((oxcf->rc_mode == AOM_CQ) && (active_best_quality < cq_level)) {
-        active_best_quality = cq_level;
-      }
-    }
-  }
-
-  // Extension to max or min Q if undershoot or overshoot is outside
-  // the permitted range.
-  if ((cpi->oxcf.rc_mode != AOM_Q) &&
-      (cpi->twopass.gf_zeromotion_pct < VLOW_MOTION_THRESHOLD)) {
-    if (frame_is_intra_only(cm) ||
-        (!rc->is_src_frame_alt_ref &&
-         (cpi->refresh_golden_frame || is_intrl_arf_boost ||
-          cpi->refresh_alt_ref_frame))) {
-      active_best_quality -=
-          (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast);
-      active_worst_quality += (cpi->twopass.extend_maxq / 2);
-    } else {
-      active_best_quality -=
-          (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2;
-      active_worst_quality += cpi->twopass.extend_maxq;
-    }
-  }
-
-  aom_clear_system_state();
-  // Static forced key frames Q restrictions dealt with elsewhere.
-  if (!(frame_is_intra_only(cm)) || !rc->this_key_frame_forced ||
-      (cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH)) {
-    int qdelta = av1_frame_type_qdelta(cpi, gf_group->rf_level[gf_group->index],
-                                       active_worst_quality);
-    active_worst_quality =
-        AOMMAX(active_worst_quality + qdelta, active_best_quality);
-  }
-
-  // Modify active_best_quality for downscaled normal frames.
-  if (av1_frame_scaled(cm) && !frame_is_kf_gf_arf(cpi)) {
-    int qdelta = av1_compute_qdelta_by_rate(
-        rc, cm->frame_type, active_best_quality, 2.0, bit_depth);
-    active_best_quality =
-        AOMMAX(active_best_quality + qdelta, rc->best_quality);
-  }
-
-  active_best_quality =
-      clamp(active_best_quality, rc->best_quality, rc->worst_quality);
-  active_worst_quality =
-      clamp(active_worst_quality, active_best_quality, rc->worst_quality);
-
-  if (oxcf->rc_mode == AOM_Q) {
-    q = active_best_quality;
-    // Special case code to try and match quality with forced key frames.
-  } else if (frame_is_intra_only(cm) && rc->this_key_frame_forced) {
-    // If static since last kf use better of last boosted and last kf q.
-    if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
-      q = AOMMIN(rc->last_kf_qindex, rc->last_boosted_qindex);
-    } else {
-      q = AOMMIN(rc->last_boosted_qindex,
-                 (active_best_quality + active_worst_quality) / 2);
-    }
-  } else {
-    q = av1_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality,
-                          active_worst_quality, width, height);
-    if (q > active_worst_quality) {
-      // Special case when we are targeting the max allowed rate.
-      if (rc->this_frame_target >= rc->max_frame_bandwidth)
-        active_worst_quality = q;
-      else
-        q = active_worst_quality;
-    }
-  }
-  clamp(q, active_best_quality, active_worst_quality);
-
-  *top_index = active_worst_quality;
-  *bottom_index = active_best_quality;
-
-  assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality);
-  assert(*bottom_index <= rc->worst_quality &&
-         *bottom_index >= rc->best_quality);
-  assert(q <= rc->worst_quality && q >= rc->best_quality);
-  return q;
-}
-
-int av1_rc_pick_q_and_bounds(AV1_COMP *cpi, int width, int height,
-                             int *bottom_index, int *top_index) {
-  int q;
-  if (cpi->oxcf.pass == 0) {
-    if (cpi->oxcf.rc_mode == AOM_CBR)
-      q = rc_pick_q_and_bounds_one_pass_cbr(cpi, width, height, bottom_index,
-                                            top_index);
-    else
-      q = rc_pick_q_and_bounds_one_pass_vbr(cpi, width, height, bottom_index,
-                                            top_index);
-  } else {
-    assert(cpi->oxcf.pass == 2 && "invalid encode pass");
-
-    GF_GROUP *gf_group = &cpi->twopass.gf_group;
-    int arf_q = 0;
-
-    q = rc_pick_q_and_bounds_two_pass(cpi, width, height, bottom_index,
-                                      top_index, &arf_q);
-
-    if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
-      cpi->rc.arf_q = arf_q;
-    }
-  }
-
-  return q;
-}
-
-void av1_rc_compute_frame_size_bounds(const AV1_COMP *cpi, int frame_target,
-                                      int *frame_under_shoot_limit,
-                                      int *frame_over_shoot_limit) {
-  if (cpi->oxcf.rc_mode == AOM_Q) {
-    *frame_under_shoot_limit = 0;
-    *frame_over_shoot_limit = INT_MAX;
-  } else {
-    // For very small rate targets where the fractional adjustment
-    // may be tiny make sure there is at least a minimum range.
-    const int tolerance = (cpi->sf.recode_tolerance * frame_target) / 100;
-    *frame_under_shoot_limit = AOMMAX(frame_target - tolerance - 200, 0);
-    *frame_over_shoot_limit =
-        AOMMIN(frame_target + tolerance + 200, cpi->rc.max_frame_bandwidth);
-  }
-}
-
-static void rc_set_frame_target(AV1_COMP *cpi, int target, int width,
-                                int height) {
-  const AV1_COMMON *const cm = &cpi->common;
-  RATE_CONTROL *const rc = &cpi->rc;
-
-  rc->this_frame_target = target;
-
-  // Modify frame size target when down-scaled.
-  if (av1_frame_scaled(cm))
-    rc->this_frame_target =
-        (int)(rc->this_frame_target * resize_rate_factor(cpi, width, height));
-
-  // Target rate per SB64 (including partial SB64s.
-  rc->sb64_target_rate =
-      (int)((int64_t)rc->this_frame_target * 64 * 64) / (width * height);
-}
-
-static void update_alt_ref_frame_stats(AV1_COMP *cpi) {
-  // this frame refreshes means next frames don't unless specified by user
-  RATE_CONTROL *const rc = &cpi->rc;
-  rc->frames_since_golden = 0;
-
-  // Mark the alt ref as done (setting to 0 means no further alt refs pending).
-  rc->source_alt_ref_pending = 0;
-
-  // Set the alternate reference frame active flag
-  rc->source_alt_ref_active = 1;
-}
-
-static void update_golden_frame_stats(AV1_COMP *cpi) {
-  RATE_CONTROL *const rc = &cpi->rc;
-#if CUSTOMIZED_GF
-  const TWO_PASS *const twopass = &cpi->twopass;
-  const GF_GROUP *const gf_group = &twopass->gf_group;
-  const int is_intrnl_arf =
-      cpi->oxcf.pass == 2
-          ? gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE
-          : cpi->refresh_alt2_ref_frame;
-#else
-  const int is_intnl_arf = cpi->refresh_alt2_ref_frame;
-#endif
-
-  // Update the Golden frame usage counts.
-  // NOTE(weitinglin): If we use show_existing_frame for an OVERLAY frame,
-  //                   only the virtual indices for the reference frame will be
-  //                   updated and cpi->refresh_golden_frame will still be zero.
-  if (cpi->refresh_golden_frame || rc->is_src_frame_alt_ref) {
-    // We will not use internal overlay frames to replace the golden frame
-    if (!rc->is_src_frame_ext_arf)
-      // this frame refreshes means next frames don't unless specified by user
-      rc->frames_since_golden = 0;
-
-    // If we are not using alt ref in the up and coming group clear the arf
-    // active flag. In multi arf group case, if the index is not 0 then
-    // we are overlaying a mid group arf so should not reset the flag.
-    if (cpi->oxcf.pass == 2) {
-      if (!rc->source_alt_ref_pending && (cpi->twopass.gf_group.index == 0))
-        rc->source_alt_ref_active = 0;
-    } else if (!rc->source_alt_ref_pending) {
-      rc->source_alt_ref_active = 0;
-    }
-  } else if (!cpi->refresh_alt_ref_frame && !is_intrnl_arf) {
-    rc->frames_since_golden++;
-  }
-}
-
-void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
-  const AV1_COMMON *const cm = &cpi->common;
-  RATE_CONTROL *const rc = &cpi->rc;
-#if CUSTOMIZED_GF
-  const TWO_PASS *const twopass = &cpi->twopass;
-  const GF_GROUP *const gf_group = &twopass->gf_group;
-  const int is_intrnl_arf =
-      cpi->oxcf.pass == 2
-          ? gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE
-          : cpi->refresh_alt2_ref_frame;
-#else
-  const int is_intrnl_arf = cpi->refresh_alt2_ref_frame;
-#endif
-
-  const int qindex = cm->base_qindex;
-
-  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
-    av1_cyclic_refresh_postencode(cpi);
-  }
-
-  // Update rate control heuristics
-  rc->projected_frame_size = (int)(bytes_used << 3);
-
-  // Post encode loop adjustment of Q prediction.
-  av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
-
-  // Keep a record of last Q and ambient average Q.
-  if (cm->frame_type == KEY_FRAME) {
-    rc->last_q[KEY_FRAME] = qindex;
-    rc->avg_frame_qindex[KEY_FRAME] =
-        ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2);
-  } else {
-    if (!rc->is_src_frame_alt_ref &&
-        !(cpi->refresh_golden_frame || is_intrnl_arf ||
-          cpi->refresh_alt_ref_frame)) {
-      rc->last_q[INTER_FRAME] = qindex;
-      rc->avg_frame_qindex[INTER_FRAME] =
-          ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2);
-      rc->ni_frames++;
-      rc->tot_q += av1_convert_qindex_to_q(qindex, cm->seq_params.bit_depth);
-      rc->avg_q = rc->tot_q / rc->ni_frames;
-      // Calculate the average Q for normal inter frames (not key or GFU
-      // frames).
-      rc->ni_tot_qi += qindex;
-      rc->ni_av_qi = rc->ni_tot_qi / rc->ni_frames;
-    }
-  }
-
-  // Keep record of last boosted (KF/GF/ARF) Q value.
-  // If the current frame is coded at a lower Q then we also update it.
-  // If all mbs in this group are skipped only update if the Q value is
-  // better than that already stored.
-  // This is used to help set quality in forced key frames to reduce popping
-  if ((qindex < rc->last_boosted_qindex) || (cm->frame_type == KEY_FRAME) ||
-      (!rc->constrained_gf_group &&
-       (cpi->refresh_alt_ref_frame || is_intrnl_arf ||
-        (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
-    rc->last_boosted_qindex = qindex;
-  }
-  if (cm->frame_type == KEY_FRAME) rc->last_kf_qindex = qindex;
-
-  update_buffer_level(cpi, rc->projected_frame_size);
-
-  // Rolling monitors of whether we are over or underspending used to help
-  // regulate min and Max Q in two pass.
-  if (av1_frame_scaled(cm))
-    rc->this_frame_target =
-        (int)(rc->this_frame_target /
-              resize_rate_factor(cpi, cm->width, cm->height));
-  if (cm->frame_type != KEY_FRAME) {
-    rc->rolling_target_bits = ROUND_POWER_OF_TWO(
-        rc->rolling_target_bits * 3 + rc->this_frame_target, 2);
-    rc->rolling_actual_bits = ROUND_POWER_OF_TWO(
-        rc->rolling_actual_bits * 3 + rc->projected_frame_size, 2);
-    rc->long_rolling_target_bits = ROUND_POWER_OF_TWO(
-        rc->long_rolling_target_bits * 31 + rc->this_frame_target, 5);
-    rc->long_rolling_actual_bits = ROUND_POWER_OF_TWO(
-        rc->long_rolling_actual_bits * 31 + rc->projected_frame_size, 5);
-  }
-
-  // Actual bits spent
-  rc->total_actual_bits += rc->projected_frame_size;
-  // TODO(zoeliu): To investigate whether we should treat BWDREF_FRAME
-  //               differently here for rc->avg_frame_bandwidth.
-  rc->total_target_bits +=
-      (cm->show_frame || rc->is_bwd_ref_frame) ? rc->avg_frame_bandwidth : 0;
-
-  rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits;
-
-  if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame &&
-      (cm->frame_type != KEY_FRAME))
-    // Update the alternate reference frame stats as appropriate.
-    update_alt_ref_frame_stats(cpi);
-  else
-    // Update the Golden frame stats as appropriate.
-    update_golden_frame_stats(cpi);
-
-  if (cm->frame_type == KEY_FRAME) rc->frames_since_key = 0;
-  // if (cm->current_video_frame == 1 && cm->show_frame)
-  /*
-  rc->this_frame_target =
-      (int)(rc->this_frame_target / resize_rate_factor(cpi, cm->width,
-  cm->height));
-      */
-}
-
-void av1_rc_postencode_update_drop_frame(AV1_COMP *cpi) {
-  // Update buffer level with zero size, update frame counters, and return.
-  update_buffer_level(cpi, 0);
-  cpi->rc.frames_since_key++;
-  cpi->rc.frames_to_key--;
-  cpi->rc.rc_2_frame = 0;
-  cpi->rc.rc_1_frame = 0;
-}
-
-// Use this macro to turn on/off use of alt-refs in one-pass mode.
-#define USE_ALTREF_FOR_ONE_PASS 1
-
-static int calc_pframe_target_size_one_pass_vbr(const AV1_COMP *const cpi) {
-  static const int af_ratio = 10;
-  const RATE_CONTROL *const rc = &cpi->rc;
-  int target;
-#if USE_ALTREF_FOR_ONE_PASS
-  target =
-      (!rc->is_src_frame_alt_ref &&
-       (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))
-          ? (rc->avg_frame_bandwidth * rc->baseline_gf_interval * af_ratio) /
-                (rc->baseline_gf_interval + af_ratio - 1)
-          : (rc->avg_frame_bandwidth * rc->baseline_gf_interval) /
-                (rc->baseline_gf_interval + af_ratio - 1);
-#else
-  target = rc->avg_frame_bandwidth;
-#endif
-  return av1_rc_clamp_pframe_target_size(cpi, target);
-}
-
-static int calc_iframe_target_size_one_pass_vbr(const AV1_COMP *const cpi) {
-  static const int kf_ratio = 25;
-  const RATE_CONTROL *rc = &cpi->rc;
-  const int target = rc->avg_frame_bandwidth * kf_ratio;
-  return av1_rc_clamp_iframe_target_size(cpi, target);
-}
-
-void av1_rc_get_one_pass_vbr_params(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  RATE_CONTROL *const rc = &cpi->rc;
-  int target;
-  int altref_enabled = is_altref_enabled(cpi);
-  int sframe_dist = cpi->oxcf.sframe_dist;
-  int sframe_mode = cpi->oxcf.sframe_mode;
-  int sframe_enabled = cpi->oxcf.sframe_enabled;
-  // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic.
-  if (!cpi->refresh_alt_ref_frame &&
-      (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
-       rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0))) {
-    cm->frame_type = KEY_FRAME;
-    rc->this_key_frame_forced =
-        cm->current_video_frame != 0 && rc->frames_to_key == 0;
-    rc->frames_to_key = cpi->oxcf.key_freq;
-    rc->kf_boost = DEFAULT_KF_BOOST;
-    rc->source_alt_ref_active = 0;
-  } else {
-    cm->frame_type = INTER_FRAME;
-    if (sframe_enabled) {
-      if (altref_enabled) {
-        if (sframe_mode == 1) {
-          // sframe_mode == 1: insert sframe if it matches altref frame.
-
-          if (cm->current_video_frame % sframe_dist == 0 &&
-              cm->frame_type != KEY_FRAME && cm->current_video_frame != 0 &&
-              cpi->refresh_alt_ref_frame) {
-            cm->frame_type = S_FRAME;
-          }
-        } else {
-          // sframe_mode != 1: if sframe will be inserted at the next available
-          // altref frame
-
-          if (cm->current_video_frame % sframe_dist == 0 &&
-              cm->frame_type != KEY_FRAME && cm->current_video_frame != 0) {
-            rc->sframe_due = 1;
-          }
-
-          if (rc->sframe_due && cpi->refresh_alt_ref_frame) {
-            cm->frame_type = S_FRAME;
-            rc->sframe_due = 0;
-          }
-        }
-      } else {
-        if (cm->current_video_frame % sframe_dist == 0 &&
-            cm->frame_type != KEY_FRAME && cm->current_video_frame != 0) {
-          cm->frame_type = S_FRAME;
-        }
-      }
-    }
-  }
-  if (rc->frames_till_gf_update_due == 0) {
-    rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2;
-    rc->frames_till_gf_update_due = rc->baseline_gf_interval;
-    // NOTE: frames_till_gf_update_due must be <= frames_to_key.
-    if (rc->frames_till_gf_update_due > rc->frames_to_key) {
-      rc->frames_till_gf_update_due = rc->frames_to_key;
-      rc->constrained_gf_group = 1;
-    } else {
-      rc->constrained_gf_group = 0;
-    }
-    cpi->refresh_golden_frame = 1;
-    rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
-    rc->gfu_boost = DEFAULT_GF_BOOST;
-  }
-
-  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
-    av1_cyclic_refresh_update_parameters(cpi);
-
-  if (cm->frame_type == KEY_FRAME)
-    target = calc_iframe_target_size_one_pass_vbr(cpi);
-  else
-    target = calc_pframe_target_size_one_pass_vbr(cpi);
-  rc_set_frame_target(cpi, target, cm->width, cm->height);
-}
-
-static int calc_pframe_target_size_one_pass_cbr(const AV1_COMP *cpi) {
-  const AV1EncoderConfig *oxcf = &cpi->oxcf;
-  const RATE_CONTROL *rc = &cpi->rc;
-  const int64_t diff = rc->optimal_buffer_level - rc->buffer_level;
-  const int64_t one_pct_bits = 1 + rc->optimal_buffer_level / 100;
-  int min_frame_target =
-      AOMMAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS);
-  int target;
-
-  if (oxcf->gf_cbr_boost_pct) {
-    const int af_ratio_pct = oxcf->gf_cbr_boost_pct + 100;
-    target = cpi->refresh_golden_frame
-                 ? (rc->avg_frame_bandwidth * rc->baseline_gf_interval *
-                    af_ratio_pct) /
-                       (rc->baseline_gf_interval * 100 + af_ratio_pct - 100)
-                 : (rc->avg_frame_bandwidth * rc->baseline_gf_interval * 100) /
-                       (rc->baseline_gf_interval * 100 + af_ratio_pct - 100);
-  } else {
-    target = rc->avg_frame_bandwidth;
-  }
-
-  if (diff > 0) {
-    // Lower the target bandwidth for this frame.
-    const int pct_low = (int)AOMMIN(diff / one_pct_bits, oxcf->under_shoot_pct);
-    target -= (target * pct_low) / 200;
-  } else if (diff < 0) {
-    // Increase the target bandwidth for this frame.
-    const int pct_high =
-        (int)AOMMIN(-diff / one_pct_bits, oxcf->over_shoot_pct);
-    target += (target * pct_high) / 200;
-  }
-  if (oxcf->rc_max_inter_bitrate_pct) {
-    const int max_rate =
-        rc->avg_frame_bandwidth * oxcf->rc_max_inter_bitrate_pct / 100;
-    target = AOMMIN(target, max_rate);
-  }
-  return AOMMAX(min_frame_target, target);
-}
-
-static int calc_iframe_target_size_one_pass_cbr(const AV1_COMP *cpi) {
-  const RATE_CONTROL *rc = &cpi->rc;
-  int target;
-  if (cpi->common.current_video_frame == 0) {
-    target = ((rc->starting_buffer_level / 2) > INT_MAX)
-                 ? INT_MAX
-                 : (int)(rc->starting_buffer_level / 2);
-  } else {
-    int kf_boost = 32;
-    double framerate = cpi->framerate;
-
-    kf_boost = AOMMAX(kf_boost, (int)(2 * framerate - 16));
-    if (rc->frames_since_key < framerate / 2) {
-      kf_boost = (int)(kf_boost * rc->frames_since_key / (framerate / 2));
-    }
-    target = ((16 + kf_boost) * rc->avg_frame_bandwidth) >> 4;
-  }
-  return av1_rc_clamp_iframe_target_size(cpi, target);
-}
-
-void av1_rc_get_one_pass_cbr_params(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  RATE_CONTROL *const rc = &cpi->rc;
-  int target;
-  // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic.
-  if ((cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
-       rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0))) {
-    cm->frame_type = KEY_FRAME;
-    rc->this_key_frame_forced =
-        cm->current_video_frame != 0 && rc->frames_to_key == 0;
-    rc->frames_to_key = cpi->oxcf.key_freq;
-    rc->kf_boost = DEFAULT_KF_BOOST;
-    rc->source_alt_ref_active = 0;
-  } else {
-    cm->frame_type = INTER_FRAME;
-  }
-  if (rc->frames_till_gf_update_due == 0) {
-    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
-      av1_cyclic_refresh_set_golden_update(cpi);
-    else
-      rc->baseline_gf_interval =
-          (rc->min_gf_interval + rc->max_gf_interval) / 2;
-    rc->frames_till_gf_update_due = rc->baseline_gf_interval;
-    // NOTE: frames_till_gf_update_due must be <= frames_to_key.
-    if (rc->frames_till_gf_update_due > rc->frames_to_key)
-      rc->frames_till_gf_update_due = rc->frames_to_key;
-    cpi->refresh_golden_frame = 1;
-    rc->gfu_boost = DEFAULT_GF_BOOST;
-  }
-
-  // Any update/change of global cyclic refresh parameters (amount/delta-qp)
-  // should be done here, before the frame qp is selected.
-  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
-    av1_cyclic_refresh_update_parameters(cpi);
-
-  if (cm->frame_type == KEY_FRAME)
-    target = calc_iframe_target_size_one_pass_cbr(cpi);
-  else
-    target = calc_pframe_target_size_one_pass_cbr(cpi);
-
-  rc_set_frame_target(cpi, target, cm->width, cm->height);
-  // TODO(afergs): Decide whether to scale up, down, or not at all
-}
-
-int av1_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget,
-                       aom_bit_depth_t bit_depth) {
-  int start_index = rc->worst_quality;
-  int target_index = rc->worst_quality;
-  int i;
-
-  // Convert the average q value to an index.
-  for (i = rc->best_quality; i < rc->worst_quality; ++i) {
-    start_index = i;
-    if (av1_convert_qindex_to_q(i, bit_depth) >= qstart) break;
-  }
-
-  // Convert the q target to an index
-  for (i = rc->best_quality; i < rc->worst_quality; ++i) {
-    target_index = i;
-    if (av1_convert_qindex_to_q(i, bit_depth) >= qtarget) break;
-  }
-
-  return target_index - start_index;
-}
-
-int av1_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
-                               int qindex, double rate_target_ratio,
-                               aom_bit_depth_t bit_depth) {
-  int target_index = rc->worst_quality;
-  int i;
-
-  // Look up the current projected bits per block for the base index
-  const int base_bits_per_mb =
-      av1_rc_bits_per_mb(frame_type, qindex, 1.0, bit_depth);
-
-  // Find the target bits per mb based on the base value and given ratio.
-  const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb);
-
-  // Convert the q target to an index
-  for (i = rc->best_quality; i < rc->worst_quality; ++i) {
-    if (av1_rc_bits_per_mb(frame_type, i, 1.0, bit_depth) <=
-        target_bits_per_mb) {
-      target_index = i;
-      break;
-    }
-  }
-  return target_index - qindex;
-}
-
-void av1_rc_set_gf_interval_range(const AV1_COMP *const cpi,
-                                  RATE_CONTROL *const rc) {
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-
-  // Special case code for 1 pass fixed Q mode tests
-  if ((oxcf->pass == 0) && (oxcf->rc_mode == AOM_Q)) {
-    rc->max_gf_interval = FIXED_GF_INTERVAL;
-    rc->min_gf_interval = FIXED_GF_INTERVAL;
-    rc->static_scene_max_gf_interval = FIXED_GF_INTERVAL;
-  } else {
-    // Set Maximum gf/arf interval
-    rc->max_gf_interval = oxcf->max_gf_interval;
-    rc->min_gf_interval = oxcf->min_gf_interval;
-    if (rc->min_gf_interval == 0)
-      rc->min_gf_interval = av1_rc_get_default_min_gf_interval(
-          oxcf->width, oxcf->height, cpi->framerate);
-    if (rc->max_gf_interval == 0)
-      rc->max_gf_interval = av1_rc_get_default_max_gf_interval(
-          cpi->framerate, rc->min_gf_interval);
-
-    // Extended interval for genuinely static scenes
-    rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
-
-    if (is_altref_enabled(cpi)) {
-      if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
-        rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
-    }
-
-    if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
-      rc->max_gf_interval = rc->static_scene_max_gf_interval;
-
-    // Clamp min to max
-    rc->min_gf_interval = AOMMIN(rc->min_gf_interval, rc->max_gf_interval);
-  }
-}
-
-void av1_rc_update_framerate(AV1_COMP *cpi, int width, int height) {
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-  RATE_CONTROL *const rc = &cpi->rc;
-  int vbr_max_bits;
-  const int MBs = av1_get_MBs(width, height);
-
-  rc->avg_frame_bandwidth = (int)(oxcf->target_bandwidth / cpi->framerate);
-  rc->min_frame_bandwidth =
-      (int)(rc->avg_frame_bandwidth * oxcf->two_pass_vbrmin_section / 100);
-
-  rc->min_frame_bandwidth =
-      AOMMAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
-
-  // A maximum bitrate for a frame is defined.
-  // The baseline for this aligns with HW implementations that
-  // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits
-  // per 16x16 MB (averaged over a frame). However this limit is extended if
-  // a very high rate is given on the command line or the the rate cannnot
-  // be acheived because of a user specificed max q (e.g. when the user
-  // specifies lossless encode.
-  vbr_max_bits =
-      (int)(((int64_t)rc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) /
-            100);
-  rc->max_frame_bandwidth =
-      AOMMAX(AOMMAX((MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits);
-
-  av1_rc_set_gf_interval_range(cpi, rc);
-}
-
-#define VBR_PCT_ADJUSTMENT_LIMIT 50
-// For VBR...adjustment to the frame target based on error from previous frames
-static void vbr_rate_correction(AV1_COMP *cpi, int *this_frame_target) {
-  RATE_CONTROL *const rc = &cpi->rc;
-  int64_t vbr_bits_off_target = rc->vbr_bits_off_target;
-  int max_delta;
-  double position_factor = 1.0;
-
-  // How far through the clip are we.
-  // This number is used to damp the per frame rate correction.
-  // Range 0 - 1.0
-  if (cpi->twopass.total_stats.count != 0.) {
-    position_factor = sqrt((double)cpi->common.current_video_frame /
-                           cpi->twopass.total_stats.count);
-  }
-  max_delta = (int)(position_factor *
-                    ((*this_frame_target * VBR_PCT_ADJUSTMENT_LIMIT) / 100));
-
-  // vbr_bits_off_target > 0 means we have extra bits to spend
-  if (vbr_bits_off_target > 0) {
-    *this_frame_target += (vbr_bits_off_target > max_delta)
-                              ? max_delta
-                              : (int)vbr_bits_off_target;
-  } else {
-    *this_frame_target -= (vbr_bits_off_target < -max_delta)
-                              ? max_delta
-                              : (int)-vbr_bits_off_target;
-  }
-
-  // Fast redistribution of bits arising from massive local undershoot.
-  // Dont do it for kf,arf,gf or overlay frames.
-  if (!frame_is_kf_gf_arf(cpi) && !rc->is_src_frame_alt_ref &&
-      rc->vbr_bits_off_target_fast) {
-    int one_frame_bits = AOMMAX(rc->avg_frame_bandwidth, *this_frame_target);
-    int fast_extra_bits;
-    fast_extra_bits = (int)AOMMIN(rc->vbr_bits_off_target_fast, one_frame_bits);
-    fast_extra_bits = (int)AOMMIN(
-        fast_extra_bits,
-        AOMMAX(one_frame_bits / 8, rc->vbr_bits_off_target_fast / 8));
-    *this_frame_target += (int)fast_extra_bits;
-    rc->vbr_bits_off_target_fast -= fast_extra_bits;
-  }
-}
-
-void av1_set_target_rate(AV1_COMP *cpi, int width, int height) {
-  RATE_CONTROL *const rc = &cpi->rc;
-  int target_rate = rc->base_frame_target;
-
-  // Correction to rate target based on prior over or under shoot.
-  if (cpi->oxcf.rc_mode == AOM_VBR || cpi->oxcf.rc_mode == AOM_CQ)
-    vbr_rate_correction(cpi, &target_rate);
-  rc_set_frame_target(cpi, target_rate, width, height);
-}
diff --git a/third_party/aom/av1/encoder/ratectrl.h b/third_party/aom/av1/encoder/ratectrl.h
deleted file mode 100644
index 198ecab97..000000000
--- a/third_party/aom/av1/encoder/ratectrl.h
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_RATECTRL_H_
-#define AOM_AV1_ENCODER_RATECTRL_H_
-
-#include "aom/aom_codec.h"
-#include "aom/aom_integer.h"
-
-#include "av1/common/blockd.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Bits Per MB at different Q (Multiplied by 512)
-#define BPER_MB_NORMBITS 9
-
-#define CUSTOMIZED_GF 1
-
-#if CONFIG_FIX_GF_LENGTH
-#define FIXED_GF_LENGTH 16
-#define MAX_PYRAMID_LVL 4
-// We allow a frame to have at most two left/right descendants before changing
-// them into to a subtree, i.e., we allow the following structure:
-/*                    OUT_OF_ORDER_FRAME
-                     / /              \ \
-(two left children) F F                F F (two right children) */
-// Therefore the max gf size supported by 4 layer structure is
-// 1 (KEY/OVERLAY) + 1 + 2 + 4 + 16 (two children on both side of their parent)
-#define MAX_PYRAMID_SIZE 24
-#define USE_SYMM_MULTI_LAYER 1
-#define REDUCE_LAST_ALT_BOOST 1
-#define REDUCE_LAST_GF_LENGTH 1
-#define MULTI_LVL_BOOST_VBR_CQ 1
-#else
-#define USE_SYMM_MULTI_LAYER 0
-#define REDUCE_LAST_ALT_BOOST 0
-#define REDUCE_LAST_GF_LENGTH 0
-#define MULTI_LVL_BOOST_VBR_CQ 0
-#endif
-
-#if USE_SYMM_MULTI_LAYER
-#define USE_MANUAL_GF4_STRUCT 0
-#endif
-
-#define MIN_GF_INTERVAL 4
-#define MAX_GF_INTERVAL 16
-#define FIXED_GF_INTERVAL 8  // Used in some testing modes only
-
-typedef enum {
-  INTER_NORMAL = 0,
-  INTER_LOW = 1,
-  INTER_HIGH = 2,
-  GF_ARF_LOW = 3,
-  GF_ARF_STD = 4,
-  KF_STD = 5,
-  RATE_FACTOR_LEVELS = 6
-} RATE_FACTOR_LEVEL;
-
-static const double rate_factor_deltas[RATE_FACTOR_LEVELS] = {
-  1.00,  // INTER_NORMAL
-  0.80,  // INTER_LOW
-  1.50,  // INTER_HIGH
-  1.25,  // GF_ARF_LOW
-  2.00,  // GF_ARF_STD
-  2.00,  // KF_STD
-};
-
-typedef struct {
-  int resize_width;
-  int resize_height;
-  uint8_t superres_denom;
-} size_params_type;
-
-typedef struct {
-  // Rate targetting variables
-  int base_frame_target;  // A baseline frame target before adjustment
-                          // for previous under or over shoot.
-  int this_frame_target;  // Actual frame target after rc adjustment.
-  int projected_frame_size;
-  int sb64_target_rate;
-  int last_q[FRAME_TYPES];  // Separate values for Intra/Inter
-  int last_boosted_qindex;  // Last boosted GF/KF/ARF q
-  int last_kf_qindex;       // Q index of the last key frame coded.
-
-  int gfu_boost;
-  int last_boost;
-  int kf_boost;
-
-  double rate_correction_factors[RATE_FACTOR_LEVELS];
-
-  int frames_since_golden;
-  int frames_till_gf_update_due;
-  int min_gf_interval;
-  int max_gf_interval;
-  int static_scene_max_gf_interval;
-  int baseline_gf_interval;
-  int constrained_gf_group;
-  int frames_to_key;
-  int frames_since_key;
-  int this_key_frame_forced;
-  int next_key_frame_forced;
-  int source_alt_ref_pending;
-  int source_alt_ref_active;
-  int is_src_frame_alt_ref;
-  int sframe_due;
-
-  // Length of the bi-predictive frame group interval
-  int bipred_group_interval;
-
-  // NOTE: Different types of frames may have different bits allocated
-  //       accordingly, aiming to achieve the overall optimal RD performance.
-  int is_bwd_ref_frame;
-  int is_last_bipred_frame;
-  int is_bipred_frame;
-  int is_src_frame_ext_arf;
-
-  int avg_frame_bandwidth;  // Average frame size target for clip
-  int min_frame_bandwidth;  // Minimum allocation used for any frame
-  int max_frame_bandwidth;  // Maximum burst rate allowed for a frame.
-
-  int ni_av_qi;
-  int ni_tot_qi;
-  int ni_frames;
-  int avg_frame_qindex[FRAME_TYPES];
-  double tot_q;
-  double avg_q;
-
-  int64_t buffer_level;
-  int64_t bits_off_target;
-  int64_t vbr_bits_off_target;
-  int64_t vbr_bits_off_target_fast;
-
-  int decimation_factor;
-  int decimation_count;
-
-  int rolling_target_bits;
-  int rolling_actual_bits;
-
-  int long_rolling_target_bits;
-  int long_rolling_actual_bits;
-
-  int rate_error_estimate;
-
-  int64_t total_actual_bits;
-  int64_t total_target_bits;
-  int64_t total_target_vs_actual;
-
-  int worst_quality;
-  int best_quality;
-
-  int64_t starting_buffer_level;
-  int64_t optimal_buffer_level;
-  int64_t maximum_buffer_size;
-
-  // rate control history for last frame(1) and the frame before(2).
-  // -1: undershot
-  //  1: overshoot
-  //  0: not initialized.
-  int rc_1_frame;
-  int rc_2_frame;
-  int q_1_frame;
-  int q_2_frame;
-
-  // Auto frame-scaling variables.
-  int rf_level_maxq[RATE_FACTOR_LEVELS];
-  float_t arf_boost_factor;
-  // Q index used for ALT frame
-  int arf_q;
-} RATE_CONTROL;
-
-struct AV1_COMP;
-struct AV1EncoderConfig;
-
-void av1_rc_init(const struct AV1EncoderConfig *oxcf, int pass,
-                 RATE_CONTROL *rc);
-
-int av1_estimate_bits_at_q(FRAME_TYPE frame_kind, int q, int mbs,
-                           double correction_factor, aom_bit_depth_t bit_depth);
-
-double av1_convert_qindex_to_q(int qindex, aom_bit_depth_t bit_depth);
-
-void av1_rc_init_minq_luts(void);
-
-int av1_rc_get_default_min_gf_interval(int width, int height, double framerate);
-// Note av1_rc_get_default_max_gf_interval() requires the min_gf_interval to
-// be passed in to ensure that the max_gf_interval returned is at least as bis
-// as that.
-int av1_rc_get_default_max_gf_interval(double framerate, int min_frame_rate);
-
-// Generally at the high level, the following flow is expected
-// to be enforced for rate control:
-// First call per frame, one of:
-//   av1_rc_get_one_pass_vbr_params()
-//   av1_rc_get_one_pass_cbr_params()
-//   av1_rc_get_first_pass_params()
-//   av1_rc_get_second_pass_params()
-// depending on the usage to set the rate control encode parameters desired.
-//
-// Then, call encode_frame_to_data_rate() to perform the
-// actual encode. This function will in turn call encode_frame()
-// one or more times, followed by one of:
-//   av1_rc_postencode_update()
-//   av1_rc_postencode_update_drop_frame()
-//
-// The majority of rate control parameters are only expected
-// to be set in the av1_rc_get_..._params() functions and
-// updated during the av1_rc_postencode_update...() functions.
-// The only exceptions are av1_rc_drop_frame() and
-// av1_rc_update_rate_correction_factors() functions.
-
-// Functions to set parameters for encoding before the actual
-// encode_frame_to_data_rate() function.
-void av1_rc_get_one_pass_vbr_params(struct AV1_COMP *cpi);
-void av1_rc_get_one_pass_cbr_params(struct AV1_COMP *cpi);
-
-// Post encode update of the rate control parameters based
-// on bytes used
-void av1_rc_postencode_update(struct AV1_COMP *cpi, uint64_t bytes_used);
-// Post encode update of the rate control parameters for dropped frames
-void av1_rc_postencode_update_drop_frame(struct AV1_COMP *cpi);
-
-// Updates rate correction factors
-// Changes only the rate correction factors in the rate control structure.
-void av1_rc_update_rate_correction_factors(struct AV1_COMP *cpi, int width,
-                                           int height);
-
-// Decide if we should drop this frame: For 1-pass CBR.
-// Changes only the decimation count in the rate control structure
-int av1_rc_drop_frame(struct AV1_COMP *cpi);
-
-// Computes frame size bounds.
-void av1_rc_compute_frame_size_bounds(const struct AV1_COMP *cpi,
-                                      int this_frame_target,
-                                      int *frame_under_shoot_limit,
-                                      int *frame_over_shoot_limit);
-
-// Picks q and q bounds given the target for bits
-int av1_rc_pick_q_and_bounds(struct AV1_COMP *cpi, int width, int height,
-                             int *bottom_index, int *top_index);
-
-// Estimates q to achieve a target bits per frame
-int av1_rc_regulate_q(const struct AV1_COMP *cpi, int target_bits_per_frame,
-                      int active_best_quality, int active_worst_quality,
-                      int width, int height);
-
-// Estimates bits per mb for a given qindex and correction factor.
-int av1_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
-                       double correction_factor, aom_bit_depth_t bit_depth);
-
-// Clamping utilities for bitrate targets for iframes and pframes.
-int av1_rc_clamp_iframe_target_size(const struct AV1_COMP *const cpi,
-                                    int target);
-int av1_rc_clamp_pframe_target_size(const struct AV1_COMP *const cpi,
-                                    int target);
-// Utility to set frame_target into the RATE_CONTROL structure
-// This function is called only from the av1_rc_get_..._params() functions.
-void av1_rc_set_frame_target(struct AV1_COMP *cpi, int target);
-
-// Computes a q delta (in "q index" terms) to get from a starting q value
-// to a target q value
-int av1_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget,
-                       aom_bit_depth_t bit_depth);
-
-// Computes a q delta (in "q index" terms) to get from a starting q value
-// to a value that should equate to the given rate ratio.
-int av1_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
-                               int qindex, double rate_target_ratio,
-                               aom_bit_depth_t bit_depth);
-
-int av1_frame_type_qdelta(const struct AV1_COMP *cpi, int rf_level, int q);
-
-void av1_rc_update_framerate(struct AV1_COMP *cpi, int width, int height);
-
-void av1_rc_set_gf_interval_range(const struct AV1_COMP *const cpi,
-                                  RATE_CONTROL *const rc);
-
-void av1_set_target_rate(struct AV1_COMP *cpi, int width, int height);
-
-int av1_resize_one_pass_cbr(struct AV1_COMP *cpi);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_RATECTRL_H_
diff --git a/third_party/aom/av1/encoder/rd.c b/third_party/aom/av1/encoder/rd.c
deleted file mode 100644
index b87d89e50..000000000
--- a/third_party/aom/av1/encoder/rd.c
+++ /dev/null
@@ -1,1512 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <math.h>
-#include <stdio.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/bitops.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/system_state.h"
-
-#include "av1/common/common.h"
-#include "av1/common/entropy.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/mvref_common.h"
-#include "av1/common/pred_common.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/seg_common.h"
-
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/cost.h"
-#include "av1/encoder/encodemb.h"
-#include "av1/encoder/encodemv.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/encodetxb.h"
-#include "av1/encoder/mcomp.h"
-#include "av1/encoder/ratectrl.h"
-#include "av1/encoder/rd.h"
-#include "av1/encoder/tokenize.h"
-
-#define RD_THRESH_POW 1.25
-
-// The baseline rd thresholds for breaking out of the rd loop for
-// certain modes are assumed to be based on 8x8 blocks.
-// This table is used to correct for block size.
-// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
-static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
-  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16
-};
-
-static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA][EXT_TX_SIZES] =
-    {
-      { 1, 1, 1, 1 },  // unused
-      { 1, 1, 0, 0 },
-      { 0, 0, 1, 0 },
-    };
-
-static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER][EXT_TX_SIZES] =
-    {
-      { 1, 1, 1, 1 },  // unused
-      { 1, 1, 0, 0 },
-      { 0, 0, 1, 0 },
-      { 0, 0, 0, 1 },
-    };
-
-static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
-                                                      EXT_TX_SETS_INTER)] = {
-  {
-      // Intra
-      EXT_TX_SET_DCTONLY,
-      EXT_TX_SET_DTT4_IDTX_1DDCT,
-      EXT_TX_SET_DTT4_IDTX,
-  },
-  {
-      // Inter
-      EXT_TX_SET_DCTONLY,
-      EXT_TX_SET_ALL16,
-      EXT_TX_SET_DTT9_IDTX_1DDCT,
-      EXT_TX_SET_DCT_IDTX,
-  },
-};
-
-void av1_fill_mode_rates(AV1_COMMON *const cm, MACROBLOCK *x,
-                         FRAME_CONTEXT *fc) {
-  int i, j;
-
-  for (i = 0; i < PARTITION_CONTEXTS; ++i)
-    av1_cost_tokens_from_cdf(x->partition_cost[i], fc->partition_cdf[i], NULL);
-
-  if (cm->skip_mode_flag) {
-    for (i = 0; i < SKIP_CONTEXTS; ++i) {
-      av1_cost_tokens_from_cdf(x->skip_mode_cost[i], fc->skip_mode_cdfs[i],
-                               NULL);
-    }
-  }
-
-  for (i = 0; i < SKIP_CONTEXTS; ++i) {
-    av1_cost_tokens_from_cdf(x->skip_cost[i], fc->skip_cdfs[i], NULL);
-  }
-
-  for (i = 0; i < KF_MODE_CONTEXTS; ++i)
-    for (j = 0; j < KF_MODE_CONTEXTS; ++j)
-      av1_cost_tokens_from_cdf(x->y_mode_costs[i][j], fc->kf_y_cdf[i][j], NULL);
-
-  for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
-    av1_cost_tokens_from_cdf(x->mbmode_cost[i], fc->y_mode_cdf[i], NULL);
-  for (i = 0; i < CFL_ALLOWED_TYPES; ++i)
-    for (j = 0; j < INTRA_MODES; ++j)
-      av1_cost_tokens_from_cdf(x->intra_uv_mode_cost[i][j],
-                               fc->uv_mode_cdf[i][j], NULL);
-
-  av1_cost_tokens_from_cdf(x->filter_intra_mode_cost, fc->filter_intra_mode_cdf,
-                           NULL);
-  for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
-    if (av1_filter_intra_allowed_bsize(cm, i))
-      av1_cost_tokens_from_cdf(x->filter_intra_cost[i],
-                               fc->filter_intra_cdfs[i], NULL);
-  }
-
-  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
-    av1_cost_tokens_from_cdf(x->switchable_interp_costs[i],
-                             fc->switchable_interp_cdf[i], NULL);
-
-  for (i = 0; i < PALATTE_BSIZE_CTXS; ++i) {
-    av1_cost_tokens_from_cdf(x->palette_y_size_cost[i],
-                             fc->palette_y_size_cdf[i], NULL);
-    av1_cost_tokens_from_cdf(x->palette_uv_size_cost[i],
-                             fc->palette_uv_size_cdf[i], NULL);
-    for (j = 0; j < PALETTE_Y_MODE_CONTEXTS; ++j) {
-      av1_cost_tokens_from_cdf(x->palette_y_mode_cost[i][j],
-                               fc->palette_y_mode_cdf[i][j], NULL);
-    }
-  }
-
-  for (i = 0; i < PALETTE_UV_MODE_CONTEXTS; ++i) {
-    av1_cost_tokens_from_cdf(x->palette_uv_mode_cost[i],
-                             fc->palette_uv_mode_cdf[i], NULL);
-  }
-
-  for (i = 0; i < PALETTE_SIZES; ++i) {
-    for (j = 0; j < PALETTE_COLOR_INDEX_CONTEXTS; ++j) {
-      av1_cost_tokens_from_cdf(x->palette_y_color_cost[i][j],
-                               fc->palette_y_color_index_cdf[i][j], NULL);
-      av1_cost_tokens_from_cdf(x->palette_uv_color_cost[i][j],
-                               fc->palette_uv_color_index_cdf[i][j], NULL);
-    }
-  }
-
-  int sign_cost[CFL_JOINT_SIGNS];
-  av1_cost_tokens_from_cdf(sign_cost, fc->cfl_sign_cdf, NULL);
-  for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
-    int *cost_u = x->cfl_cost[joint_sign][CFL_PRED_U];
-    int *cost_v = x->cfl_cost[joint_sign][CFL_PRED_V];
-    if (CFL_SIGN_U(joint_sign) == CFL_SIGN_ZERO) {
-      memset(cost_u, 0, CFL_ALPHABET_SIZE * sizeof(*cost_u));
-    } else {
-      const aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
-      av1_cost_tokens_from_cdf(cost_u, cdf_u, NULL);
-    }
-    if (CFL_SIGN_V(joint_sign) == CFL_SIGN_ZERO) {
-      memset(cost_v, 0, CFL_ALPHABET_SIZE * sizeof(*cost_v));
-    } else {
-      const aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
-      av1_cost_tokens_from_cdf(cost_v, cdf_v, NULL);
-    }
-    for (int u = 0; u < CFL_ALPHABET_SIZE; u++)
-      cost_u[u] += sign_cost[joint_sign];
-  }
-
-  for (i = 0; i < MAX_TX_CATS; ++i)
-    for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
-      av1_cost_tokens_from_cdf(x->tx_size_cost[i][j], fc->tx_size_cdf[i][j],
-                               NULL);
-
-  for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i) {
-    av1_cost_tokens_from_cdf(x->txfm_partition_cost[i],
-                             fc->txfm_partition_cdf[i], NULL);
-  }
-
-  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
-    int s;
-    for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
-      if (use_inter_ext_tx_for_txsize[s][i]) {
-        av1_cost_tokens_from_cdf(
-            x->inter_tx_type_costs[s][i], fc->inter_ext_tx_cdf[s][i],
-            av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]);
-      }
-    }
-    for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
-      if (use_intra_ext_tx_for_txsize[s][i]) {
-        for (j = 0; j < INTRA_MODES; ++j) {
-          av1_cost_tokens_from_cdf(
-              x->intra_tx_type_costs[s][i][j], fc->intra_ext_tx_cdf[s][i][j],
-              av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][s]]);
-        }
-      }
-    }
-  }
-  for (i = 0; i < DIRECTIONAL_MODES; ++i) {
-    av1_cost_tokens_from_cdf(x->angle_delta_cost[i], fc->angle_delta_cdf[i],
-                             NULL);
-  }
-  av1_cost_tokens_from_cdf(x->switchable_restore_cost,
-                           fc->switchable_restore_cdf, NULL);
-  av1_cost_tokens_from_cdf(x->wiener_restore_cost, fc->wiener_restore_cdf,
-                           NULL);
-  av1_cost_tokens_from_cdf(x->sgrproj_restore_cost, fc->sgrproj_restore_cdf,
-                           NULL);
-  av1_cost_tokens_from_cdf(x->intrabc_cost, fc->intrabc_cdf, NULL);
-
-  if (!frame_is_intra_only(cm)) {
-    for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
-      av1_cost_tokens_from_cdf(x->comp_inter_cost[i], fc->comp_inter_cdf[i],
-                               NULL);
-    }
-
-    for (i = 0; i < REF_CONTEXTS; ++i) {
-      for (j = 0; j < SINGLE_REFS - 1; ++j) {
-        av1_cost_tokens_from_cdf(x->single_ref_cost[i][j],
-                                 fc->single_ref_cdf[i][j], NULL);
-      }
-    }
-
-    for (i = 0; i < COMP_REF_TYPE_CONTEXTS; ++i) {
-      av1_cost_tokens_from_cdf(x->comp_ref_type_cost[i],
-                               fc->comp_ref_type_cdf[i], NULL);
-    }
-
-    for (i = 0; i < UNI_COMP_REF_CONTEXTS; ++i) {
-      for (j = 0; j < UNIDIR_COMP_REFS - 1; ++j) {
-        av1_cost_tokens_from_cdf(x->uni_comp_ref_cost[i][j],
-                                 fc->uni_comp_ref_cdf[i][j], NULL);
-      }
-    }
-
-    for (i = 0; i < REF_CONTEXTS; ++i) {
-      for (j = 0; j < FWD_REFS - 1; ++j) {
-        av1_cost_tokens_from_cdf(x->comp_ref_cost[i][j], fc->comp_ref_cdf[i][j],
-                                 NULL);
-      }
-    }
-
-    for (i = 0; i < REF_CONTEXTS; ++i) {
-      for (j = 0; j < BWD_REFS - 1; ++j) {
-        av1_cost_tokens_from_cdf(x->comp_bwdref_cost[i][j],
-                                 fc->comp_bwdref_cdf[i][j], NULL);
-      }
-    }
-
-    for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
-      av1_cost_tokens_from_cdf(x->intra_inter_cost[i], fc->intra_inter_cdf[i],
-                               NULL);
-    }
-
-    for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
-      av1_cost_tokens_from_cdf(x->newmv_mode_cost[i], fc->newmv_cdf[i], NULL);
-    }
-
-    for (i = 0; i < GLOBALMV_MODE_CONTEXTS; ++i) {
-      av1_cost_tokens_from_cdf(x->zeromv_mode_cost[i], fc->zeromv_cdf[i], NULL);
-    }
-
-    for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) {
-      av1_cost_tokens_from_cdf(x->refmv_mode_cost[i], fc->refmv_cdf[i], NULL);
-    }
-
-    for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
-      av1_cost_tokens_from_cdf(x->drl_mode_cost0[i], fc->drl_cdf[i], NULL);
-    }
-    for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
-      av1_cost_tokens_from_cdf(x->inter_compound_mode_cost[i],
-                               fc->inter_compound_mode_cdf[i], NULL);
-    for (i = 0; i < BLOCK_SIZES_ALL; ++i)
-      av1_cost_tokens_from_cdf(x->compound_type_cost[i],
-                               fc->compound_type_cdf[i], NULL);
-    for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
-      if (get_interinter_wedge_bits(i)) {
-        av1_cost_tokens_from_cdf(x->wedge_idx_cost[i], fc->wedge_idx_cdf[i],
-                                 NULL);
-      }
-    }
-    for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
-      av1_cost_tokens_from_cdf(x->interintra_cost[i], fc->interintra_cdf[i],
-                               NULL);
-      av1_cost_tokens_from_cdf(x->interintra_mode_cost[i],
-                               fc->interintra_mode_cdf[i], NULL);
-    }
-    for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
-      av1_cost_tokens_from_cdf(x->wedge_interintra_cost[i],
-                               fc->wedge_interintra_cdf[i], NULL);
-    }
-    for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
-      av1_cost_tokens_from_cdf(x->motion_mode_cost[i], fc->motion_mode_cdf[i],
-                               NULL);
-    }
-    for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
-      av1_cost_tokens_from_cdf(x->motion_mode_cost1[i], fc->obmc_cdf[i], NULL);
-    }
-    for (i = 0; i < COMP_INDEX_CONTEXTS; ++i) {
-      av1_cost_tokens_from_cdf(x->comp_idx_cost[i], fc->compound_index_cdf[i],
-                               NULL);
-    }
-    for (i = 0; i < COMP_GROUP_IDX_CONTEXTS; ++i) {
-      av1_cost_tokens_from_cdf(x->comp_group_idx_cost[i],
-                               fc->comp_group_idx_cdf[i], NULL);
-    }
-  }
-}
-
-// Values are now correlated to quantizer.
-static int sad_per_bit16lut_8[QINDEX_RANGE];
-static int sad_per_bit4lut_8[QINDEX_RANGE];
-static int sad_per_bit16lut_10[QINDEX_RANGE];
-static int sad_per_bit4lut_10[QINDEX_RANGE];
-static int sad_per_bit16lut_12[QINDEX_RANGE];
-static int sad_per_bit4lut_12[QINDEX_RANGE];
-
-static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
-                            aom_bit_depth_t bit_depth) {
-  int i;
-  // Initialize the sad lut tables using a formulaic calculation for now.
-  // This is to make it easier to resolve the impact of experimental changes
-  // to the quantizer tables.
-  for (i = 0; i < range; i++) {
-    const double q = av1_convert_qindex_to_q(i, bit_depth);
-    bit16lut[i] = (int)(0.0418 * q + 2.4107);
-    bit4lut[i] = (int)(0.063 * q + 2.742);
-  }
-}
-
-void av1_init_me_luts(void) {
-  init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
-                  AOM_BITS_8);
-  init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
-                  AOM_BITS_10);
-  init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
-                  AOM_BITS_12);
-}
-
-static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
-                                         8,  8,  4,  4,  2,  2,  1,  0 };
-static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
-  128, 144, 128, 128, 144,
-  // TODO(zoeliu): To adjust further following factor values.
-  128, 128, 128,
-  // TODO(weitinglin): We should investigate if the values should be the same
-  //                   as the value used by OVERLAY frame
-  144,  // INTNL_OVERLAY_UPDATE
-  128   // INTNL_ARF_UPDATE
-};
-
-int av1_compute_rd_mult(const AV1_COMP *cpi, int qindex) {
-  const int64_t q =
-      av1_dc_quant_Q3(qindex, 0, cpi->common.seq_params.bit_depth);
-  int64_t rdmult = 0;
-  switch (cpi->common.seq_params.bit_depth) {
-    case AOM_BITS_8: rdmult = 88 * q * q / 24; break;
-    case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break;
-    case AOM_BITS_12: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8); break;
-    default:
-      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
-      return -1;
-  }
-  if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
-    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
-    const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
-    const int boost_index = AOMMIN(15, (cpi->rc.gfu_boost / 100));
-
-    rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
-    rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
-  }
-  if (rdmult < 1) rdmult = 1;
-  return (int)rdmult;
-}
-
-static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
-  double q;
-  switch (bit_depth) {
-    case AOM_BITS_8: q = av1_dc_quant_Q3(qindex, 0, AOM_BITS_8) / 4.0; break;
-    case AOM_BITS_10: q = av1_dc_quant_Q3(qindex, 0, AOM_BITS_10) / 16.0; break;
-    case AOM_BITS_12: q = av1_dc_quant_Q3(qindex, 0, AOM_BITS_12) / 64.0; break;
-    default:
-      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
-      return -1;
-  }
-  // TODO(debargha): Adjust the function below.
-  return AOMMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
-}
-
-void av1_initialize_me_consts(const AV1_COMP *cpi, MACROBLOCK *x, int qindex) {
-  switch (cpi->common.seq_params.bit_depth) {
-    case AOM_BITS_8:
-      x->sadperbit16 = sad_per_bit16lut_8[qindex];
-      x->sadperbit4 = sad_per_bit4lut_8[qindex];
-      break;
-    case AOM_BITS_10:
-      x->sadperbit16 = sad_per_bit16lut_10[qindex];
-      x->sadperbit4 = sad_per_bit4lut_10[qindex];
-      break;
-    case AOM_BITS_12:
-      x->sadperbit16 = sad_per_bit16lut_12[qindex];
-      x->sadperbit4 = sad_per_bit4lut_12[qindex];
-      break;
-    default:
-      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
-  }
-}
-
-static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd) {
-  int i, bsize, segment_id;
-
-  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
-    const int qindex =
-        clamp(av1_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
-                  cm->y_dc_delta_q,
-              0, MAXQ);
-    const int q = compute_rd_thresh_factor(qindex, cm->seq_params.bit_depth);
-
-    for (bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
-      // Threshold here seems unnecessarily harsh but fine given actual
-      // range of values used for cpi->sf.thresh_mult[].
-      const int t = q * rd_thresh_block_size_factor[bsize];
-      const int thresh_max = INT_MAX / t;
-
-      for (i = 0; i < MAX_MODES; ++i)
-        rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max
-                                                 ? rd->thresh_mult[i] * t / 4
-                                                 : INT_MAX;
-    }
-  }
-}
-
-void av1_set_mvcost(MACROBLOCK *x, int ref, int ref_mv_idx) {
-  (void)ref;
-  (void)ref_mv_idx;
-  x->mvcost = x->mv_cost_stack;
-  x->nmvjointcost = x->nmv_vec_cost;
-}
-
-void av1_fill_coeff_costs(MACROBLOCK *x, FRAME_CONTEXT *fc,
-                          const int num_planes) {
-  const int nplanes = AOMMIN(num_planes, PLANE_TYPES);
-  for (int eob_multi_size = 0; eob_multi_size < 7; ++eob_multi_size) {
-    for (int plane = 0; plane < nplanes; ++plane) {
-      LV_MAP_EOB_COST *pcost = &x->eob_costs[eob_multi_size][plane];
-
-      for (int ctx = 0; ctx < 2; ++ctx) {
-        aom_cdf_prob *pcdf;
-        switch (eob_multi_size) {
-          case 0: pcdf = fc->eob_flag_cdf16[plane][ctx]; break;
-          case 1: pcdf = fc->eob_flag_cdf32[plane][ctx]; break;
-          case 2: pcdf = fc->eob_flag_cdf64[plane][ctx]; break;
-          case 3: pcdf = fc->eob_flag_cdf128[plane][ctx]; break;
-          case 4: pcdf = fc->eob_flag_cdf256[plane][ctx]; break;
-          case 5: pcdf = fc->eob_flag_cdf512[plane][ctx]; break;
-          case 6:
-          default: pcdf = fc->eob_flag_cdf1024[plane][ctx]; break;
-        }
-        av1_cost_tokens_from_cdf(pcost->eob_cost[ctx], pcdf, NULL);
-      }
-    }
-  }
-  for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
-    for (int plane = 0; plane < nplanes; ++plane) {
-      LV_MAP_COEFF_COST *pcost = &x->coeff_costs[tx_size][plane];
-
-      for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx)
-        av1_cost_tokens_from_cdf(pcost->txb_skip_cost[ctx],
-                                 fc->txb_skip_cdf[tx_size][ctx], NULL);
-
-      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx)
-        av1_cost_tokens_from_cdf(pcost->base_eob_cost[ctx],
-                                 fc->coeff_base_eob_cdf[tx_size][plane][ctx],
-                                 NULL);
-      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx)
-        av1_cost_tokens_from_cdf(pcost->base_cost[ctx],
-                                 fc->coeff_base_cdf[tx_size][plane][ctx], NULL);
-
-      for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx)
-        av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx],
-                                 fc->eob_extra_cdf[tx_size][plane][ctx], NULL);
-
-      for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx)
-        av1_cost_tokens_from_cdf(pcost->dc_sign_cost[ctx],
-                                 fc->dc_sign_cdf[plane][ctx], NULL);
-
-      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
-        int br_rate[BR_CDF_SIZE];
-        int prev_cost = 0;
-        int i, j;
-        av1_cost_tokens_from_cdf(br_rate, fc->coeff_br_cdf[tx_size][plane][ctx],
-                                 NULL);
-        // printf("br_rate: ");
-        // for(j = 0; j < BR_CDF_SIZE; j++)
-        //  printf("%4d ", br_rate[j]);
-        // printf("\n");
-        for (i = 0; i < COEFF_BASE_RANGE; i += BR_CDF_SIZE - 1) {
-          for (j = 0; j < BR_CDF_SIZE - 1; j++) {
-            pcost->lps_cost[ctx][i + j] = prev_cost + br_rate[j];
-          }
-          prev_cost += br_rate[j];
-        }
-        pcost->lps_cost[ctx][i] = prev_cost;
-        // printf("lps_cost: %d %d %2d : ", tx_size, plane, ctx);
-        // for (i = 0; i <= COEFF_BASE_RANGE; i++)
-        //  printf("%5d ", pcost->lps_cost[ctx][i]);
-        // printf("\n");
-      }
-    }
-  }
-}
-
-void av1_initialize_rd_consts(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCK *const x = &cpi->td.mb;
-  RD_OPT *const rd = &cpi->rd;
-
-  aom_clear_system_state();
-
-  rd->RDMULT = av1_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
-
-  set_error_per_bit(x, rd->RDMULT);
-
-  set_block_thresholds(cm, rd);
-
-  if (cm->cur_frame_force_integer_mv) {
-    av1_build_nmv_cost_table(x->nmv_vec_cost, x->nmvcost, &cm->fc->nmvc,
-                             MV_SUBPEL_NONE);
-  } else {
-    av1_build_nmv_cost_table(
-        x->nmv_vec_cost,
-        cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost, &cm->fc->nmvc,
-        cm->allow_high_precision_mv);
-  }
-
-  x->mvcost = x->mv_cost_stack;
-  x->nmvjointcost = x->nmv_vec_cost;
-
-  if (frame_is_intra_only(cm) && cm->allow_screen_content_tools &&
-      cpi->oxcf.pass != 1) {
-    int *dvcost[2] = { &cpi->dv_cost[0][MV_MAX], &cpi->dv_cost[1][MV_MAX] };
-    av1_build_nmv_cost_table(cpi->dv_joint_cost, dvcost, &cm->fc->ndvc,
-                             MV_SUBPEL_NONE);
-  }
-
-  if (cpi->oxcf.pass != 1) {
-    for (int i = 0; i < TRANS_TYPES; ++i)
-      // IDENTITY: 1 bit
-      // TRANSLATION: 3 bits
-      // ROTZOOM: 2 bits
-      // AFFINE: 3 bits
-      cpi->gmtype_cost[i] = (1 + (i > 0 ? (i == ROTZOOM ? 1 : 2) : 0))
-                            << AV1_PROB_COST_SHIFT;
-  }
-}
-
-static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
-  // NOTE: The tables below must be of the same size.
-
-  // The functions described below are sampled at the four most significant
-  // bits of x^2 + 8 / 256.
-
-  // Normalized rate:
-  // This table models the rate for a Laplacian source with given variance
-  // when quantized with a uniform quantizer with given stepsize. The
-  // closed form expression is:
-  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
-  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
-  // and H(x) is the binary entropy function.
-  static const int rate_tab_q10[] = {
-    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
-    4044,  3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
-    3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
-    2290,  2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
-    1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
-    911,   864,  821,  781,  745,  680,  623,  574,  530,  490,  455,  424,
-    395,   345,  304,  269,  239,  213,  190,  171,  154,  126,  104,  87,
-    73,    61,   52,   44,   38,   28,   21,   16,   12,   10,   8,    6,
-    5,     3,    2,    1,    1,    1,    0,    0,
-  };
-  // Normalized distortion:
-  // This table models the normalized distortion for a Laplacian source
-  // with given variance when quantized with a uniform quantizer
-  // with given stepsize. The closed form expression is:
-  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
-  // where x = qpstep / sqrt(variance).
-  // Note the actual distortion is Dn * variance.
-  static const int dist_tab_q10[] = {
-    0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,
-    5,    6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,
-    18,   21,   24,   26,   29,   31,   34,   36,   39,   44,   49,   54,
-    59,   64,   69,   73,   78,   88,   97,   106,  115,  124,  133,  142,
-    151,  167,  184,  200,  215,  231,  245,  260,  274,  301,  327,  351,
-    375,  397,  418,  439,  458,  495,  528,  559,  587,  613,  637,  659,
-    680,  717,  749,  777,  801,  823,  842,  859,  874,  899,  919,  936,
-    949,  960,  969,  977,  983,  994,  1001, 1006, 1010, 1013, 1015, 1017,
-    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
-  };
-  static const int xsq_iq_q10[] = {
-    0,      4,      8,      12,     16,     20,     24,     28,     32,
-    40,     48,     56,     64,     72,     80,     88,     96,     112,
-    128,    144,    160,    176,    192,    208,    224,    256,    288,
-    320,    352,    384,    416,    448,    480,    544,    608,    672,
-    736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
-    1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
-    3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
-    7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
-    16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
-    36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
-    81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
-    180192, 196576, 212960, 229344, 245728,
-  };
-  const int tmp = (xsq_q10 >> 2) + 8;
-  const int k = get_msb(tmp) - 3;
-  const int xq = (k << 3) + ((tmp >> k) & 0x7);
-  const int one_q10 = 1 << 10;
-  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
-  const int b_q10 = one_q10 - a_q10;
-  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
-  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
-}
-
-void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2,
-                                  unsigned int qstep, int *rate,
-                                  int64_t *dist) {
-  // This function models the rate and distortion for a Laplacian
-  // source with given variance when quantized with a uniform quantizer
-  // with given stepsize. The closed form expressions are in:
-  // Hang and Chen, "Source Model for transform video coder and its
-  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
-  // Sys. for Video Tech., April 1997.
-  if (var == 0) {
-    *rate = 0;
-    *dist = 0;
-  } else {
-    int d_q10, r_q10;
-    static const uint32_t MAX_XSQ_Q10 = 245727;
-    const uint64_t xsq_q10_64 =
-        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
-    const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10);
-    model_rd_norm(xsq_q10, &r_q10, &d_q10);
-    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT);
-    *dist = (var * (int64_t)d_q10 + 512) >> 10;
-  }
-}
-
-static double interp_cubic(const double *p, double x) {
-  return p[1] + 0.5 * x *
-                    (p[2] - p[0] +
-                     x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] +
-                          x * (3.0 * (p[1] - p[2]) + p[3] - p[0])));
-}
-
-static double interp_bicubic(const double *p, int p_stride, double x,
-                             double y) {
-  double q[4];
-  q[0] = interp_cubic(p, x);
-  q[1] = interp_cubic(p + p_stride, x);
-  q[2] = interp_cubic(p + 2 * p_stride, x);
-  q[3] = interp_cubic(p + 3 * p_stride, x);
-  return interp_cubic(q, y);
-}
-
-static const double interp_rgrid_surf[65 * 18] = {
-  0.104019,    0.245714,    0.293686,    0.358635,    0.382167,    0.412446,
-  0.419955,    0.421388,    0.426672,    0.427990,    0.428531,    0.456868,
-  0.569880,    0.638822,    1.016319,    2.143453,    3.565229,    4.720880,
-  0.124618,    0.294211,    0.352023,    0.429991,    0.458206,    0.494510,
-  0.503513,    0.505232,    0.511566,    0.513234,    0.519365,    0.570225,
-  0.697373,    0.840624,    1.462198,    3.289054,    6.256517,    6.852788,
-  0.118630,    0.269669,    0.346620,    0.430999,    0.459385,    0.495783,
-  0.504808,    0.506532,    0.512884,    0.514988,    0.543437,    0.662772,
-  0.795876,    1.313596,    2.403841,    4.163098,    7.440589,    8.616275,
-  0.093329,    0.168205,    0.321320,    0.430607,    0.459385,    0.495783,
-  0.504813,    0.506548,    0.512975,    0.520662,    0.571659,    0.701841,
-  1.010727,    2.138851,    3.460626,    6.317955,    10.098127,   14.418553,
-  0.087021,    0.142905,    0.315011,    0.430509,    0.459385,    0.495787,
-  0.505075,    0.507599,    0.513584,    0.543182,    0.669941,    0.825620,
-  1.362800,    2.572187,    4.205047,    7.498399,    12.303118,   16.641735,
-  0.086923,    0.142513,    0.314913,    0.430508,    0.459385,    0.495803,
-  0.506126,    0.511816,    0.514810,    0.549705,    0.725350,    1.127334,
-  2.168597,    3.463686,    6.318605,    10.162284,   18.556041,   19.847042,
-  0.086923,    0.142513,    0.314913,    0.430506,    0.459376,    0.495805,
-  0.506388,    0.512954,    0.520772,    0.580215,    0.810474,    1.391548,
-  2.579442,    4.205160,    7.498399,    12.381597,   21.703618,   24.015457,
-  0.086923,    0.142513,    0.314911,    0.430353,    0.458765,    0.495652,
-  0.506391,    0.513406,    0.544098,    0.702950,    1.121860,    2.168961,
-  3.463798,    6.318607,    10.162284,   18.685361,   28.188192,   37.638872,
-  0.086923,    0.142513,    0.314901,    0.429742,    0.456313,    0.495045,
-  0.506484,    0.519195,    0.580104,    0.810126,    1.391462,    2.579441,
-  4.205160,    7.498399,    12.381597,   21.848607,   33.367199,   42.623190,
-  0.086923,    0.142513,    0.314899,    0.429589,    0.455706,    0.495155,
-  0.507882,    0.542426,    0.702360,    1.119921,    2.168478,    3.463791,
-  6.318607,    10.162284,   18.685361,   28.345760,   47.802028,   49.163533,
-  0.086924,    0.142548,    0.315086,    0.429842,    0.455870,    0.496336,
-  0.512412,    0.556953,    0.773373,    1.266396,    2.548277,    4.204676,
-  7.498399,    12.381597,   21.848607,   33.548250,   54.301011,   56.262859,
-  0.087067,    0.144957,    0.327436,    0.446616,    0.466362,    0.505706,
-  0.522077,    0.610747,    0.972543,    1.666916,    3.338812,    6.316669,
-  10.162284,   18.685361,   28.345760,   48.065311,   66.145302,   78.396020,
-  0.094295,    0.164235,    0.393722,    0.534219,    0.530922,    0.579308,
-  0.603889,    0.760870,    1.229961,    2.423214,    4.173513,    7.497916,
-  12.381597,   21.848607,   33.548250,   54.589585,   74.875848,   86.468182,
-  0.124096,    0.213005,    0.497188,    0.665176,    0.685973,    0.800200,
-  0.911394,    1.077971,    1.677290,    3.332129,    6.314960,    10.162257,
-  18.685361,   28.345760,   48.065311,   66.453506,   98.275189,   96.862588,
-  0.140999,    0.270140,    0.658212,    0.867661,    0.970183,    1.149516,
-  1.480599,    1.664833,    2.421893,    3.857981,    7.418830,    12.380371,
-  21.848607,   33.548250,   54.589585,   75.188867,   106.657971,  99.762997,
-  0.178353,    0.398001,    0.988462,    1.241473,    1.340967,    1.713568,
-  2.335030,    2.701432,    3.348532,    5.077158,    9.829903,    18.676528,
-  28.345700,   48.065311,   66.453506,   98.588283,   117.057193,  101.130722,
-  0.281079,    0.548300,    1.395825,    1.780770,    2.000508,    2.702964,
-  3.638454,    4.573843,    5.051641,    7.079129,    11.293332,   21.594861,
-  33.544335,   54.589585,   75.188867,   106.971065,  119.957601,  101.466632,
-  0.476762,    0.842189,    2.019678,    2.723895,    3.188467,    4.011610,
-  5.545111,    7.508984,    8.176339,    9.774504,    14.720782,   27.334416,
-  48.049609,   66.453506,   98.588283,   117.370357,  121.329855,  101.509242,
-  0.993999,    1.520111,    3.013605,    4.203530,    4.982992,    6.074944,
-  8.583581,    11.818375,   14.192544,   14.937517,   21.258160,   33.305953,
-  54.585735,   75.188867,   106.971135,  120.279824,  121.976055,  102.690130,
-  1.776487,    2.613655,    4.356487,    6.161726,    7.622196,    9.464193,
-  13.077233,   18.051656,   23.221051,   24.080068,   30.085038,   48.345269,
-  66.457698,   98.588353,   117.379415,  121.976128,  124.356210,  107.713202,
-  3.191085,    4.495201,    5.686033,    8.365566,    11.275339,   14.706437,
-  20.300969,   28.152237,   35.688355,   39.341382,   41.030743,   55.752262,
-  75.211764,   106.980285,  120.608403,  124.680746,  130.222528,  112.260098,
-  6.136611,    7.305215,    7.272532,    10.646713,   15.630815,   22.383168,
-  31.349131,   42.419822,   52.301680,   58.983454,   58.915405,   69.161305,
-  98.992460,   117.713855,  124.344836,  130.623638,  138.442401,  127.846670,
-  11.707980,   13.490761,   11.640845,   14.176132,   22.131124,   33.776462,
-  47.365711,   61.603834,   75.281056,   83.463985,   85.510533,   86.026513,
-  108.787480,  123.031136,  130.607284,  138.954406,  160.867784,  158.958882,
-  27.062874,   32.195139,   24.147297,   22.114632,   35.580506,   52.551674,
-  71.652956,   88.606776,   102.107193,  110.703186,  114.398733,  111.118539,
-  121.503578,  132.455924,  139.490806,  161.412674,  193.563210,  172.203945,
-  35.625692,   47.953028,   42.639820,   42.276254,   58.815664,   84.977282,
-  110.656412,  126.168446,  134.658126,  140.604482,  144.006012,  141.702382,
-  140.125323,  153.122630,  164.748041,  194.156197,  206.854650,  174.013079,
-  49.516447,   65.335381,   71.738306,   81.872819,   98.400740,   136.840488,
-  163.775802,  169.440078,  172.747876,  171.222919,  171.679604,  172.173550,
-  168.200129,  187.617133,  199.683394,  207.768200,  210.062520,  175.478356,
-  60.341673,   92.487135,   119.907299,  136.068010,  144.778950,  189.443534,
-  220.120077,  219.641635,  214.616503,  205.894657,  198.453924,  200.013069,
-  195.938103,  206.118661,  210.447375,  212.061379,  216.078218,  181.162805,
-  78.422159,   112.242899,  158.416312,  181.404320,  193.188690,  229.296967,
-  270.461799,  275.168977,  256.511701,  244.706786,  231.344608,  226.065087,
-  222.248618,  218.662324,  217.966722,  218.248574,  218.818588,  182.740573,
-  88.713664,   123.594164,  172.928179,  213.781414,  245.800351,  252.063414,
-  313.283141,  331.703831,  305.866639,  285.177142,  269.759635,  251.988739,
-  245.998388,  232.688076,  230.588702,  230.882657,  230.319053,  192.120741,
-  102.540561,  152.905927,  189.137131,  241.806756,  273.868497,  284.258017,
-  339.689853,  373.561104,  362.657463,  326.291984,  311.922687,  290.460189,
-  276.774381,  273.012072,  277.751792,  279.123748,  278.820447,  233.813798,
-  132.983118,  176.307242,  197.415684,  243.307787,  280.893995,  332.922370,
-  340.329043,  404.530166,  419.475405,  375.775209,  351.300889,  340.042759,
-  315.683832,  306.123530,  306.359319,  306.733063,  307.609556,  261.647847,
-  149.579109,  185.925581,  207.937033,  245.159084,  301.890957,  350.040480,
-  352.250771,  418.742329,  458.112686,  430.125208,  386.460441,  380.346839,
-  354.679150,  337.305620,  334.504124,  335.889932,  341.060725,  286.898578,
-  153.576812,  202.105624,  219.366967,  248.524506,  314.255692,  350.607526,
-  390.567688,  408.629209,  488.000213,  480.563823,  432.461799,  410.412624,
-  398.607371,  400.188740,  402.780916,  408.853470,  430.449735,  363.777088,
-  161.353129,  214.848904,  231.549852,  258.536466,  313.163177,  368.140577,
-  412.136393,  413.409032,  499.838438,  519.571063,  485.833867,  444.562715,
-  435.738129,  442.358549,  450.166531,  453.208524,  458.424358,  385.823139,
-  175.109034,  227.608058,  250.069563,  286.101747,  312.256740,  378.421485,
-  413.344147,  435.058646,  476.960941,  542.448886,  530.189154,  495.408402,
-  475.326752,  465.017144,  464.694045,  465.144689,  466.905382,  398.669138,
-  184.750180,  240.766694,  283.240772,  305.480150,  322.409001,  374.526162,
-  427.141326,  452.840323,  472.604139,  545.366105,  567.676694,  541.666203,
-  509.591873,  492.044219,  492.778569,  493.765684,  493.235693,  413.684325,
-  194.728357,  254.928927,  289.991157,  300.193195,  324.194589,  371.563147,
-  439.226438,  468.295088,  495.654854,  533.506353,  587.476353,  578.298989,
-  548.041942,  527.393885,  538.965146,  545.070442,  544.295454,  454.012211,
-  205.195287,  283.135677,  297.921431,  319.295927,  355.621830,  392.466463,
-  446.696167,  485.053519,  516.426615,  532.264584,  588.481600,  615.906737,
-  589.319634,  555.754316,  558.389367,  569.094521,  569.779764,  475.384946,
-  218.552054,  298.511016,  319.188338,  351.781666,  372.789510,  412.827434,
-  464.569387,  506.270203,  533.049810,  553.347364,  580.644599,  632.759854,
-  622.235843,  569.960552,  580.799340,  586.553714,  579.488366,  491.826482,
-  244.803348,  299.790203,  324.187975,  363.280782,  403.710443,  441.724083,
-  492.732682,  534.722691,  552.193622,  575.112647,  586.097705,  635.224970,
-  644.642944,  606.017786,  640.321218,  642.316989,  616.397020,  548.300111,
-  256.957358,  318.638991,  355.063346,  389.889307,  433.607315,  468.209001,
-  515.178157,  573.556591,  578.113115,  587.246475,  601.762801,  638.454644,
-  656.574853,  641.184609,  676.908189,  684.198162,  678.387412,  574.805864,
-  251.211502,  323.448532,  364.227424,  411.792704,  462.226488,  503.572288,
-  549.299249,  599.124071,  601.227977,  597.118176,  613.247552,  633.278532,
-  658.074755,  664.930719,  685.731531,  693.632845,  693.076350,  578.326477,
-  267.695377,  354.273736,  389.976833,  438.518178,  493.332686,  544.343027,
-  588.895829,  620.206193,  628.327410,  606.067827,  620.998532,  657.985256,
-  683.936059,  691.345257,  693.894723,  695.175306,  693.618786,  578.517148,
-  274.290725,  363.465288,  411.808596,  463.369805,  515.310226,  581.009306,
-  613.070738,  636.638714,  647.333929,  629.867603,  644.646319,  687.796202,
-  702.859596,  713.495479,  704.068069,  704.991807,  704.188594,  587.283658,
-  302.538449,  389.174737,  438.518422,  493.398902,  547.662399,  601.981814,
-  624.773046,  641.629484,  644.699451,  645.848784,  668.033340,  703.643523,
-  707.422408,  717.329600,  726.298973,  744.127507,  745.365167,  617.954068,
-  310.328188,  410.984766,  463.369805,  515.315010,  581.309832,  613.787792,
-  634.988538,  654.145284,  662.632978,  668.413496,  706.494057,  750.545471,
-  730.724808,  730.002100,  743.625262,  750.801609,  745.308457,  606.505800,
-  329.948756,  437.600191,  493.398902,  547.661910,  601.917884,  622.557745,
-  633.244395,  644.055898,  648.224221,  665.062911,  763.555733,  812.391078,
-  769.063582,  744.865168,  727.579796,  724.950408,  722.179707,  598.564510,
-  350.848328,  462.437458,  515.315010,  581.309823,  613.779123,  634.465309,
-  652.056257,  662.179143,  671.466297,  726.881256,  819.824030,  880.232789,
-  810.371672,  754.246481,  725.053473,  724.253390,  723.503395,  603.394909,
-  373.704088,  492.408266,  547.661910,  601.917884,  622.557620,  633.236320,
-  644.023513,  648.232514,  666.381639,  785.498283,  929.441612,  999.772800,
-  890.339033,  775.852504,  731.840181,  726.905100,  725.251844,  604.899901,
-  394.473422,  514.261306,  581.309823,  613.779123,  634.465309,  652.056257,
-  662.179143,  671.466557,  727.134512,  835.764144,  981.747089,  1018.462934,
-  939.686967,  811.276731,  739.398459,  727.365647,  725.285425,  604.923525,
-  419.976505,  546.538939,  601.917884,  622.557620,  633.236320,  644.023513,
-  648.232514,  666.381639,  785.545191,  932.841398,  1036.609617, 1026.945092,
-  963.822765,  840.827315,  755.532423,  730.241865,  725.366847,  604.924155,
-  437.281359,  580.116337,  613.779123,  634.465309,  652.056257,  662.179143,
-  671.466557,  727.134512,  835.764859,  981.996194,  1031.896881, 1002.544732,
-  881.157178,  828.151494,  799.340975,  751.314325,  728.316587,  605.005504,
-  464.713920,  600.649281,  622.557620,  633.236320,  644.023513,  648.232514,
-  666.381639,  785.545191,  932.841398,  1036.735329, 1035.037004, 995.478339,
-  858.093733,  823.471976,  819.881754,  798.749289,  749.440463,  607.955244,
-  495.880237,  612.473139,  634.465309,  652.056257,  662.179143,  671.466557,
-  727.134512,  835.764859,  981.996194,  1032.339788, 1031.105117, 995.303259,
-  857.733663,  823.435877,  822.822791,  819.873050,  796.882480,  629.038445,
-  510.391280,  621.158273,  633.236320,  644.023513,  648.232514,  666.381639,
-  785.545191,  932.841398,  1036.735329, 1035.566013, 1029.599350, 994.926093,
-  857.645648,  823.435143,  822.904139,  822.822791,  817.965681,  673.856962,
-  514.588176,  632.947715,  652.056257,  662.179143,  671.466557,  727.134512,
-  835.764859,  981.996194,  1032.339788, 1031.547475, 1023.835377, 972.158629,
-  851.968626,  823.347128,  822.904770,  822.904139,  820.752301,  684.418900,
-  520.013294,  631.668183,  644.023513,  648.232514,  666.381639,  785.545191,
-  932.841398,  1036.735329, 1035.567378, 1029.776746, 1001.044108, 880.853721,
-  829.201546,  822.994150,  822.904770,  822.904770,  820.792975,  684.582020,
-  531.253628,  650.479606,  662.179143,  671.466557,  727.134512,  835.764859,
-  981.996194,  1032.339788, 1031.636855, 1029.601779, 995.366703,  858.086641,
-  823.524524,  822.906135,  822.904770,  822.904770,  820.792975,  684.582020,
-  528.531744,  642.424501,  648.232514,  666.381639,  785.545191,  932.841398,
-  1036.735329, 1035.567378, 1030.219103, 1029.576226, 995.278687,  857.733663,
-  823.436508,  822.904770,  822.904770,  822.904770,  820.792975,  684.582020,
-  545.401164,  660.550678,  671.508859,  727.304161,  835.807162,  981.996850,
-  1032.339788, 1031.636855, 1030.130788, 1029.487827, 994.925709,  857.645648,
-  823.435143,  822.904770,  822.904770,  822.904770,  820.792975,  684.582020,
-  537.684760,  646.650947,  669.110131,  796.487512,  935.569890,  1036.777631,
-  1035.567378, 1030.219103, 1030.018584, 1023.810805, 972.158629,  851.968626,
-  823.347128,  822.904770,  822.904770,  822.904770,  820.792975,  684.582020,
-  552.408370,  670.001885,  738.246482,  879.690154,  992.939171,  1032.509436,
-  1031.636855, 1030.132153, 1029.665223, 1001.043724, 880.853721,  829.201546,
-  822.994150,  822.904770,  822.904770,  822.904770,  820.792975,  684.582020,
-  539.835902,  667.496388,  799.216004,  946.512211,  1039.506123, 1035.609680,
-  1030.219103, 1030.107964, 1029.577207, 995.366703,  858.086641,  823.524524,
-  822.906135,  822.904770,  822.904770,  822.904770,  820.792975,  684.582020,
-  558.362529,  734.277451,  877.197218,  990.478243,  1029.908393, 1028.993978,
-  1027.488620, 1027.464048, 1026.933674, 992.724534,  855.532488,  821.323349,
-  820.792975,  820.792975,  820.792975,  820.792975,  818.686600,  682.825198,
-  453.127195,  649.075095,  780.278390,  867.165890,  862.469711,  857.067460,
-  856.956321,  856.955937,  856.513579,  827.981461,  713.556496,  685.024378,
-  684.582020,  684.582020,  684.582020,  684.582020,  682.825198,  569.510056,
-};
-/*
- * Distortion table read by av1_model_rd_surffit(). It holds 65 * 18 doubles
- * and is addressed there as [row * 18 + column], where the row is derived
- * from the yl argument and the column from the xm argument. The values
- * interpolated from it are returned through that function's dist_f output.
- */
-static const double interp_dgrid_surf[65 * 18] = {
-  10.650434, 12.204694, 12.040917, 11.843008, 11.845578, 12.051535, 12.103583,
-  12.136780, 12.266709, 12.299107, 12.299673, 12.303120, 12.316337, 12.293431,
-  12.092165, 11.602421, 11.141559, 8.864495,  12.770003, 14.634889, 14.437149,
-  14.199413, 14.202487, 14.449423, 14.511827, 14.551629, 14.707410, 14.746265,
-  14.747610, 14.753705, 14.762194, 14.699395, 14.390525, 13.690970, 12.874168,
-  10.367121, 12.832328, 14.790730, 14.503765, 14.236403, 14.239028, 14.486600,
-  14.549164, 14.589069, 14.745250, 14.784258, 14.788320, 14.801930, 14.762798,
-  14.499088, 14.021544, 13.469684, 12.661560, 10.108384, 12.950520, 15.264726,
-  14.621957, 14.238236, 14.239028, 14.486601, 14.549264, 14.589469, 14.745361,
-  14.784949, 14.791572, 14.798652, 14.660251, 14.119394, 13.651131, 12.935657,
-  12.176082, 9.228999,  12.979992, 15.382918, 14.651428, 14.238693, 14.239028,
-  14.486701, 14.555710, 14.615321, 14.751849, 14.787700, 14.797104, 14.743189,
-  14.475057, 13.944406, 13.450468, 12.687876, 11.824993, 8.906683,  12.980449,
-  15.384750, 14.651885, 14.238700, 14.239028, 14.487102, 14.581562, 14.718998,
-  14.777721, 14.788445, 14.778661, 14.582790, 14.099785, 13.649637, 12.935359,
-  12.201859, 10.891931, 8.482221,  12.980449, 15.384750, 14.651886, 14.238801,
-  14.239434, 14.487303, 14.588010, 14.744860, 14.784773, 14.786094, 14.735647,
-  14.455704, 13.939591, 13.450393, 12.687876, 11.849334, 10.476658, 8.043672,
-  12.980449, 15.384750, 14.651987, 14.245320, 14.265579, 14.493824, 14.588211,
-  14.745312, 14.787263, 14.775934, 14.582036, 14.099475, 13.649563, 12.935358,
-  12.201859, 10.911285, 9.730570,  6.696921,  12.980449, 15.384750, 14.652393,
-  14.271466, 14.370434, 14.520069, 14.589027, 14.746028, 14.785482, 14.735605,
-  14.455693, 13.939590, 13.450393, 12.687876, 11.849334, 10.494514, 9.195398,
-  6.215460,  12.980449, 15.384750, 14.652494, 14.277985, 14.396679, 14.533035,
-  14.615021, 14.754825, 14.775610, 14.582796, 14.099664, 13.649565, 12.935358,
-  12.201859, 10.911285, 9.747361,  7.779960,  5.617541,  12.980448, 15.384731,
-  14.652415, 14.278078, 14.397578, 14.559053, 14.718657, 14.776398, 14.747044,
-  14.504690, 13.951810, 13.450583, 12.687876, 11.849334, 10.494514, 9.210817,
-  7.210003,  5.164575,  12.980446, 15.383448, 14.647073, 14.277541, 14.403813,
-  14.569546, 14.744956, 14.765103, 14.629073, 14.296161, 13.698573, 12.936118,
-  12.201859, 10.911285, 9.747361,  7.790897,  6.322998,  3.931551,  12.981550,
-  15.376916, 14.615597, 14.274820, 14.437479, 14.575942, 14.707492, 14.734111,
-  14.515975, 14.000806, 13.462803, 12.688066, 11.849334, 10.494514, 9.210817,
-  7.219566,  5.781392,  3.486081,  12.991899, 15.376201, 14.579444, 14.296898,
-  14.473361, 14.522910, 14.491600, 14.543267, 14.288580, 13.700311, 12.936579,
-  12.201867, 10.911285, 9.747361,  7.790897,  6.331506,  4.480348,  2.923138,
-  13.019848, 15.383477, 14.582260, 14.385262, 14.452673, 14.436019, 14.238174,
-  14.255993, 13.977481, 13.532342, 12.705591, 11.849605, 10.494514, 9.210817,
-  7.219566,  5.789642,  4.018194,  2.766222,  13.028558, 15.315782, 14.439141,
-  14.326286, 14.452429, 14.311731, 14.033235, 13.922587, 13.665868, 13.207897,
-  12.274375, 10.912967, 9.747371,  7.790897,  6.331506,  4.488594,  3.454993,
-  2.692682,  12.992752, 15.321471, 14.409573, 14.236340, 14.322969, 14.049072,
-  13.764823, 13.479242, 13.250105, 12.759133, 12.019174, 10.532951, 9.211409,
-  7.219566,  5.789642,  4.026440,  3.298077,  2.674624,  12.945493, 15.276596,
-  14.315745, 14.026198, 14.085774, 13.844563, 13.447576, 12.964935, 12.735525,
-  12.288592, 11.511693, 9.900227,  7.793270,  6.331506,  4.488594,  3.463236,
-  3.224318,  2.672433,  12.757570, 15.056661, 14.095011, 13.722362, 13.812624,
-  13.608480, 13.021206, 12.367627, 11.937931, 11.581049, 10.599552, 9.247860,
-  7.220151,  5.789642,  4.026437,  3.305882,  3.191260,  2.615317,  12.581293,
-  14.824658, 13.909074, 13.496158, 13.491402, 13.221550, 12.514140, 11.677229,
-  10.936895, 10.619912, 9.634779,  7.763570,  6.331082,  4.488590,  3.462798,
-  3.216460,  3.076315,  2.373499,  12.283499, 14.455760, 13.890593, 13.427587,
-  13.183783, 12.763833, 11.861006, 10.740618, 9.820756,  9.354945,  8.669862,
-  7.123268,  5.787860,  4.025994,  3.290000,  3.084410,  2.810905,  2.222916,
-  12.010893, 14.300919, 13.986624, 13.484026, 13.025385, 12.224281, 11.064265,
-  9.631040,  8.594396,  8.003736,  7.561587,  6.274418,  4.466637,  3.446574,
-  3.102467,  2.816989,  2.598688,  1.951541,  11.581477, 13.831132, 13.632027,
-  13.380414, 12.807880, 11.665651, 10.218236, 8.562237,  7.222614,  6.611808,
-  6.261676,  5.402793,  3.938544,  3.174375,  2.818166,  2.602758,  2.213911,
-  1.434763,  11.050735, 12.893449, 12.363152, 12.712829, 12.012961, 10.887854,
-  9.109699,  7.421701,  5.965603,  5.272129,  4.991435,  4.423000,  3.369988,
-  2.800371,  2.593901,  2.217431,  1.670917,  1.215265,  10.641194, 11.766277,
-  10.777082, 10.972917, 10.689298, 9.701545,  7.719947,  6.145654,  4.872442,
-  4.099600,  3.880934,  3.514159,  2.786474,  2.368963,  2.162376,  1.673670,
-  1.450770,  1.185424,  10.071964, 11.107701, 9.172361,  8.551313,  8.412080,
-  7.641397,  6.174246,  4.853916,  3.904549,  3.246810,  2.959903,  2.785066,
-  2.240001,  1.793166,  1.585520,  1.449824,  1.405368,  1.168856,  9.213182,
-  9.173278,  7.219231,  6.242951,  5.626013,  5.768007,  4.908666,  3.809589,
-  3.115109,  2.617899,  2.274793,  2.172960,  1.838597,  1.505915,  1.414333,
-  1.392666,  1.338173,  1.105611,  7.365015,  7.471370,  5.622346,  4.520127,
-  3.936272,  4.208822,  3.623024,  2.977794,  2.450003,  2.097261,  1.824090,
-  1.643270,  1.473525,  1.351388,  1.327504,  1.323865,  1.307894,  1.088234,
-  6.198210,  6.580712,  4.682511,  3.416952,  2.941929,  2.766637,  2.650686,
-  2.315439,  1.925838,  1.659784,  1.464419,  1.252806,  1.162722,  1.197518,
-  1.199875,  1.197365,  1.194040,  0.995797,  5.402507,  5.055466,  3.728724,
-  2.624359,  2.165810,  1.943189,  1.918190,  1.738078,  1.516328,  1.290520,
-  1.155793,  1.015962,  0.881900,  0.807203,  0.754242,  0.743378,  0.740288,
-  0.614158,  3.937867,  3.862507,  2.884664,  2.088147,  1.648496,  1.473584,
-  1.340123,  1.291769,  1.165381,  1.000224,  0.893316,  0.821333,  0.691363,
-  0.610501,  0.586766,  0.583762,  0.577840,  0.468733,  3.104660,  3.181078,
-  2.420208,  1.747442,  1.297956,  1.109835,  0.970385,  0.943229,  0.876923,
-  0.777584,  0.678183,  0.628623,  0.553745,  0.523430,  0.519490,  0.514394,
-  0.492259,  0.403172,  2.593833,  2.533720,  2.010452,  1.480944,  1.060302,
-  0.846383,  0.738703,  0.673144,  0.658010,  0.592449,  0.518236,  0.470335,
-  0.425088,  0.393168,  0.378116,  0.355846,  0.275469,  0.213128,  2.176988,
-  2.089575,  1.671284,  1.225008,  0.895382,  0.672008,  0.566241,  0.496746,
-  0.488005,  0.449874,  0.400899,  0.354002,  0.318150,  0.281533,  0.238545,
-  0.224159,  0.202399,  0.160681,  1.874679,  1.769165,  1.430124,  1.068727,
-  0.780272,  0.557801,  0.441643,  0.377256,  0.352957,  0.338452,  0.304965,
-  0.273172,  0.240052,  0.208724,  0.193431,  0.190845,  0.185025,  0.138166,
-  1.590226,  1.502830,  1.193127,  0.917885,  0.670432,  0.474546,  0.355420,
-  0.292305,  0.259035,  0.249937,  0.232079,  0.208943,  0.181936,  0.160038,
-  0.152257,  0.151235,  0.149583,  0.120747,  1.331730,  1.255907,  1.012871,
-  0.778422,  0.578977,  0.412432,  0.293155,  0.231824,  0.197187,  0.183921,
-  0.174876,  0.157252,  0.140263,  0.127050,  0.110244,  0.105041,  0.104323,
-  0.086944,  1.153994,  1.118771,  0.822355,  0.612321,  0.478249,  0.348222,
-  0.247408,  0.186141,  0.152714,  0.135445,  0.129810,  0.119994,  0.115619,
-  0.131626,  0.095612,  0.079343,  0.077502,  0.064550,  0.946317,  0.925894,
-  0.677969,  0.499906,  0.397101,  0.297931,  0.214467,  0.152333,  0.120731,
-  0.102686,  0.095062,  0.090361,  0.122319,  0.240194,  0.112687,  0.070690,
-  0.070461,  0.054194,  0.824155,  0.787241,  0.581856,  0.419228,  0.313167,
-  0.245582,  0.183500,  0.128101,  0.096577,  0.080267,  0.071022,  0.066851,
-  0.085754,  0.154163,  0.075884,  0.052401,  0.054270,  0.026656,  0.716310,
-  0.671378,  0.489580,  0.349569,  0.256155,  0.206343,  0.157853,  0.111950,
-  0.079271,  0.062518,  0.053441,  0.049660,  0.051400,  0.063778,  0.039993,
-  0.029133,  0.023382,  0.013725,  0.614125,  0.579096,  0.417126,  0.299465,
-  0.217849,  0.165515,  0.129040,  0.093127,  0.065612,  0.049543,  0.041429,
-  0.036850,  0.034416,  0.033989,  0.024216,  0.017377,  0.014833,  0.011987,
-  0.520407,  0.487239,  0.349473,  0.251741,  0.184897,  0.135813,  0.107098,
-  0.073607,  0.053938,  0.040531,  0.032931,  0.028876,  0.025759,  0.022168,
-  0.016739,  0.014638,  0.014333,  0.011947,  0.449954,  0.415124,  0.299452,
-  0.216942,  0.158874,  0.115334,  0.088821,  0.060105,  0.042610,  0.032566,
-  0.026903,  0.023123,  0.019913,  0.016835,  0.014306,  0.013625,  0.013535,
-  0.011284,  0.377618,  0.347773,  0.251741,  0.184839,  0.132857,  0.095439,
-  0.070462,  0.052244,  0.036078,  0.026025,  0.021518,  0.018487,  0.015361,
-  0.012905,  0.011470,  0.010569,  0.010283,  0.008297,  0.319953,  0.297976,
-  0.216942,  0.158842,  0.113280,  0.080426,  0.057367,  0.041987,  0.030135,
-  0.022295,  0.017901,  0.015121,  0.012224,  0.010035,  0.009353,  0.009108,
-  0.008695,  0.006139,  0.267864,  0.250502,  0.184839,  0.132851,  0.095039,
-  0.068220,  0.049135,  0.035315,  0.025144,  0.018237,  0.013857,  0.012094,
-  0.009715,  0.007743,  0.006937,  0.006446,  0.006243,  0.004929,  0.230449,
-  0.215895,  0.158842,  0.113280,  0.080417,  0.057174,  0.041304,  0.029959,
-  0.021866,  0.015673,  0.012133,  0.010083,  0.007801,  0.006053,  0.005401,
-  0.003834,  0.003429,  0.002851,  0.193984,  0.183963,  0.132851,  0.095039,
-  0.068220,  0.049133,  0.035305,  0.025140,  0.018150,  0.013175,  0.010422,
-  0.008491,  0.006397,  0.004567,  0.003494,  0.002933,  0.002825,  0.002355,
-  0.167298,  0.158088,  0.113280,  0.080417,  0.057174,  0.041304,  0.029959,
-  0.021866,  0.015669,  0.011955,  0.009257,  0.007051,  0.005543,  0.003905,
-  0.002984,  0.002825,  0.002814,  0.002347,  0.143228,  0.132220,  0.095039,
-  0.068220,  0.049133,  0.035305,  0.025140,  0.018150,  0.013174,  0.010394,
-  0.008403,  0.006661,  0.005378,  0.003545,  0.002876,  0.002818,  0.002814,
-  0.002347,  0.122934,  0.112735,  0.080417,  0.057174,  0.041304,  0.029959,
-  0.021866,  0.015669,  0.011955,  0.009258,  0.007182,  0.006012,  0.003762,
-  0.002866,  0.002739,  0.002788,  0.002810,  0.002347,  0.101934,  0.094569,
-  0.068220,  0.049133,  0.035305,  0.025140,  0.018150,  0.013174,  0.010394,
-  0.008405,  0.006797,  0.005845,  0.003333,  0.002703,  0.002695,  0.002723,
-  0.002781,  0.002343,  0.086702,  0.080014,  0.057174,  0.041304,  0.029959,
-  0.021866,  0.015669,  0.011955,  0.009258,  0.007190,  0.006533,  0.005839,
-  0.003326,  0.002700,  0.002690,  0.002694,  0.002716,  0.002314,  0.073040,
-  0.067886,  0.049133,  0.035305,  0.025140,  0.018150,  0.013174,  0.010394,
-  0.008405,  0.006807,  0.006468,  0.005831,  0.003325,  0.002700,  0.002690,
-  0.002690,  0.002687,  0.002253,  0.061685,  0.056890,  0.041304,  0.029959,
-  0.021866,  0.015669,  0.011955,  0.009258,  0.007190,  0.006542,  0.006360,
-  0.005416,  0.003221,  0.002698,  0.002690,  0.002690,  0.002683,  0.002238,
-  0.052465,  0.048894,  0.035305,  0.025140,  0.018150,  0.013174,  0.010394,
-  0.008405,  0.006807,  0.006472,  0.005943,  0.003748,  0.002805,  0.002692,
-  0.002690,  0.002690,  0.002683,  0.002238,  0.043838,  0.041101,  0.029959,
-  0.021866,  0.015669,  0.011955,  0.009258,  0.007190,  0.006543,  0.006465,
-  0.005839,  0.003333,  0.002702,  0.002690,  0.002690,  0.002690,  0.002683,
-  0.002238,  0.037824,  0.035133,  0.025140,  0.018150,  0.013174,  0.010394,
-  0.008405,  0.006807,  0.006480,  0.006464,  0.005838,  0.003326,  0.002700,
-  0.002690,  0.002690,  0.002690,  0.002683,  0.002238,  0.031865,  0.029815,
-  0.021866,  0.015668,  0.011955,  0.009258,  0.007190,  0.006543,  0.006475,
-  0.006462,  0.005831,  0.003325,  0.002700,  0.002690,  0.002690,  0.002690,
-  0.002683,  0.002238,  0.027150,  0.025016,  0.018128,  0.013083,  0.010371,
-  0.008405,  0.006807,  0.006480,  0.006472,  0.006359,  0.005416,  0.003221,
-  0.002698,  0.002690,  0.002690,  0.002690,  0.002683,  0.002238,  0.023094,
-  0.021760,  0.015577,  0.011590,  0.009167,  0.007188,  0.006543,  0.006475,
-  0.006466,  0.005943,  0.003748,  0.002805,  0.002692,  0.002690,  0.002690,
-  0.002690,  0.002683,  0.002238,  0.019269,  0.018038,  0.013060,  0.010280,
-  0.008382,  0.006806,  0.006480,  0.006474,  0.006464,  0.005839,  0.003333,
-  0.002702,  0.002690,  0.002690,  0.002690,  0.002690,  0.002683,  0.002238,
-  0.016874,  0.015472,  0.011566,  0.009148,  0.007171,  0.006527,  0.006458,
-  0.006457,  0.006447,  0.005823,  0.003318,  0.002693,  0.002683,  0.002683,
-  0.002683,  0.002683,  0.002676,  0.002232,  0.011968,  0.011056,  0.008762,
-  0.007219,  0.005717,  0.005391,  0.005386,  0.005386,  0.005377,  0.004856,
-  0.002767,  0.002246,  0.002238,  0.002238,  0.002238,  0.002238,  0.002232,
-  0.001862,
-};
-/*
- * Evaluates the interp_rgrid_surf and interp_dgrid_surf tables at the point
- * (xm, yl) using interp_bicubic().
- *
- * xm      Horizontal coordinate; the table columns are spaced x_step apart
- *         starting at x_start.
- * yl      Vertical coordinate; the table rows are spaced y_step apart
- *         starting at y_start.
- * rate_f  Output: value interpolated from interp_rgrid_surf.
- * dist_f  Output: value interpolated from interp_dgrid_surf.
- *
- * Both coordinates are first clamped to lie at least one step plus epsilon
- * inside the table limits, so out-of-range inputs are evaluated at the
- * nearest permitted point instead of reading outside the tables.
- */
-void av1_model_rd_surffit(double xm, double yl, double *rate_f,
-                          double *dist_f) {
-  const double x_start = -0.5;
-  const double x_end = 16.5;
-  const double x_step = 1;
-  const double y_start = -15.5;
-  const double y_end = 16.5;
-  const double y_step = 0.5;
-  const double epsilon = 1e-6;
-  const int stride = (int)rint((x_end - x_start) / x_step) + 1;  // 18 columns
-  (void)y_end;
-
-  xm = AOMMAX(xm, x_start + x_step + epsilon);
-  xm = AOMMIN(xm, x_end - x_step - epsilon);
-  yl = AOMMAX(yl, y_start + y_step + epsilon);
-  yl = AOMMIN(yl, y_end - y_step - epsilon);
-
-  const double y = (yl - y_start) / y_step;  // fractional row index
-  const double x = (xm - x_start) / x_step;  // fractional column index
-
-  const int yi = (int)floor(y);
-  const int xi = (int)floor(x);
-  assert(xi > 0);  // follows from the clamps; (xi - 1) is used as an index
-  assert(yi > 0);  // follows from the clamps; (yi - 1) is used as an index
-
-  const double yo = y - yi;  // offset within the cell
-  const double xo = x - xi;  // offset within the cell
-  const double *prate = &interp_rgrid_surf[(yi - 1) * stride + (xi - 1)];
-  const double *pdist = &interp_dgrid_surf[(yi - 1) * stride + (xi - 1)];
-  *rate_f = interp_bicubic(prate, stride, xo, yo);
-  *dist_f = interp_bicubic(pdist, stride, xo, yo);
-}
-/*
- * Rate table read by av1_model_rd_curvfit(). It has 65 entries; as indexed
- * there, entry k corresponds to the input value -15.5 + 0.5 * k. Values
- * interpolated from it are returned through that function's rate_f output.
- */
-static const double interp_rgrid_curv[65] = {
-  0.000000,    0.000000,    0.000000,    0.000000,    0.000000,     0.000000,
-  0.000000,    0.000000,    0.000000,    0.000000,    0.000000,     0.000000,
-  0.000000,    0.000000,    0.000000,    0.000000,    0.000000,     4.759876,
-  8.132086,    13.651828,   21.908271,   33.522054,   48.782376,    71.530983,
-  106.728649,  151.942795,  199.893011,  242.850965,  283.933923,   322.154203,
-  360.684608,  394.801656,  426.879017,  460.234313,  484.103987,   508.261495,
-  536.486763,  558.196737,  586.285894,  614.764511,  634.166333,   647.706472,
-  658.211478,  681.360407,  701.052141,  727.007310,  768.663973,   804.407660,
-  884.627751,  1065.658131, 1238.875214, 1440.185176, 1678.377931,  1962.243390,
-  2300.571467, 2702.152072, 3175.775119, 3730.230519, 4374.308184,  5116.798028,
-  5966.489961, 6932.173897, 8022.639747, 9246.677424, 10613.076839,
-};
-/*
- * Table read by av1_model_rd_curvfit() to produce its distbysse_f output.
- * It has 65 entries; as indexed there, entry k corresponds to the input
- * value -15.5 + 0.5 * k.
- */
-static const double interp_dgrid_curv[65] = {
-  14.604855, 14.604855, 14.604855, 14.604855, 14.604855, 14.604855, 14.604855,
-  14.604855, 14.604855, 14.604855, 14.604855, 14.604855, 14.555776, 14.533692,
-  14.439920, 14.257791, 13.977230, 13.623229, 13.064884, 12.355411, 11.560773,
-  10.728960, 9.861975,  8.643612,  6.916021,  5.154769,  3.734940,  2.680051,
-  1.925506,  1.408410,  1.042223,  0.767641,  0.565392,  0.420116,  0.310427,
-  0.231711,  0.172999,  0.128293,  0.094992,  0.072171,  0.052972,  0.039354,
-  0.029555,  0.022857,  0.016832,  0.013297,  0.000000,  0.000000,  0.000000,
-  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
-  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
-  0.000000,  0.000000,
-};
-/*
- * Evaluates the interp_rgrid_curv and interp_dgrid_curv tables at xqr using
- * interp_cubic().
- *
- * xqr          Input coordinate; the table entries are spaced x_step apart
- *              starting at x_start.
- * rate_f       Output: value interpolated from interp_rgrid_curv.
- * distbysse_f  Output: value interpolated from interp_dgrid_curv.
- *
- * xqr is first clamped to lie at least one step plus epsilon inside the
- * table limits, so out-of-range inputs are evaluated at the nearest
- * permitted point instead of reading outside the tables.
- */
-void av1_model_rd_curvfit(double xqr, double *rate_f, double *distbysse_f) {
-  const double x_start = -15.5;
-  const double x_end = 16.5;
-  const double x_step = 0.5;
-  const double epsilon = 1e-6;
-  (void)x_end;
-
-  xqr = AOMMAX(xqr, x_start + x_step + epsilon);
-  xqr = AOMMIN(xqr, x_end - x_step - epsilon);
-  const double x = (xqr - x_start) / x_step;  // fractional table index
-  const int xi = (int)floor(x);
-  const double xo = x - xi;  // offset within the interval
-
-  assert(xi > 0);  // follows from the clamp; (xi - 1) is used as an index
-
-  const double *prate = &interp_rgrid_curv[(xi - 1)];
-  const double *pdist = &interp_dgrid_curv[(xi - 1)];
-  *rate_f = interp_cubic(prate, xo);
-  *distbysse_f = interp_cubic(pdist, xo);
-}
-/*
- * Copies the above and left entropy contexts of one plane into caller
- * buffers.
- *
- * plane_bsize  Block size of the plane; determines how many entries are
- *              copied in each direction.
- * pd           Plane whose above_context / left_context are the sources.
- * t_above      Destination for the first num_4x4_w above-context entries.
- * t_left       Destination for the first num_4x4_h left-context entries.
- *
- * The counts are the block width/height shifted right by
- * tx_size_wide_log2[0] / tx_size_high_log2[0] (presumably the 4x4 transform
- * size, going by the local variable names).
- */
-static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
-                                       const struct macroblockd_plane *pd,
-                                       ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
-                                       ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
-  const int num_4x4_w = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-  const int num_4x4_h = block_size_high[plane_bsize] >> tx_size_high_log2[0];
-  const ENTROPY_CONTEXT *const above = pd->above_context;
-  const ENTROPY_CONTEXT *const left = pd->left_context;
-
-  memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
-  memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
-}
-/*
- * Copies the above and left entropy contexts of plane pd into t_above and
- * t_left for a block of size bsize.
- *
- * bsize is first converted to the plane's block size with
- * get_plane_block_size(), using the plane's subsampling_x / subsampling_y;
- * the copy itself is done by get_entropy_contexts_plane().
- */
-void av1_get_entropy_contexts(BLOCK_SIZE bsize,
-                              const struct macroblockd_plane *pd,
-                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
-                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
-  const BLOCK_SIZE plane_bsize =
-      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
-  get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left);
-}
-/*
- * Scores the candidate motion vectors for ref_frame against the current
- * source block and records two summary values in x.
- *
- * Candidates, in order:
- *   1. av1_get_ref_mv_from_stack() called with index 0;
- *   2. the same call with index 1, only if it differs from the first;
- *   3. x->pred_mv[ref_frame], only if cpi->sf.adaptive_motion_search is set
- *      and block_size < x->max_partition_size.
- *
- * cpi           Supplies the speed-feature flag and the per-block-size sdf
- *               callback (its result is treated here as a SAD).
- * x             Source block comes from x->plane[0].src. On return,
- *               x->max_mv_context[ref_frame] holds the largest candidate
- *               component magnitude >> 3 and x->pred_mv_sad[ref_frame] holds
- *               the lowest score found.
- * ref_y_buffer  Base pointer of the reference plane the candidates offset.
- * ref_y_stride  Row stride of ref_y_buffer.
- * ref_frame     Reference frame used for the lookups and as output index.
- * block_size    Selects the sdf callback and gates candidate 3.
- */
-void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
-                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
-  int i;
-  int zero_seen = 0;
-  int best_sad = INT_MAX;
-  int this_sad = INT_MAX;
-  int max_mv = 0;
-  uint8_t *src_y_ptr = x->plane[0].src.buf;
-  uint8_t *ref_y_ptr;
-  MV pred_mv[MAX_MV_REF_CANDIDATES + 1];
-  int num_mv_refs = 0;
-  const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
-  const int_mv ref_mv =
-      av1_get_ref_mv_from_stack(0, ref_frames, 0, x->mbmi_ext);
-  const int_mv ref_mv1 =
-      av1_get_ref_mv_from_stack(0, ref_frames, 1, x->mbmi_ext);
-
-  pred_mv[num_mv_refs++] = ref_mv.as_mv;
-  if (ref_mv.as_int != ref_mv1.as_int) {
-    pred_mv[num_mv_refs++] = ref_mv1.as_mv;
-  }
-  if (cpi->sf.adaptive_motion_search && block_size < x->max_partition_size)
-    pred_mv[num_mv_refs++] = x->pred_mv[ref_frame];
-
-  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));
-
-  // Get the sad for each candidate reference mv.
-  for (i = 0; i < num_mv_refs; ++i) {
-    const MV *this_mv = &pred_mv[i];
-    int fp_row, fp_col;
-    fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;  // row / 8, rounded
-    fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;  // col / 8, rounded
-    max_mv = AOMMAX(max_mv, AOMMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
-
-    if (fp_row == 0 && fp_col == 0 && zero_seen) continue;  // (0,0) done once
-    zero_seen |= (fp_row == 0 && fp_col == 0);
-
-    ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
-    // Find sad for current vector.
-    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
-                                           ref_y_ptr, ref_y_stride);
-    // Note if it is the best so far.
-    if (this_sad < best_sad) {
-      best_sad = this_sad;
-    }
-  }
-
-  // Record the largest candidate magnitude and the lowest sad for this frame.
-  x->max_mv_context[ref_frame] = max_mv;
-  x->pred_mv_sad[ref_frame] = best_sad;
-}
-/*
- * Points the dst plane descriptors at the planes of src and then runs
- * setup_pred_plane() on the first num_planes of them.
- *
- * xd          Supplies the block size (xd->mi[0]->sb_type) and the per-plane
- *             subsampling passed to setup_pred_plane().
- * dst         Output descriptors. Entries 0, 1 and 2 always receive the
- *             y / u / v buffer and stride of src, regardless of num_planes.
- * src         Frame buffer providing the plane pointers, strides and crop
- *             dimensions.
- * mi_row,
- * mi_col      Block position, forwarded unchanged to setup_pred_plane().
- * scale       Scale factors used for plane 0.
- * scale_uv    Scale factors used for every plane other than 0.
- * num_planes  Number of planes passed through setup_pred_plane().
- */
-void av1_setup_pred_block(const MACROBLOCKD *xd,
-                          struct buf_2d dst[MAX_MB_PLANE],
-                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
-                          const struct scale_factors *scale,
-                          const struct scale_factors *scale_uv,
-                          const int num_planes) {
-  int i;
-
-  dst[0].buf = src->y_buffer;
-  dst[0].stride = src->y_stride;
-  dst[1].buf = src->u_buffer;
-  dst[2].buf = src->v_buffer;
-  dst[1].stride = dst[2].stride = src->uv_stride;
-
-  for (i = 0; i < num_planes; ++i) {
-    setup_pred_plane(dst + i, xd->mi[0]->sb_type, dst[i].buf,
-                     i ? src->uv_crop_width : src->y_crop_width,
-                     i ? src->uv_crop_height : src->y_crop_height,
-                     dst[i].stride, mi_row, mi_col, i ? scale_uv : scale,
-                     xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
-  }
-}
-/*
- * Converts a raster block index into an element offset within a buffer that
- * has the given row stride.
- *
- * The low mi_size_wide_log2[plane_bsize] bits of raster_block are taken as
- * the block column and the remaining high bits as the block row; each is
- * multiplied by 4 before being combined.
- *
- * Returns row * 4 * stride + column * 4.
- */
-int av1_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
-                            int stride) {
-  const int bw = mi_size_wide_log2[plane_bsize];
-  const int y = 4 * (raster_block >> bw);
-  const int x = 4 * (raster_block & ((1 << bw) - 1));
-  return y * stride + x;
-}
-/*
- * Returns base advanced to the raster block raster_block, for an int16_t
- * buffer whose row stride is block_size_wide[plane_bsize]. The offset is
- * computed by av1_raster_block_offset().
- */
-int16_t *av1_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
-                                       int16_t *base) {
-  const int stride = block_size_wide[plane_bsize];
-  return base + av1_raster_block_offset(plane_bsize, raster_block, stride);
-}
-/*
- * Returns the scaled buffer recorded for ref_frame, or NULL when there is
- * none.
- *
- * The scaled index is read from cpi->scaled_ref_idx[ref_frame - 1]. A buffer
- * is returned only when that index is not INVALID_IDX and differs from the
- * index given by get_ref_frame_buf_idx(); otherwise the result is NULL.
- */
-YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
-                                             int ref_frame) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
-  const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
-  return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
-             ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
-             : NULL;
-}
-
-int av1_get_switchable_rate(const AV1_COMMON *const cm, MACROBLOCK *x,
-                            const MACROBLOCKD *xd) {
-  if (cm->interp_filter == SWITCHABLE) {
-    const MB_MODE_INFO *const mbmi = xd->mi[0];
-    int inter_filter_cost = 0;
-    int dir;
-
-    for (dir = 0; dir < 2; ++dir) {
-      const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
-      const InterpFilter filter =
-          av1_extract_interp_filter(mbmi->interp_filters, dir);
-      inter_filter_cost += x->switchable_interp_costs[ctx][filter];
-    }
-    return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
-  } else {
-    return 0;
-  }
-}
-
-void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
-  int i;
-  RD_OPT *const rd = &cpi->rd;
-  SPEED_FEATURES *const sf = &cpi->sf;
-
-  // Set baseline threshold values.
-  for (i = 0; i < MAX_MODES; ++i) rd->thresh_mult[i] = cpi->oxcf.mode == 0;
-
-  if (sf->adaptive_rd_thresh) {
-    rd->thresh_mult[THR_NEARESTMV] = 300;
-    rd->thresh_mult[THR_NEARESTL2] = 300;
-    rd->thresh_mult[THR_NEARESTL3] = 300;
-    rd->thresh_mult[THR_NEARESTB] = 300;
-    rd->thresh_mult[THR_NEARESTA2] = 300;
-    rd->thresh_mult[THR_NEARESTA] = 300;
-    rd->thresh_mult[THR_NEARESTG] = 300;
-  } else {
-    rd->thresh_mult[THR_NEARESTMV] = 0;
-    rd->thresh_mult[THR_NEARESTL2] = 0;
-    rd->thresh_mult[THR_NEARESTL3] = 0;
-    rd->thresh_mult[THR_NEARESTB] = 0;
-    rd->thresh_mult[THR_NEARESTA2] = 0;
-    rd->thresh_mult[THR_NEARESTA] = 0;
-    rd->thresh_mult[THR_NEARESTG] = 0;
-  }
-
-  rd->thresh_mult[THR_NEWMV] += 1000;
-  rd->thresh_mult[THR_NEWL2] += 1000;
-  rd->thresh_mult[THR_NEWL3] += 1000;
-  rd->thresh_mult[THR_NEWB] += 1000;
-  rd->thresh_mult[THR_NEWA2] = 1000;
-  rd->thresh_mult[THR_NEWA] += 1000;
-  rd->thresh_mult[THR_NEWG] += 1000;
-
-  rd->thresh_mult[THR_NEARMV] += 1000;
-  rd->thresh_mult[THR_NEARL2] += 1000;
-  rd->thresh_mult[THR_NEARL3] += 1000;
-  rd->thresh_mult[THR_NEARB] += 1000;
-  rd->thresh_mult[THR_NEARA2] = 1000;
-  rd->thresh_mult[THR_NEARA] += 1000;
-  rd->thresh_mult[THR_NEARG] += 1000;
-
-  rd->thresh_mult[THR_GLOBALMV] += 2000;
-  rd->thresh_mult[THR_GLOBALL2] += 2000;
-  rd->thresh_mult[THR_GLOBALL3] += 2000;
-  rd->thresh_mult[THR_GLOBALB] += 2000;
-  rd->thresh_mult[THR_GLOBALA2] = 2000;
-  rd->thresh_mult[THR_GLOBALG] += 2000;
-  rd->thresh_mult[THR_GLOBALA] += 2000;
-
-  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] += 1000;
-  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] += 1000;
-  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] += 1000;
-  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] += 1000;
-  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] += 1000;
-  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] += 1000;
-  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] += 1000;
-  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] += 1000;
-  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA2] += 1000;
-  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A2] += 1000;
-  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A2] += 1000;
-  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA2] += 1000;
-
-  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] += 2000;
-  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] += 2000;
-  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] += 2000;
-  rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] += 2000;
-
-  rd->thresh_mult[THR_COMP_NEAR_NEARLA] += 1200;
-  rd->thresh_mult[THR_COMP_NEAREST_NEWLA] += 1500;
-  rd->thresh_mult[THR_COMP_NEW_NEARESTLA] += 1500;
-  rd->thresh_mult[THR_COMP_NEAR_NEWLA] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEARLA] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEWLA] += 2000;
-  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] += 2500;
-
-  rd->thresh_mult[THR_COMP_NEAR_NEARL2A] += 1200;
-  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] += 1500;
-  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] += 1500;
-  rd->thresh_mult[THR_COMP_NEAR_NEWL2A] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEARL2A] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEWL2A] += 2000;
-  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A] += 2500;
-
-  rd->thresh_mult[THR_COMP_NEAR_NEARL3A] += 1200;
-  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] += 1500;
-  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] += 1500;
-  rd->thresh_mult[THR_COMP_NEAR_NEWL3A] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEARL3A] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEWL3A] += 2000;
-  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] += 2500;
-
-  rd->thresh_mult[THR_COMP_NEAR_NEARGA] += 1200;
-  rd->thresh_mult[THR_COMP_NEAREST_NEWGA] += 1500;
-  rd->thresh_mult[THR_COMP_NEW_NEARESTGA] += 1500;
-  rd->thresh_mult[THR_COMP_NEAR_NEWGA] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEARGA] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEWGA] += 2000;
-  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] += 2500;
-
-  rd->thresh_mult[THR_COMP_NEAR_NEARLB] += 1200;
-  rd->thresh_mult[THR_COMP_NEAREST_NEWLB] += 1500;
-  rd->thresh_mult[THR_COMP_NEW_NEARESTLB] += 1500;
-  rd->thresh_mult[THR_COMP_NEAR_NEWLB] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEARLB] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEWLB] += 2000;
-  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] += 2500;
-
-  rd->thresh_mult[THR_COMP_NEAR_NEARL2B] += 1200;
-  rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] += 1500;
-  rd->thresh_mult[THR_COMP_NEW_NEARESTL2B] += 1500;
-  rd->thresh_mult[THR_COMP_NEAR_NEWL2B] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEARL2B] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEWL2B] += 2000;
-  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2B] += 2500;
-
-  rd->thresh_mult[THR_COMP_NEAR_NEARL3B] += 1200;
-  rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] += 1500;
-  rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] += 1500;
-  rd->thresh_mult[THR_COMP_NEAR_NEWL3B] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEARL3B] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEWL3B] += 2000;
-  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3B] += 2500;
-
-  rd->thresh_mult[THR_COMP_NEAR_NEARGB] += 1200;
-  rd->thresh_mult[THR_COMP_NEAREST_NEWGB] += 1500;
-  rd->thresh_mult[THR_COMP_NEW_NEARESTGB] += 1500;
-  rd->thresh_mult[THR_COMP_NEAR_NEWGB] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEARGB] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEWGB] += 2000;
-  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGB] += 2500;
-
-  rd->thresh_mult[THR_COMP_NEAR_NEARLA2] += 1200;
-  rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] += 1500;
-  rd->thresh_mult[THR_COMP_NEW_NEARESTLA2] += 1500;
-  rd->thresh_mult[THR_COMP_NEAR_NEWLA2] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEARLA2] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEWLA2] += 2000;
-  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA2] += 2500;
-
-  rd->thresh_mult[THR_COMP_NEAR_NEARL2A2] += 1200;
-  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A2] += 1500;
-  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A2] += 1500;
-  rd->thresh_mult[THR_COMP_NEAR_NEWL2A2] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEARL2A2] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEWL2A2] += 2000;
-  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A2] += 2500;
-
-  rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] += 1200;
-  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A2] += 1500;
-  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A2] += 1500;
-  rd->thresh_mult[THR_COMP_NEAR_NEWL3A2] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEARL3A2] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEWL3A2] += 2000;
-  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A2] += 2500;
-
-  rd->thresh_mult[THR_COMP_NEAR_NEARGA2] += 1200;
-  rd->thresh_mult[THR_COMP_NEAREST_NEWGA2] += 1500;
-  rd->thresh_mult[THR_COMP_NEW_NEARESTGA2] += 1500;
-  rd->thresh_mult[THR_COMP_NEAR_NEWGA2] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEARGA2] += 1700;
-  rd->thresh_mult[THR_COMP_NEW_NEWGA2] += 2000;
-  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] += 2500;
-
-  rd->thresh_mult[THR_COMP_NEAR_NEARLL2] += 1600;
-  rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] += 2000;
-  rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] += 2000;
-  rd->thresh_mult[THR_COMP_NEAR_NEWLL2] += 2200;
-  rd->thresh_mult[THR_COMP_NEW_NEARLL2] += 2200;
-  rd->thresh_mult[THR_COMP_NEW_NEWLL2] += 2400;
-  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL2] += 3200;
-
-  rd->thresh_mult[THR_COMP_NEAR_NEARLL3] += 1600;
-  rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] += 2000;
-  rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] += 2000;
-  rd->thresh_mult[THR_COMP_NEAR_NEWLL3] += 2200;
-  rd->thresh_mult[THR_COMP_NEW_NEARLL3] += 2200;
-  rd->thresh_mult[THR_COMP_NEW_NEWLL3] += 2400;
-  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL3] += 3200;
-
-  rd->thresh_mult[THR_COMP_NEAR_NEARLG] += 1600;
-  rd->thresh_mult[THR_COMP_NEAREST_NEWLG] += 2000;
-  rd->thresh_mult[THR_COMP_NEW_NEARESTLG] += 2000;
-  rd->thresh_mult[THR_COMP_NEAR_NEWLG] += 2200;
-  rd->thresh_mult[THR_COMP_NEW_NEARLG] += 2200;
-  rd->thresh_mult[THR_COMP_NEW_NEWLG] += 2400;
-  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLG] += 3200;
-
-  rd->thresh_mult[THR_COMP_NEAR_NEARBA] += 1600;
-  rd->thresh_mult[THR_COMP_NEAREST_NEWBA] += 2000;
-  rd->thresh_mult[THR_COMP_NEW_NEARESTBA] += 2000;
-  rd->thresh_mult[THR_COMP_NEAR_NEWBA] += 2200;
-  rd->thresh_mult[THR_COMP_NEW_NEARBA] += 2200;
-  rd->thresh_mult[THR_COMP_NEW_NEWBA] += 2400;
-  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALBA] += 3200;
-
-  rd->thresh_mult[THR_DC] += 1000;
-  rd->thresh_mult[THR_PAETH] += 1000;
-  rd->thresh_mult[THR_SMOOTH] += 2000;
-  rd->thresh_mult[THR_SMOOTH_V] += 2000;
-  rd->thresh_mult[THR_SMOOTH_H] += 2000;
-  rd->thresh_mult[THR_H_PRED] += 2000;
-  rd->thresh_mult[THR_V_PRED] += 2000;
-  rd->thresh_mult[THR_D135_PRED] += 2500;
-  rd->thresh_mult[THR_D203_PRED] += 2500;
-  rd->thresh_mult[THR_D157_PRED] += 2500;
-  rd->thresh_mult[THR_D67_PRED] += 2500;
-  rd->thresh_mult[THR_D113_PRED] += 2500;
-  rd->thresh_mult[THR_D45_PRED] += 2500;
-}
-
-void av1_set_rd_speed_thresholds_sub8x8(AV1_COMP *cpi) {
-  static const int thresh_mult[MAX_REFS] = { 2500, 2500, 2500, 2500, 2500,
-                                             2500, 2500, 4500, 4500, 4500,
-                                             4500, 4500, 4500, 4500, 4500,
-                                             4500, 4500, 4500, 4500, 2500 };
-  RD_OPT *const rd = &cpi->rd;
-  memcpy(rd->thresh_mult_sub8x8, thresh_mult, sizeof(thresh_mult));
-}
-
-void av1_update_rd_thresh_fact(const AV1_COMMON *const cm,
-                               int (*factor_buf)[MAX_MODES], int rd_thresh,
-                               int bsize, int best_mode_index) {
-  if (rd_thresh > 0) {
-    const int top_mode = MAX_MODES;
-    int mode;
-    for (mode = 0; mode < top_mode; ++mode) {
-      const BLOCK_SIZE min_size = AOMMAX(bsize - 1, BLOCK_4X4);
-      const BLOCK_SIZE max_size =
-          AOMMIN(bsize + 2, (int)cm->seq_params.sb_size);
-      BLOCK_SIZE bs;
-      for (bs = min_size; bs <= max_size; ++bs) {
-        int *const fact = &factor_buf[bs][mode];
-        if (mode == best_mode_index) {
-          *fact -= (*fact >> 4);
-        } else {
-          *fact = AOMMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
-        }
-      }
-    }
-  }
-}
-
-int av1_get_intra_cost_penalty(int qindex, int qdelta,
-                               aom_bit_depth_t bit_depth) {
-  const int q = av1_dc_quant_Q3(qindex, qdelta, bit_depth);
-  switch (bit_depth) {
-    case AOM_BITS_8: return 20 * q;
-    case AOM_BITS_10: return 5 * q;
-    case AOM_BITS_12: return ROUND_POWER_OF_TWO(5 * q, 2);
-    default:
-      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
-      return -1;
-  }
-}
diff --git a/third_party/aom/av1/encoder/rd.h b/third_party/aom/av1/encoder/rd.h
deleted file mode 100644
index 755b61df5..000000000
--- a/third_party/aom/av1/encoder/rd.h
+++ /dev/null
@@ -1,464 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_RD_H_
-#define AOM_AV1_ENCODER_RD_H_
-
-#include <limits.h>
-
-#include "av1/common/blockd.h"
-
-#include "av1/encoder/block.h"
-#include "av1/encoder/context_tree.h"
-#include "av1/encoder/cost.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define RDDIV_BITS 7
-#define RD_EPB_SHIFT 6
-
-#define RDCOST(RM, R, D)                                            \
-  (ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), AV1_PROB_COST_SHIFT) + \
-   ((D) * (1 << RDDIV_BITS)))
-
-#define RDCOST_DBL(RM, R, D)                                       \
-  (((((double)(R)) * (RM)) / (double)(1 << AV1_PROB_COST_SHIFT)) + \
-   ((double)(D) * (1 << RDDIV_BITS)))
-
-#define QIDX_SKIP_THRESH 115
-
-#define MV_COST_WEIGHT 108
-#define MV_COST_WEIGHT_SUB 120
-
-#define RD_THRESH_MAX_FACT 64
-#define RD_THRESH_INC 1
-
-// Factor to weigh the rate for switchable interp filters.
-#define SWITCHABLE_INTERP_RATE_FACTOR 1
-
-// This enumerator type needs to be kept aligned with the mode order in
-// const MODE_DEFINITION av1_mode_order[MAX_MODES] used in the rd code.
-typedef enum {
-  THR_NEARESTMV,
-  THR_NEARESTL2,
-  THR_NEARESTL3,
-  THR_NEARESTB,
-  THR_NEARESTA2,
-  THR_NEARESTA,
-  THR_NEARESTG,
-
-  THR_NEWMV,
-  THR_NEWL2,
-  THR_NEWL3,
-  THR_NEWB,
-  THR_NEWA2,
-  THR_NEWA,
-  THR_NEWG,
-
-  THR_NEARMV,
-  THR_NEARL2,
-  THR_NEARL3,
-  THR_NEARB,
-  THR_NEARA2,
-  THR_NEARA,
-  THR_NEARG,
-
-  THR_GLOBALMV,
-  THR_GLOBALL2,
-  THR_GLOBALL3,
-  THR_GLOBALB,
-  THR_GLOBALA2,
-  THR_GLOBALA,
-  THR_GLOBALG,
-
-  THR_COMP_NEAREST_NEARESTLA,
-  THR_COMP_NEAREST_NEARESTL2A,
-  THR_COMP_NEAREST_NEARESTL3A,
-  THR_COMP_NEAREST_NEARESTGA,
-  THR_COMP_NEAREST_NEARESTLB,
-  THR_COMP_NEAREST_NEARESTL2B,
-  THR_COMP_NEAREST_NEARESTL3B,
-  THR_COMP_NEAREST_NEARESTGB,
-  THR_COMP_NEAREST_NEARESTLA2,
-  THR_COMP_NEAREST_NEARESTL2A2,
-  THR_COMP_NEAREST_NEARESTL3A2,
-  THR_COMP_NEAREST_NEARESTGA2,
-  THR_COMP_NEAREST_NEARESTLL2,
-  THR_COMP_NEAREST_NEARESTLL3,
-  THR_COMP_NEAREST_NEARESTLG,
-  THR_COMP_NEAREST_NEARESTBA,
-
-  THR_COMP_NEAR_NEARLA,
-  THR_COMP_NEW_NEARESTLA,
-  THR_COMP_NEAREST_NEWLA,
-  THR_COMP_NEW_NEARLA,
-  THR_COMP_NEAR_NEWLA,
-  THR_COMP_NEW_NEWLA,
-  THR_COMP_GLOBAL_GLOBALLA,
-
-  THR_COMP_NEAR_NEARL2A,
-  THR_COMP_NEW_NEARESTL2A,
-  THR_COMP_NEAREST_NEWL2A,
-  THR_COMP_NEW_NEARL2A,
-  THR_COMP_NEAR_NEWL2A,
-  THR_COMP_NEW_NEWL2A,
-  THR_COMP_GLOBAL_GLOBALL2A,
-
-  THR_COMP_NEAR_NEARL3A,
-  THR_COMP_NEW_NEARESTL3A,
-  THR_COMP_NEAREST_NEWL3A,
-  THR_COMP_NEW_NEARL3A,
-  THR_COMP_NEAR_NEWL3A,
-  THR_COMP_NEW_NEWL3A,
-  THR_COMP_GLOBAL_GLOBALL3A,
-
-  THR_COMP_NEAR_NEARGA,
-  THR_COMP_NEW_NEARESTGA,
-  THR_COMP_NEAREST_NEWGA,
-  THR_COMP_NEW_NEARGA,
-  THR_COMP_NEAR_NEWGA,
-  THR_COMP_NEW_NEWGA,
-  THR_COMP_GLOBAL_GLOBALGA,
-
-  THR_COMP_NEAR_NEARLB,
-  THR_COMP_NEW_NEARESTLB,
-  THR_COMP_NEAREST_NEWLB,
-  THR_COMP_NEW_NEARLB,
-  THR_COMP_NEAR_NEWLB,
-  THR_COMP_NEW_NEWLB,
-  THR_COMP_GLOBAL_GLOBALLB,
-
-  THR_COMP_NEAR_NEARL2B,
-  THR_COMP_NEW_NEARESTL2B,
-  THR_COMP_NEAREST_NEWL2B,
-  THR_COMP_NEW_NEARL2B,
-  THR_COMP_NEAR_NEWL2B,
-  THR_COMP_NEW_NEWL2B,
-  THR_COMP_GLOBAL_GLOBALL2B,
-
-  THR_COMP_NEAR_NEARL3B,
-  THR_COMP_NEW_NEARESTL3B,
-  THR_COMP_NEAREST_NEWL3B,
-  THR_COMP_NEW_NEARL3B,
-  THR_COMP_NEAR_NEWL3B,
-  THR_COMP_NEW_NEWL3B,
-  THR_COMP_GLOBAL_GLOBALL3B,
-
-  THR_COMP_NEAR_NEARGB,
-  THR_COMP_NEW_NEARESTGB,
-  THR_COMP_NEAREST_NEWGB,
-  THR_COMP_NEW_NEARGB,
-  THR_COMP_NEAR_NEWGB,
-  THR_COMP_NEW_NEWGB,
-  THR_COMP_GLOBAL_GLOBALGB,
-
-  THR_COMP_NEAR_NEARLA2,
-  THR_COMP_NEW_NEARESTLA2,
-  THR_COMP_NEAREST_NEWLA2,
-  THR_COMP_NEW_NEARLA2,
-  THR_COMP_NEAR_NEWLA2,
-  THR_COMP_NEW_NEWLA2,
-  THR_COMP_GLOBAL_GLOBALLA2,
-
-  THR_COMP_NEAR_NEARL2A2,
-  THR_COMP_NEW_NEARESTL2A2,
-  THR_COMP_NEAREST_NEWL2A2,
-  THR_COMP_NEW_NEARL2A2,
-  THR_COMP_NEAR_NEWL2A2,
-  THR_COMP_NEW_NEWL2A2,
-  THR_COMP_GLOBAL_GLOBALL2A2,
-
-  THR_COMP_NEAR_NEARL3A2,
-  THR_COMP_NEW_NEARESTL3A2,
-  THR_COMP_NEAREST_NEWL3A2,
-  THR_COMP_NEW_NEARL3A2,
-  THR_COMP_NEAR_NEWL3A2,
-  THR_COMP_NEW_NEWL3A2,
-  THR_COMP_GLOBAL_GLOBALL3A2,
-
-  THR_COMP_NEAR_NEARGA2,
-  THR_COMP_NEW_NEARESTGA2,
-  THR_COMP_NEAREST_NEWGA2,
-  THR_COMP_NEW_NEARGA2,
-  THR_COMP_NEAR_NEWGA2,
-  THR_COMP_NEW_NEWGA2,
-  THR_COMP_GLOBAL_GLOBALGA2,
-
-  THR_COMP_NEAR_NEARLL2,
-  THR_COMP_NEW_NEARESTLL2,
-  THR_COMP_NEAREST_NEWLL2,
-  THR_COMP_NEW_NEARLL2,
-  THR_COMP_NEAR_NEWLL2,
-  THR_COMP_NEW_NEWLL2,
-  THR_COMP_GLOBAL_GLOBALLL2,
-
-  THR_COMP_NEAR_NEARLL3,
-  THR_COMP_NEW_NEARESTLL3,
-  THR_COMP_NEAREST_NEWLL3,
-  THR_COMP_NEW_NEARLL3,
-  THR_COMP_NEAR_NEWLL3,
-  THR_COMP_NEW_NEWLL3,
-  THR_COMP_GLOBAL_GLOBALLL3,
-
-  THR_COMP_NEAR_NEARLG,
-  THR_COMP_NEW_NEARESTLG,
-  THR_COMP_NEAREST_NEWLG,
-  THR_COMP_NEW_NEARLG,
-  THR_COMP_NEAR_NEWLG,
-  THR_COMP_NEW_NEWLG,
-  THR_COMP_GLOBAL_GLOBALLG,
-
-  THR_COMP_NEAR_NEARBA,
-  THR_COMP_NEW_NEARESTBA,
-  THR_COMP_NEAREST_NEWBA,
-  THR_COMP_NEW_NEARBA,
-  THR_COMP_NEAR_NEWBA,
-  THR_COMP_NEW_NEWBA,
-  THR_COMP_GLOBAL_GLOBALBA,
-
-  THR_DC,
-  THR_PAETH,
-  THR_SMOOTH,
-  THR_SMOOTH_V,
-  THR_SMOOTH_H,
-  THR_H_PRED,
-  THR_V_PRED,
-  THR_D135_PRED,
-  THR_D203_PRED,
-  THR_D157_PRED,
-  THR_D67_PRED,
-  THR_D113_PRED,
-  THR_D45_PRED,
-
-  MAX_MODES,
-
-  LAST_SINGLE_REF_MODES = THR_GLOBALG,
-  MAX_SINGLE_REF_MODES = LAST_SINGLE_REF_MODES + 1,
-  LAST_COMP_REF_MODES = THR_COMP_GLOBAL_GLOBALBA,
-  MAX_COMP_REF_MODES = LAST_COMP_REF_MODES + 1
-} THR_MODES;
-
-typedef enum {
-  THR_LAST,
-  THR_LAST2,
-  THR_LAST3,
-  THR_BWDR,
-  THR_ALTR2,
-  THR_GOLD,
-  THR_ALTR,
-
-  THR_COMP_LA,
-  THR_COMP_L2A,
-  THR_COMP_L3A,
-  THR_COMP_GA,
-
-  THR_COMP_LB,
-  THR_COMP_L2B,
-  THR_COMP_L3B,
-  THR_COMP_GB,
-
-  THR_COMP_LA2,
-  THR_COMP_L2A2,
-  THR_COMP_L3A2,
-  THR_COMP_GA2,
-
-  THR_INTRA,
-
-  MAX_REFS
-} THR_MODES_SUB8X8;
-
-typedef struct RD_OPT {
-  // Thresh_mult is used to set a threshold for the rd score. A higher value
-  // means that we will accept the best mode so far more often. This number
-  // is used in combination with the current block size, and thresh_freq_fact
-  // to pick a threshold.
-  int thresh_mult[MAX_MODES];
-  int thresh_mult_sub8x8[MAX_REFS];
-
-  int threshes[MAX_SEGMENTS][BLOCK_SIZES_ALL][MAX_MODES];
-
-  int64_t prediction_type_threshes[REF_FRAMES][REFERENCE_MODES];
-
-  int RDMULT;
-} RD_OPT;
-
-static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) {
-#if CONFIG_RD_DEBUG
-  int plane;
-#endif
-  rd_stats->rate = 0;
-  rd_stats->dist = 0;
-  rd_stats->rdcost = 0;
-  rd_stats->sse = 0;
-  rd_stats->skip = 1;
-  rd_stats->zero_rate = 0;
-  rd_stats->invalid_rate = 0;
-  rd_stats->ref_rdcost = INT64_MAX;
-#if CONFIG_RD_DEBUG
-  // This may run into problems when monochrome video is
-  // encoded, as there will only be 1 plane
-  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
-    rd_stats->txb_coeff_cost[plane] = 0;
-    {
-      int r, c;
-      for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
-        for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
-          rd_stats->txb_coeff_cost_map[plane][r][c] = 0;
-    }
-  }
-#endif
-}
-
-static INLINE void av1_invalid_rd_stats(RD_STATS *rd_stats) {
-#if CONFIG_RD_DEBUG
-  int plane;
-#endif
-  rd_stats->rate = INT_MAX;
-  rd_stats->dist = INT64_MAX;
-  rd_stats->rdcost = INT64_MAX;
-  rd_stats->sse = INT64_MAX;
-  rd_stats->skip = 0;
-  rd_stats->zero_rate = 0;
-  rd_stats->invalid_rate = 1;
-  rd_stats->ref_rdcost = INT64_MAX;
-#if CONFIG_RD_DEBUG
-  // This may run into problems when monochrome video is
-  // encoded, as there will only be 1 plane
-  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
-    rd_stats->txb_coeff_cost[plane] = INT_MAX;
-    {
-      int r, c;
-      for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
-        for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
-          rd_stats->txb_coeff_cost_map[plane][r][c] = INT_MAX;
-    }
-  }
-#endif
-}
-
-static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst,
-                                      const RD_STATS *rd_stats_src) {
-#if CONFIG_RD_DEBUG
-  int plane;
-#endif
-  rd_stats_dst->rate += rd_stats_src->rate;
-  if (!rd_stats_dst->zero_rate)
-    rd_stats_dst->zero_rate = rd_stats_src->zero_rate;
-  rd_stats_dst->dist += rd_stats_src->dist;
-  rd_stats_dst->sse += rd_stats_src->sse;
-  rd_stats_dst->skip &= rd_stats_src->skip;
-  rd_stats_dst->invalid_rate &= rd_stats_src->invalid_rate;
-#if CONFIG_RD_DEBUG
-  // This may run into problems when monochrome video is
-  // encoded, as there will only be 1 plane
-  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
-    rd_stats_dst->txb_coeff_cost[plane] += rd_stats_src->txb_coeff_cost[plane];
-    {
-      // TODO(angiebird): optimize this part
-      int r, c;
-      int ref_txb_coeff_cost = 0;
-      for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
-        for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) {
-          rd_stats_dst->txb_coeff_cost_map[plane][r][c] +=
-              rd_stats_src->txb_coeff_cost_map[plane][r][c];
-          ref_txb_coeff_cost += rd_stats_dst->txb_coeff_cost_map[plane][r][c];
-        }
-      assert(ref_txb_coeff_cost == rd_stats_dst->txb_coeff_cost[plane]);
-    }
-  }
-#endif
-}
-
-struct TileInfo;
-struct TileDataEnc;
-struct AV1_COMP;
-struct macroblock;
-
-int av1_compute_rd_mult(const struct AV1_COMP *cpi, int qindex);
-
-void av1_initialize_rd_consts(struct AV1_COMP *cpi);
-
-void av1_initialize_me_consts(const struct AV1_COMP *cpi, MACROBLOCK *x,
-                              int qindex);
-
-void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n,
-                                  unsigned int qstep, int *rate, int64_t *dist);
-
-void av1_model_rd_curvfit(double xqr, double *rate_f, double *distbysse_f);
-void av1_model_rd_surffit(double xm, double yl, double *rate_f,
-                          double *distbysse_f);
-
-int av1_get_switchable_rate(const AV1_COMMON *const cm, MACROBLOCK *x,
-                            const MACROBLOCKD *xd);
-
-int av1_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
-                            int stride);
-
-int16_t *av1_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
-                                       int16_t *base);
-
-YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const struct AV1_COMP *cpi,
-                                             int ref_frame);
-
-void av1_init_me_luts(void);
-
-void av1_set_mvcost(MACROBLOCK *x, int ref, int ref_mv_idx);
-
-void av1_get_entropy_contexts(BLOCK_SIZE bsize,
-                              const struct macroblockd_plane *pd,
-                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
-                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]);
-
-void av1_set_rd_speed_thresholds(struct AV1_COMP *cpi);
-
-void av1_set_rd_speed_thresholds_sub8x8(struct AV1_COMP *cpi);
-
-void av1_update_rd_thresh_fact(const AV1_COMMON *const cm,
-                               int (*fact)[MAX_MODES], int rd_thresh, int bsize,
-                               int best_mode_index);
-
-static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
-                                      int thresh_fact) {
-  return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
-}
-
-void av1_mv_pred(const struct AV1_COMP *cpi, MACROBLOCK *x,
-                 uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame,
-                 BLOCK_SIZE block_size);
-
-static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
-  x->errorperbit = rdmult >> RD_EPB_SHIFT;
-  x->errorperbit += (x->errorperbit == 0);
-}
-
-void av1_setup_pred_block(const MACROBLOCKD *xd,
-                          struct buf_2d dst[MAX_MB_PLANE],
-                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
-                          const struct scale_factors *scale,
-                          const struct scale_factors *scale_uv,
-                          const int num_planes);
-
-int av1_get_intra_cost_penalty(int qindex, int qdelta,
-                               aom_bit_depth_t bit_depth);
-
-void av1_fill_mode_rates(AV1_COMMON *const cm, MACROBLOCK *x,
-                         FRAME_CONTEXT *fc);
-
-void av1_fill_coeff_costs(MACROBLOCK *x, FRAME_CONTEXT *fc,
-                          const int num_planes);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_RD_H_
diff --git a/third_party/aom/av1/encoder/rdopt.c b/third_party/aom/av1/encoder/rdopt.c
deleted file mode 100644
index c2d15534f..000000000
--- a/third_party/aom/av1/encoder/rdopt.c
+++ /dev/null
@@ -1,12199 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <math.h>
-
-#include "config/aom_dsp_rtcd.h"
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/blend.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/aom_timer.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/system_state.h"
-
-#include "av1/common/cfl.h"
-#include "av1/common/common.h"
-#include "av1/common/common_data.h"
-#include "av1/common/entropy.h"
-#include "av1/common/entropymode.h"
-#include "av1/common/idct.h"
-#include "av1/common/mvref_common.h"
-#include "av1/common/obmc.h"
-#include "av1/common/pred_common.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/scan.h"
-#include "av1/common/seg_common.h"
-#include "av1/common/txb_common.h"
-#include "av1/common/warped_motion.h"
-
-#include "av1/encoder/aq_variance.h"
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/cost.h"
-#include "av1/encoder/encodemb.h"
-#include "av1/encoder/encodemv.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/encodetxb.h"
-#include "av1/encoder/hybrid_fwd_txfm.h"
-#include "av1/encoder/mcomp.h"
-#include "av1/encoder/ml.h"
-#include "av1/encoder/palette.h"
-#include "av1/encoder/pustats.h"
-#include "av1/encoder/random.h"
-#include "av1/encoder/ratectrl.h"
-#include "av1/encoder/rd.h"
-#include "av1/encoder/rdopt.h"
-#include "av1/encoder/reconinter_enc.h"
-#include "av1/encoder/tokenize.h"
-#include "av1/encoder/tx_prune_model_weights.h"
-
-typedef void (*model_rd_for_sb_type)(
-    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
-    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
-    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
-    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
-typedef void (*model_rd_from_sse_type)(const AV1_COMP *const cpi,
-                                       const MACROBLOCK *const x,
-                                       BLOCK_SIZE plane_bsize, int plane,
-                                       int64_t sse, int num_samples, int *rate,
-                                       int64_t *dist);
-
-static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
-                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
-                            int plane_to, int mi_row, int mi_col,
-                            int *out_rate_sum, int64_t *out_dist_sum,
-                            int *skip_txfm_sb, int64_t *skip_sse_sb,
-                            int *plane_rate, int64_t *plane_sse,
-                            int64_t *plane_dist);
-static void model_rd_for_sb_with_curvfit(
-    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
-    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
-    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
-    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
-static void model_rd_for_sb_with_surffit(
-    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
-    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
-    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
-    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
-static void model_rd_for_sb_with_dnn(
-    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
-    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
-    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
-    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
-static void model_rd_for_sb_with_fullrdy(
-    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
-    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
-    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
-    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
-static void model_rd_from_sse(const AV1_COMP *const cpi,
-                              const MACROBLOCK *const x, BLOCK_SIZE plane_bsize,
-                              int plane, int64_t sse, int num_samples,
-                              int *rate, int64_t *dist);
-static void model_rd_with_dnn(const AV1_COMP *const cpi,
-                              const MACROBLOCK *const x, BLOCK_SIZE plane_bsize,
-                              int plane, int64_t sse, int num_samples,
-                              int *rate, int64_t *dist);
-static void model_rd_with_curvfit(const AV1_COMP *const cpi,
-                                  const MACROBLOCK *const x,
-                                  BLOCK_SIZE plane_bsize, int plane,
-                                  int64_t sse, int num_samples, int *rate,
-                                  int64_t *dist);
-static void model_rd_with_surffit(const AV1_COMP *const cpi,
-                                  const MACROBLOCK *const x,
-                                  BLOCK_SIZE plane_bsize, int plane,
-                                  int64_t sse, int num_samples, int *rate,
-                                  int64_t *dist);
-
-typedef enum {
-  MODELRD_LEGACY,
-  MODELRD_CURVFIT,
-  MODELRD_SUFFIT,
-  MODELRD_DNN,
-  MODELRD_FULLRDY,
-  MODELRD_TYPES
-} ModelRdType;
-
-static model_rd_for_sb_type model_rd_sb_fn[MODELRD_TYPES] = {
-  model_rd_for_sb, model_rd_for_sb_with_curvfit, model_rd_for_sb_with_surffit,
-  model_rd_for_sb_with_dnn, model_rd_for_sb_with_fullrdy
-};
-
-static model_rd_from_sse_type model_rd_sse_fn[MODELRD_TYPES] = {
-  model_rd_from_sse, model_rd_with_curvfit, model_rd_with_surffit,
-  model_rd_with_dnn, NULL
-};
-
-// 0: Legacy model
-// 1: Curve fit model
-// 2: Surface fit model
-// 3: DNN regression model
-// 4: Full rd model
-#define MODELRD_TYPE_INTERP_FILTER 1
-#define MODELRD_TYPE_TX_SEARCH_PRUNE 2
-#define MODELRD_TYPE_MASKED_COMPOUND 1
-#define MODELRD_TYPE_INTERINTRA 1
-#define MODELRD_TYPE_INTRA 1
-#define MODELRD_TYPE_JNT_COMPOUND 1
-
-#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
-static const InterpFilters filter_sets[DUAL_FILTER_SET_SIZE] = {
-  0x00000000, 0x00010000, 0x00020000,  // y = 0
-  0x00000001, 0x00010001, 0x00020001,  // y = 1
-  0x00000002, 0x00010002, 0x00020002,  // y = 2
-};
-
-#define SECOND_REF_FRAME_MASK                                         \
-  ((1 << ALTREF_FRAME) | (1 << ALTREF2_FRAME) | (1 << BWDREF_FRAME) | \
-   (1 << GOLDEN_FRAME) | (1 << LAST2_FRAME) | 0x01)
-
-#define ANGLE_SKIP_THRESH 10
-
-static const double ADST_FLIP_SVM[8] = {
-  /* vertical */
-  -6.6623, -2.8062, -3.2531, 3.1671,
-  /* horizontal */
-  -7.7051, -3.2234, -3.6193, 3.4533
-};
-
-typedef struct {
-  PREDICTION_MODE mode;
-  MV_REFERENCE_FRAME ref_frame[2];
-} MODE_DEFINITION;
-
-typedef struct {
-  MV_REFERENCE_FRAME ref_frame[2];
-} REF_DEFINITION;
-
-typedef enum {
-  FTXS_NONE = 0,
-  FTXS_DCT_AND_1D_DCT_ONLY = 1 << 0,
-  FTXS_DISABLE_TRELLIS_OPT = 1 << 1,
-  FTXS_USE_TRANSFORM_DOMAIN = 1 << 2
-} FAST_TX_SEARCH_MODE;
-
-static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
-                               RD_STATS *rd_stats, BLOCK_SIZE bsize, int mi_row,
-                               int mi_col, int64_t ref_best_rd);
-
-static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x,
-                            RD_STATS *rd_stats, BLOCK_SIZE bsize,
-                            int64_t non_skip_ref_best_rd,
-                            int64_t skip_ref_best_rd,
-                            FAST_TX_SEARCH_MODE ftxs_mode);
-
-struct rdcost_block_args {
-  const AV1_COMP *cpi;
-  MACROBLOCK *x;
-  ENTROPY_CONTEXT t_above[MAX_MIB_SIZE];
-  ENTROPY_CONTEXT t_left[MAX_MIB_SIZE];
-  RD_STATS rd_stats;
-  int64_t this_rd;
-  int64_t best_rd;
-  int exit_early;
-  int incomplete_exit;
-  int use_fast_coef_costing;
-  FAST_TX_SEARCH_MODE ftxs_mode;
-};
-
-#define LAST_NEW_MV_INDEX 6
-static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
-  { NEARESTMV, { LAST_FRAME, NONE_FRAME } },
-  { NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
-  { NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
-  { NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
-  { NEARESTMV, { ALTREF2_FRAME, NONE_FRAME } },
-  { NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
-  { NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },
-
-  { NEWMV, { LAST_FRAME, NONE_FRAME } },
-  { NEWMV, { LAST2_FRAME, NONE_FRAME } },
-  { NEWMV, { LAST3_FRAME, NONE_FRAME } },
-  { NEWMV, { BWDREF_FRAME, NONE_FRAME } },
-  { NEWMV, { ALTREF2_FRAME, NONE_FRAME } },
-  { NEWMV, { ALTREF_FRAME, NONE_FRAME } },
-  { NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
-
-  { NEARMV, { LAST_FRAME, NONE_FRAME } },
-  { NEARMV, { LAST2_FRAME, NONE_FRAME } },
-  { NEARMV, { LAST3_FRAME, NONE_FRAME } },
-  { NEARMV, { BWDREF_FRAME, NONE_FRAME } },
-  { NEARMV, { ALTREF2_FRAME, NONE_FRAME } },
-  { NEARMV, { ALTREF_FRAME, NONE_FRAME } },
-  { NEARMV, { GOLDEN_FRAME, NONE_FRAME } },
-
-  { GLOBALMV, { LAST_FRAME, NONE_FRAME } },
-  { GLOBALMV, { LAST2_FRAME, NONE_FRAME } },
-  { GLOBALMV, { LAST3_FRAME, NONE_FRAME } },
-  { GLOBALMV, { BWDREF_FRAME, NONE_FRAME } },
-  { GLOBALMV, { ALTREF2_FRAME, NONE_FRAME } },
-  { GLOBALMV, { GOLDEN_FRAME, NONE_FRAME } },
-  { GLOBALMV, { ALTREF_FRAME, NONE_FRAME } },
-
-  // TODO(zoeliu): May need to reconsider the order on the modes to check
-
-  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
-  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
-  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
-  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
-  { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
-  { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
-  { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
-  { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
-  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
-  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
-  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
-  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
-
-  { NEAREST_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
-  { NEAREST_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
-  { NEAREST_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
-  { NEAREST_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
-
-  { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
-  { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
-  { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
-  { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
-  { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
-  { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
-  { GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF_FRAME } },
-
-  { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
-  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
-  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
-  { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
-  { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
-  { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
-  { GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF_FRAME } },
-
-  { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
-  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
-  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
-  { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
-  { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
-  { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
-  { GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF_FRAME } },
-
-  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
-  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
-  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
-  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
-  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
-  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
-  { GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF_FRAME } },
-
-  { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
-  { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
-  { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
-  { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
-  { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
-  { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
-  { GLOBAL_GLOBALMV, { LAST_FRAME, BWDREF_FRAME } },
-
-  { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
-  { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
-  { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
-  { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
-  { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
-  { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
-  { GLOBAL_GLOBALMV, { LAST2_FRAME, BWDREF_FRAME } },
-
-  { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
-  { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
-  { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
-  { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
-  { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
-  { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
-  { GLOBAL_GLOBALMV, { LAST3_FRAME, BWDREF_FRAME } },
-
-  { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
-  { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
-  { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
-  { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
-  { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
-  { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
-  { GLOBAL_GLOBALMV, { GOLDEN_FRAME, BWDREF_FRAME } },
-
-  { NEAR_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
-  { NEW_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
-  { NEAREST_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
-  { NEW_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
-  { NEAR_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
-  { NEW_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
-  { GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF2_FRAME } },
-
-  { NEAR_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
-  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
-  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
-  { NEW_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
-  { NEAR_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
-  { NEW_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
-  { GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF2_FRAME } },
-
-  { NEAR_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
-  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
-  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
-  { NEW_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
-  { NEAR_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
-  { NEW_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
-  { GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF2_FRAME } },
-
-  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
-  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
-  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
-  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
-  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
-  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
-  { GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
-
-  { NEAR_NEARMV, { LAST_FRAME, LAST2_FRAME } },
-  { NEW_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
-  { NEAREST_NEWMV, { LAST_FRAME, LAST2_FRAME } },
-  { NEW_NEARMV, { LAST_FRAME, LAST2_FRAME } },
-  { NEAR_NEWMV, { LAST_FRAME, LAST2_FRAME } },
-  { NEW_NEWMV, { LAST_FRAME, LAST2_FRAME } },
-  { GLOBAL_GLOBALMV, { LAST_FRAME, LAST2_FRAME } },
-
-  { NEAR_NEARMV, { LAST_FRAME, LAST3_FRAME } },
-  { NEW_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
-  { NEAREST_NEWMV, { LAST_FRAME, LAST3_FRAME } },
-  { NEW_NEARMV, { LAST_FRAME, LAST3_FRAME } },
-  { NEAR_NEWMV, { LAST_FRAME, LAST3_FRAME } },
-  { NEW_NEWMV, { LAST_FRAME, LAST3_FRAME } },
-  { GLOBAL_GLOBALMV, { LAST_FRAME, LAST3_FRAME } },
-
-  { NEAR_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
-  { NEW_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
-  { NEAREST_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
-  { NEW_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
-  { NEAR_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
-  { NEW_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
-  { GLOBAL_GLOBALMV, { LAST_FRAME, GOLDEN_FRAME } },
-
-  { NEAR_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
-  { NEW_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
-  { NEAREST_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
-  { NEW_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
-  { NEAR_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
-  { NEW_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
-  { GLOBAL_GLOBALMV, { BWDREF_FRAME, ALTREF_FRAME } },
-
-  // intra modes
-  { DC_PRED, { INTRA_FRAME, NONE_FRAME } },
-  { PAETH_PRED, { INTRA_FRAME, NONE_FRAME } },
-  { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
-  { SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } },
-  { SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } },
-  { H_PRED, { INTRA_FRAME, NONE_FRAME } },
-  { V_PRED, { INTRA_FRAME, NONE_FRAME } },
-  { D135_PRED, { INTRA_FRAME, NONE_FRAME } },
-  { D203_PRED, { INTRA_FRAME, NONE_FRAME } },
-  { D157_PRED, { INTRA_FRAME, NONE_FRAME } },
-  { D67_PRED, { INTRA_FRAME, NONE_FRAME } },
-  { D113_PRED, { INTRA_FRAME, NONE_FRAME } },
-  { D45_PRED, { INTRA_FRAME, NONE_FRAME } },
-};
-
-static const int16_t intra_to_mode_idx[INTRA_MODE_NUM] = {
-  7,    // DC_PRED,
-  134,  // V_PRED,
-  133,  // H_PRED,
-  140,  // D45_PRED,
-  135,  // D135_PRED,
-  139,  // D113_PRED,
-  137,  // D157_PRED,
-  136,  // D203_PRED,
-  138,  // D67_PRED,
-  46,   // SMOOTH_PRED,
-  47,   // SMOOTH_V_PRED,
-  48,   // SMOOTH_H_PRED,
-  45,   // PAETH_PRED,
-};
-
-/* clang-format off */
-static const int16_t single_inter_to_mode_idx[SINGLE_INTER_MODE_NUM]
-                                             [REF_FRAMES] = {
-  // NEARESTMV,
-  { -1, 0, 1, 2, 6, 3, 4, 5, },
-  // NEARMV,
-  { -1, 15, 16, 17, 21, 18, 19, 20, },
-  // GLOBALMV,
-  { -1, 22, 23, 24, 27, 25, 26, 28, },
-  // NEWMV,
-  { -1, 8, 9, 10, 14, 11, 12, 13, },
-};
-/* clang-format on */
-
-/* clang-format off */
-static const int16_t comp_inter_to_mode_idx[COMP_INTER_MODE_NUM][REF_FRAMES]
-                                     [REF_FRAMES] = {
-  // NEAREST_NEARESTMV,
-  {
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-      { -1, -1, 41, 42, 43, 33, 37, 29, },
-      { -1, -1, -1, -1, -1, 34, 38, 30, },
-      { -1, -1, -1, -1, -1, 35, 39, 31, },
-      { -1, -1, -1, -1, -1, 36, 40, 32, },
-      { -1, -1, -1, -1, -1, -1, -1, 44, },
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-  },
-  // NEAR_NEARMV,
-  {
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-      { -1, -1, 141, 148, 155, 77, 105, 49, },
-      { -1, -1, -1, -1, -1, 84, 112, 56, },
-      { -1, -1, -1, -1, -1, 91, 119, 63, },
-      { -1, -1, -1, -1, -1, 98, 126, 70, },
-      { -1, -1, -1, -1, -1, -1, -1, 162, },
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-  },
-  // NEAREST_NEWMV,
-  {
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-      { -1, -1, 143, 150, 157, 79, 107, 51, },
-      { -1, -1, -1, -1, -1, 86, 114, 58, },
-      { -1, -1, -1, -1, -1, 93, 121, 65, },
-      { -1, -1, -1, -1, -1, 100, 128, 72, },
-      { -1, -1, -1, -1, -1, -1, -1, 164, },
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-  },
-  // NEW_NEARESTMV,
-  {
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-      { -1, -1, 142, 149, 156, 78, 106, 50, },
-      { -1, -1, -1, -1, -1, 85, 113, 57, },
-      { -1, -1, -1, -1, -1, 92, 120, 64, },
-      { -1, -1, -1, -1, -1, 99, 127, 71, },
-      { -1, -1, -1, -1, -1, -1, -1, 163, },
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-  },
-  // NEAR_NEWMV,
-  {
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-      { -1, -1, 145, 152, 159, 81, 109, 53, },
-      { -1, -1, -1, -1, -1, 88, 116, 60, },
-      { -1, -1, -1, -1, -1, 95, 123, 67, },
-      { -1, -1, -1, -1, -1, 102, 130, 74, },
-      { -1, -1, -1, -1, -1, -1, -1, 166, },
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-  },
-  // NEW_NEARMV,
-  {
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-      { -1, -1, 144, 151, 158, 80, 108, 52, },
-      { -1, -1, -1, -1, -1, 87, 115, 59, },
-      { -1, -1, -1, -1, -1, 94, 122, 66, },
-      { -1, -1, -1, -1, -1, 101, 129, 73, },
-      { -1, -1, -1, -1, -1, -1, -1, 165, },
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-  },
-  // GLOBAL_GLOBALMV,
-  {
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-      { -1, -1, 147, 154, 161, 83, 111, 55, },
-      { -1, -1, -1, -1, -1, 90, 118, 62, },
-      { -1, -1, -1, -1, -1, 97, 125, 69, },
-      { -1, -1, -1, -1, -1, 104, 132, 76, },
-      { -1, -1, -1, -1, -1, -1, -1, 168, },
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-  },
-  // NEW_NEWMV,
-  {
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-      { -1, -1, 146, 153, 160, 82, 110, 54, },
-      { -1, -1, -1, -1, -1, 89, 117, 61, },
-      { -1, -1, -1, -1, -1, 96, 124, 68, },
-      { -1, -1, -1, -1, -1, 103, 131, 75, },
-      { -1, -1, -1, -1, -1, -1, -1, 167, },
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-      { -1, -1, -1, -1, -1, -1, -1, -1, },
-  },
-};
-/* clang-format on */
-
-static int get_prediction_mode_idx(PREDICTION_MODE this_mode,
-                                   MV_REFERENCE_FRAME ref_frame,
-                                   MV_REFERENCE_FRAME second_ref_frame) {
-  if (this_mode < INTRA_MODE_END) {
-    assert(ref_frame == INTRA_FRAME);
-    assert(second_ref_frame == NONE_FRAME);
-    return intra_to_mode_idx[this_mode - INTRA_MODE_START];
-  }
-  if (this_mode >= SINGLE_INTER_MODE_START &&
-      this_mode < SINGLE_INTER_MODE_END) {
-    assert((ref_frame > INTRA_FRAME) && (ref_frame <= ALTREF_FRAME));
-    return single_inter_to_mode_idx[this_mode - SINGLE_INTER_MODE_START]
-                                   [ref_frame];
-  }
-  if (this_mode >= COMP_INTER_MODE_START && this_mode < COMP_INTER_MODE_END) {
-    assert((ref_frame > INTRA_FRAME) && (ref_frame <= ALTREF_FRAME));
-    assert((second_ref_frame > INTRA_FRAME) &&
-           (second_ref_frame <= ALTREF_FRAME));
-    return comp_inter_to_mode_idx[this_mode - COMP_INTER_MODE_START][ref_frame]
-                                 [second_ref_frame];
-  }
-  assert(0);
-  return -1;
-}
-
-static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = {
-  DC_PRED,       H_PRED,        V_PRED,    SMOOTH_PRED, PAETH_PRED,
-  SMOOTH_V_PRED, SMOOTH_H_PRED, D135_PRED, D203_PRED,   D157_PRED,
-  D67_PRED,      D113_PRED,     D45_PRED,
-};
-
-static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = {
-  UV_DC_PRED,     UV_CFL_PRED,   UV_H_PRED,        UV_V_PRED,
-  UV_SMOOTH_PRED, UV_PAETH_PRED, UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED,
-  UV_D135_PRED,   UV_D203_PRED,  UV_D157_PRED,     UV_D67_PRED,
-  UV_D113_PRED,   UV_D45_PRED,
-};
-
-typedef struct SingleInterModeState {
-  int64_t rd;
-  MV_REFERENCE_FRAME ref_frame;
-  int valid;
-} SingleInterModeState;
-
-typedef struct InterModeSearchState {
-  int64_t best_rd;
-  MB_MODE_INFO best_mbmode;
-  int best_rate_y;
-  int best_rate_uv;
-  int best_mode_skippable;
-  int best_skip2;
-  int best_mode_index;
-  int skip_intra_modes;
-  int num_available_refs;
-  int64_t dist_refs[REF_FRAMES];
-  int dist_order_refs[REF_FRAMES];
-  int64_t mode_threshold[MAX_MODES];
-  PREDICTION_MODE best_intra_mode;
-  int64_t best_intra_rd;
-  int angle_stats_ready;
-  uint8_t directional_mode_skip_mask[INTRA_MODES];
-  unsigned int best_pred_sse;
-  int rate_uv_intra[TX_SIZES_ALL];
-  int rate_uv_tokenonly[TX_SIZES_ALL];
-  int64_t dist_uvs[TX_SIZES_ALL];
-  int skip_uvs[TX_SIZES_ALL];
-  UV_PREDICTION_MODE mode_uv[TX_SIZES_ALL];
-  PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL];
-  int8_t uv_angle_delta[TX_SIZES_ALL];
-  int64_t best_pred_rd[REFERENCE_MODES];
-  int64_t best_pred_diff[REFERENCE_MODES];
-  // Save a set of single_newmv for each checked ref_mv.
-  int_mv single_newmv[MAX_REF_MV_SERCH][REF_FRAMES];
-  int single_newmv_rate[MAX_REF_MV_SERCH][REF_FRAMES];
-  int single_newmv_valid[MAX_REF_MV_SERCH][REF_FRAMES];
-  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SERCH][REF_FRAMES];
-  // The rd of simple translation in single inter modes
-  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SERCH][REF_FRAMES];
-
-  // Single search results by [directions][modes][reference frames]
-  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
-  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
-  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
-                                            [FWD_REFS];
-  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
-
-  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
-} InterModeSearchState;
-
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-int inter_mode_data_block_idx(BLOCK_SIZE bsize) {
-  if (bsize == BLOCK_8X8) return 1;
-  if (bsize == BLOCK_16X16) return 2;
-  if (bsize == BLOCK_32X32) return 3;
-  return -1;
-}
-
-void av1_inter_mode_data_init(TileDataEnc *tile_data) {
-  for (int i = 0; i < BLOCK_SIZES_ALL; ++i) {
-    InterModeRdModel *md = &tile_data->inter_mode_rd_models[i];
-    md->ready = 0;
-    md->num = 0;
-    md->dist_sum = 0;
-    md->ld_sum = 0;
-    md->sse_sum = 0;
-    md->sse_sse_sum = 0;
-    md->sse_ld_sum = 0;
-  }
-}
-
-static int get_est_rate_dist(TileDataEnc *tile_data, BLOCK_SIZE bsize,
-                             int64_t sse, int *est_residue_cost,
-                             int64_t *est_dist) {
-  aom_clear_system_state();
-  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
-  if (md->ready) {
-    const double est_ld = md->a * sse + md->b;
-    if (sse < md->dist_mean) {
-      *est_residue_cost = 0;
-      *est_dist = sse;
-    } else {
-      *est_residue_cost = (int)round((sse - md->dist_mean) / est_ld);
-      *est_dist = (int64_t)round(md->dist_mean);
-    }
-    return 1;
-  }
-  return 0;
-}
-
-static int64_t get_est_rd(TileDataEnc *tile_data, BLOCK_SIZE bsize, int rdmult,
-                          int64_t sse, int curr_cost) {
-  int est_residue_cost;
-  int64_t est_dist;
-  if (get_est_rate_dist(tile_data, bsize, sse, &est_residue_cost, &est_dist)) {
-    int rate = est_residue_cost + curr_cost;
-    int64_t est_rd = RDCOST(rdmult, rate, est_dist);
-    return est_rd;
-  }
-  return 0;
-}
-
-void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
-  aom_clear_system_state();
-  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
-    const int block_idx = inter_mode_data_block_idx(bsize);
-    InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
-    if (block_idx == -1) continue;
-    if ((md->ready == 0 && md->num < 200) || (md->ready == 1 && md->num < 64)) {
-      continue;
-    } else {
-      if (md->ready == 0) {
-        md->dist_mean = md->dist_sum / md->num;
-        md->ld_mean = md->ld_sum / md->num;
-        md->sse_mean = md->sse_sum / md->num;
-        md->sse_sse_mean = md->sse_sse_sum / md->num;
-        md->sse_ld_mean = md->sse_ld_sum / md->num;
-      } else {
-        const double factor = 3;
-        md->dist_mean =
-            (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
-        md->ld_mean =
-            (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
-        md->sse_mean =
-            (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
-        md->sse_sse_mean =
-            (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
-            (factor + 1);
-        md->sse_ld_mean =
-            (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
-            (factor + 1);
-      }
-
-      const double my = md->ld_mean;
-      const double mx = md->sse_mean;
-      const double dx = sqrt(md->sse_sse_mean);
-      const double dxy = md->sse_ld_mean;
-
-      md->a = (dxy - mx * my) / (dx * dx - mx * mx);
-      md->b = my - md->a * mx;
-      md->ready = 1;
-
-      md->num = 0;
-      md->dist_sum = 0;
-      md->ld_sum = 0;
-      md->sse_sum = 0;
-      md->sse_sse_sum = 0;
-      md->sse_ld_sum = 0;
-    }
-    (void)rdmult;
-  }
-}
-
-static void inter_mode_data_push(TileDataEnc *tile_data, BLOCK_SIZE bsize,
-                                 int64_t sse, int64_t dist, int residue_cost) {
-  if (residue_cost == 0 || sse == dist) return;
-  const int block_idx = inter_mode_data_block_idx(bsize);
-  if (block_idx == -1) return;
-  InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
-  if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
-    aom_clear_system_state();
-    const double ld = (sse - dist) * 1. / residue_cost;
-    ++rd_model->num;
-    rd_model->dist_sum += dist;
-    rd_model->ld_sum += ld;
-    rd_model->sse_sum += sse;
-    rd_model->sse_sse_sum += sse * sse;
-    rd_model->sse_ld_sum += sse * ld;
-  }
-}
-
-static void inter_modes_info_push(InterModesInfo *inter_modes_info,
-                                  int mode_rate, int64_t sse, int64_t est_rd,
-                                  const MB_MODE_INFO *mbmi) {
-  const int num = inter_modes_info->num;
-  assert(num < MAX_INTER_MODES);
-  inter_modes_info->mbmi_arr[num] = *mbmi;
-  inter_modes_info->mode_rate_arr[num] = mode_rate;
-  inter_modes_info->sse_arr[num] = sse;
-  inter_modes_info->est_rd_arr[num] = est_rd;
-  ++inter_modes_info->num;
-}
-
-static int compare_rd_idx_pair(const void *a, const void *b) {
-  if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
-    return 0;
-  } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
-    return 1;
-  } else {
-    return -1;
-  }
-}
-
-static void inter_modes_info_sort(const InterModesInfo *inter_modes_info,
-                                  RdIdxPair *rd_idx_pair_arr) {
-  if (inter_modes_info->num == 0) {
-    return;
-  }
-  for (int i = 0; i < inter_modes_info->num; ++i) {
-    rd_idx_pair_arr[i].idx = i;
-    rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
-  }
-  qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
-        compare_rd_idx_pair);
-}
-#endif  // CONFIG_COLLECT_INTER_MODE_RD_STATS
-
-static INLINE int write_uniform_cost(int n, int v) {
-  const int l = get_unsigned_bits(n);
-  const int m = (1 << l) - n;
-  if (l == 0) return 0;
-  if (v < m)
-    return av1_cost_literal(l - 1);
-  else
-    return av1_cost_literal(l);
-}
-
-// Similar to store_cfl_required(), but for use during the RDO process,
-// where we haven't yet determined whether this block uses CfL.
-static INLINE CFL_ALLOWED_TYPE store_cfl_required_rdo(const AV1_COMMON *cm,
-                                                      const MACROBLOCK *x) {
-  const MACROBLOCKD *xd = &x->e_mbd;
-
-  if (cm->seq_params.monochrome || x->skip_chroma_rd) return CFL_DISALLOWED;
-
-  if (!xd->cfl.is_chroma_reference) {
-    // For non-chroma-reference blocks, we should always store the luma pixels,
-    // in case the corresponding chroma-reference block uses CfL.
-    // Note that this can only happen for block sizes which are <8 on
-    // their shortest side, as otherwise they would be chroma reference
-    // blocks.
-    return CFL_ALLOWED;
-  }
-
-  // For chroma reference blocks, we should store data in the encoder iff we're
-  // allowed to try out CfL.
-  return is_cfl_allowed(xd);
-}
-
-// constants for prune 1 and prune 2 decision boundaries
-#define FAST_EXT_TX_CORR_MID 0.0
-#define FAST_EXT_TX_EDST_MID 0.1
-#define FAST_EXT_TX_CORR_MARGIN 0.5
-#define FAST_EXT_TX_EDST_MARGIN 0.3
-
-static int inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
-                           RD_STATS *rd_stats, BLOCK_SIZE bsize,
-                           int64_t ref_best_rd, FAST_TX_SEARCH_MODE ftxs_mode);
-
-static unsigned pixel_dist_visible_only(
-    const AV1_COMP *const cpi, const MACROBLOCK *x, const uint8_t *src,
-    const int src_stride, const uint8_t *dst, const int dst_stride,
-    const BLOCK_SIZE tx_bsize, int txb_rows, int txb_cols, int visible_rows,
-    int visible_cols) {
-  unsigned sse;
-
-  if (txb_rows == visible_rows && txb_cols == visible_cols) {
-    cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
-    return sse;
-  }
-  const MACROBLOCKD *xd = &x->e_mbd;
-
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    uint64_t sse64 = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride,
-                                             visible_cols, visible_rows);
-    return (unsigned int)ROUND_POWER_OF_TWO(sse64, (xd->bd - 8) * 2);
-  }
-  sse = aom_sse_odd_size(src, src_stride, dst, dst_stride, visible_cols,
-                         visible_rows);
-  return sse;
-}
-
-#if CONFIG_DIST_8X8
-static uint64_t cdef_dist_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
-                                    int sstride, int coeff_shift) {
-  uint64_t svar = 0;
-  uint64_t dvar = 0;
-  uint64_t sum_s = 0;
-  uint64_t sum_d = 0;
-  uint64_t sum_s2 = 0;
-  uint64_t sum_d2 = 0;
-  uint64_t sum_sd = 0;
-  uint64_t dist = 0;
-
-  int i, j;
-  for (i = 0; i < 8; i++) {
-    for (j = 0; j < 8; j++) {
-      sum_s += src[i * sstride + j];
-      sum_d += dst[i * dstride + j];
-      sum_s2 += src[i * sstride + j] * src[i * sstride + j];
-      sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
-      sum_sd += src[i * sstride + j] * dst[i * dstride + j];
-    }
-  }
-  /* Compute the variance -- the calculation cannot go negative. */
-  svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
-  dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
-
-  // Tuning of jm's original dering distortion metric used in CDEF tool,
-  // suggested by jm
-  const uint64_t a = 4;
-  const uint64_t b = 2;
-  const uint64_t c1 = (400 * a << 2 * coeff_shift);
-  const uint64_t c2 = (b * 20000 * a * a << 4 * coeff_shift);
-
-  dist = (uint64_t)floor(.5 + (sum_d2 + sum_s2 - 2 * sum_sd) * .5 *
-                                  (svar + dvar + c1) /
-                                  (sqrt(svar * (double)dvar + c2)));
-
-  // Calibrate dist to have similar rate for the same QP with MSE only
-  // distortion (as in master branch)
-  dist = (uint64_t)((float)dist * 0.75);
-
-  return dist;
-}
-
-static int od_compute_var_4x4(uint16_t *x, int stride) {
-  int sum;
-  int s2;
-  int i;
-  sum = 0;
-  s2 = 0;
-  for (i = 0; i < 4; i++) {
-    int j;
-    for (j = 0; j < 4; j++) {
-      int t;
-
-      t = x[i * stride + j];
-      sum += t;
-      s2 += t * t;
-    }
-  }
-
-  return (s2 - (sum * sum >> 4)) >> 4;
-}
-
-/* OD_DIST_LP_MID controls the frequency weighting filter used for computing
-   the distortion. For a value X, the filter is [1 X 1]/(X + 2) and
-   is applied both horizontally and vertically. For X=5, the filter is
-   a good approximation for the OD_QM8_Q4_HVS quantization matrix. */
-#define OD_DIST_LP_MID (5)
-#define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)
-
-static double od_compute_dist_8x8(int use_activity_masking, uint16_t *x,
-                                  uint16_t *y, od_coeff *e_lp, int stride) {
-  double sum;
-  int min_var;
-  double mean_var;
-  double var_stat;
-  double activity;
-  double calibration;
-  int i;
-  int j;
-  double vardist;
-
-  vardist = 0;
-
-#if 1
-  min_var = INT_MAX;
-  mean_var = 0;
-  for (i = 0; i < 3; i++) {
-    for (j = 0; j < 3; j++) {
-      int varx;
-      int vary;
-      varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride);
-      vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride);
-      min_var = OD_MINI(min_var, varx);
-      mean_var += 1. / (1 + varx);
-      /* The cast to (double) is to avoid an overflow before the sqrt.*/
-      vardist += varx - 2 * sqrt(varx * (double)vary) + vary;
-    }
-  }
-  /* We use a different variance statistic depending on whether activity
-     masking is used, since the harmonic mean appeared slightly worse with
-     masking off. The calibration constant just ensures that we preserve the
-     rate compared to activity=1. */
-  if (use_activity_masking) {
-    calibration = 1.95;
-    var_stat = 9. / mean_var;
-  } else {
-    calibration = 1.62;
-    var_stat = min_var;
-  }
-  /* 1.62 is a calibration constant, 0.25 is a noise floor and 1/6 is the
-     activity masking constant. */
-  activity = calibration * pow(.25 + var_stat, -1. / 6);
-#else
-  activity = 1;
-#endif  // 1
-  sum = 0;
-  for (i = 0; i < 8; i++) {
-    for (j = 0; j < 8; j++)
-      sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j];
-  }
-  /* Normalize the filter to unit DC response. */
-  sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM *
-               OD_DIST_LP_NORM);
-  return activity * activity * (sum + vardist);
-}
-
-// Note : Inputs x and y are in a pixel domain
-static double od_compute_dist_common(int activity_masking, uint16_t *x,
-                                     uint16_t *y, int bsize_w, int bsize_h,
-                                     int qindex, od_coeff *tmp,
-                                     od_coeff *e_lp) {
-  int i, j;
-  double sum = 0;
-  const int mid = OD_DIST_LP_MID;
-
-  for (j = 0; j < bsize_w; j++) {
-    e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
-    e_lp[(bsize_h - 1) * bsize_w + j] = mid * tmp[(bsize_h - 1) * bsize_w + j] +
-                                        2 * tmp[(bsize_h - 2) * bsize_w + j];
-  }
-  for (i = 1; i < bsize_h - 1; i++) {
-    for (j = 0; j < bsize_w; j++) {
-      e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
-                              tmp[(i - 1) * bsize_w + j] +
-                              tmp[(i + 1) * bsize_w + j];
-    }
-  }
-  for (i = 0; i < bsize_h; i += 8) {
-    for (j = 0; j < bsize_w; j += 8) {
-      sum += od_compute_dist_8x8(activity_masking, &x[i * bsize_w + j],
-                                 &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
-                                 bsize_w);
-    }
-  }
-  /* Scale according to linear regression against SSE, for 8x8 blocks. */
-  if (activity_masking) {
-    sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
-           (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
-  } else {
-    sum *= qindex >= 128
-               ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
-               : qindex <= 43 ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
-                              : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
-  }
-
-  return sum;
-}
-
-static double od_compute_dist(uint16_t *x, uint16_t *y, int bsize_w,
-                              int bsize_h, int qindex) {
-  assert(bsize_w >= 8 && bsize_h >= 8);
-
-  int activity_masking = 0;
-
-  int i, j;
-  DECLARE_ALIGNED(16, od_coeff, e[MAX_SB_SQUARE]);
-  DECLARE_ALIGNED(16, od_coeff, tmp[MAX_SB_SQUARE]);
-  DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_SB_SQUARE]);
-  for (i = 0; i < bsize_h; i++) {
-    for (j = 0; j < bsize_w; j++) {
-      e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
-    }
-  }
-  int mid = OD_DIST_LP_MID;
-  for (i = 0; i < bsize_h; i++) {
-    tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
-    tmp[i * bsize_w + bsize_w - 1] =
-        mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
-    for (j = 1; j < bsize_w - 1; j++) {
-      tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
-                             e[i * bsize_w + j + 1];
-    }
-  }
-  return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
-                                qindex, tmp, e_lp);
-}
-
-static double od_compute_dist_diff(uint16_t *x, int16_t *e, int bsize_w,
-                                   int bsize_h, int qindex) {
-  assert(bsize_w >= 8 && bsize_h >= 8);
-
-  int activity_masking = 0;
-
-  DECLARE_ALIGNED(16, uint16_t, y[MAX_SB_SQUARE]);
-  DECLARE_ALIGNED(16, od_coeff, tmp[MAX_SB_SQUARE]);
-  DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_SB_SQUARE]);
-  int i, j;
-  for (i = 0; i < bsize_h; i++) {
-    for (j = 0; j < bsize_w; j++) {
-      y[i * bsize_w + j] = x[i * bsize_w + j] - e[i * bsize_w + j];
-    }
-  }
-  int mid = OD_DIST_LP_MID;
-  for (i = 0; i < bsize_h; i++) {
-    tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
-    tmp[i * bsize_w + bsize_w - 1] =
-        mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
-    for (j = 1; j < bsize_w - 1; j++) {
-      tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
-                             e[i * bsize_w + j + 1];
-    }
-  }
-  return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
-                                qindex, tmp, e_lp);
-}
-
-int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCK *x,
-                     const uint8_t *src, int src_stride, const uint8_t *dst,
-                     int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
-                     int bsh, int visible_w, int visible_h, int qindex) {
-  int64_t d = 0;
-  int i, j;
-  const MACROBLOCKD *xd = &x->e_mbd;
-
-  DECLARE_ALIGNED(16, uint16_t, orig[MAX_SB_SQUARE]);
-  DECLARE_ALIGNED(16, uint16_t, rec[MAX_SB_SQUARE]);
-
-  assert(bsw >= 8);
-  assert(bsh >= 8);
-  assert((bsw & 0x07) == 0);
-  assert((bsh & 0x07) == 0);
-
-  if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
-      x->tune_metric == AOM_TUNE_DAALA_DIST) {
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      for (j = 0; j < bsh; j++)
-        for (i = 0; i < bsw; i++)
-          orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
-
-      if ((bsw == visible_w) && (bsh == visible_h)) {
-        for (j = 0; j < bsh; j++)
-          for (i = 0; i < bsw; i++)
-            rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
-      } else {
-        for (j = 0; j < visible_h; j++)
-          for (i = 0; i < visible_w; i++)
-            rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
-
-        if (visible_w < bsw) {
-          for (j = 0; j < bsh; j++)
-            for (i = visible_w; i < bsw; i++)
-              rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
-        }
-
-        if (visible_h < bsh) {
-          for (j = visible_h; j < bsh; j++)
-            for (i = 0; i < bsw; i++)
-              rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
-        }
-      }
-    } else {
-      for (j = 0; j < bsh; j++)
-        for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
-
-      if ((bsw == visible_w) && (bsh == visible_h)) {
-        for (j = 0; j < bsh; j++)
-          for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
-      } else {
-        for (j = 0; j < visible_h; j++)
-          for (i = 0; i < visible_w; i++)
-            rec[j * bsw + i] = dst[j * dst_stride + i];
-
-        if (visible_w < bsw) {
-          for (j = 0; j < bsh; j++)
-            for (i = visible_w; i < bsw; i++)
-              rec[j * bsw + i] = src[j * src_stride + i];
-        }
-
-        if (visible_h < bsh) {
-          for (j = visible_h; j < bsh; j++)
-            for (i = 0; i < bsw; i++)
-              rec[j * bsw + i] = src[j * src_stride + i];
-        }
-      }
-    }
-  }
-
-  if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
-    d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex);
-  } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
-    int coeff_shift = AOMMAX(xd->bd - 8, 0);
-
-    for (i = 0; i < bsh; i += 8) {
-      for (j = 0; j < bsw; j += 8) {
-        d += cdef_dist_8x8_16bit(&rec[i * bsw + j], bsw, &orig[i * bsw + j],
-                                 bsw, coeff_shift);
-      }
-    }
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
-      d = ((uint64_t)d) >> 2 * coeff_shift;
-  } else {
-    // Otherwise, MSE by default
-    d = pixel_dist_visible_only(cpi, x, src, src_stride, dst, dst_stride,
-                                tx_bsize, bsh, bsw, visible_h, visible_w);
-  }
-
-  return d;
-}
-
-static int64_t dist_8x8_diff(const MACROBLOCK *x, const uint8_t *src,
-                             int src_stride, const int16_t *diff,
-                             int diff_stride, int bsw, int bsh, int visible_w,
-                             int visible_h, int qindex) {
-  int64_t d = 0;
-  int i, j;
-  const MACROBLOCKD *xd = &x->e_mbd;
-
-  DECLARE_ALIGNED(16, uint16_t, orig[MAX_SB_SQUARE]);
-  DECLARE_ALIGNED(16, int16_t, diff16[MAX_SB_SQUARE]);
-
-  assert(bsw >= 8);
-  assert(bsh >= 8);
-  assert((bsw & 0x07) == 0);
-  assert((bsh & 0x07) == 0);
-
-  if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
-      x->tune_metric == AOM_TUNE_DAALA_DIST) {
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      for (j = 0; j < bsh; j++)
-        for (i = 0; i < bsw; i++)
-          orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
-    } else {
-      for (j = 0; j < bsh; j++)
-        for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
-    }
-
-    if ((bsw == visible_w) && (bsh == visible_h)) {
-      for (j = 0; j < bsh; j++)
-        for (i = 0; i < bsw; i++)
-          diff16[j * bsw + i] = diff[j * diff_stride + i];
-    } else {
-      for (j = 0; j < visible_h; j++)
-        for (i = 0; i < visible_w; i++)
-          diff16[j * bsw + i] = diff[j * diff_stride + i];
-
-      if (visible_w < bsw) {
-        for (j = 0; j < bsh; j++)
-          for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0;
-      }
-
-      if (visible_h < bsh) {
-        for (j = visible_h; j < bsh; j++)
-          for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0;
-      }
-    }
-  }
-
-  if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
-    d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex);
-  } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
-    int coeff_shift = AOMMAX(xd->bd - 8, 0);
-    DECLARE_ALIGNED(16, uint16_t, dst16[MAX_SB_SQUARE]);
-
-    for (i = 0; i < bsh; i++) {
-      for (j = 0; j < bsw; j++) {
-        dst16[i * bsw + j] = orig[i * bsw + j] - diff16[i * bsw + j];
-      }
-    }
-
-    for (i = 0; i < bsh; i += 8) {
-      for (j = 0; j < bsw; j += 8) {
-        d += cdef_dist_8x8_16bit(&dst16[i * bsw + j], bsw, &orig[i * bsw + j],
-                                 bsw, coeff_shift);
-      }
-    }
-    // Don't scale 'd' for HBD since it will be done by caller side for diff
-    // input
-  } else {
-    // Otherwise, MSE by default
-    d = aom_sum_squares_2d_i16(diff, diff_stride, visible_w, visible_h);
-  }
-
-  return d;
-}
-#endif  // CONFIG_DIST_8X8
-
-static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
-                                         const uint8_t *src, int src_stride,
-                                         const uint8_t *dst, int dst_stride,
-                                         int need_4th, double *hordist,
-                                         double *verdist) {
-  const int bw = block_size_wide[bsize];
-  const int bh = block_size_high[bsize];
-  unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-
-  if (bsize < BLOCK_16X16 || (bsize >= BLOCK_4X16 && bsize <= BLOCK_32X8)) {
-    // Special cases: calculate 'esq' values manually, as we don't have 'vf'
-    // functions for the 16 (very small) sub-blocks of this block.
-    const int w_shift = (bw == 4) ? 0 : (bw == 8) ? 1 : (bw == 16) ? 2 : 3;
-    const int h_shift = (bh == 4) ? 0 : (bh == 8) ? 1 : (bh == 16) ? 2 : 3;
-    assert(bw <= 32);
-    assert(bh <= 32);
-    assert(((bw - 1) >> w_shift) + (((bh - 1) >> h_shift) << 2) == 15);
-    if (cpi->common.seq_params.use_highbitdepth) {
-      const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
-      const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
-      for (int i = 0; i < bh; ++i)
-        for (int j = 0; j < bw; ++j) {
-          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
-          esq[index] +=
-              (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
-              (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
-        }
-    } else {
-      for (int i = 0; i < bh; ++i)
-        for (int j = 0; j < bw; ++j) {
-          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
-          esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
-                        (src[j + i * src_stride] - dst[j + i * dst_stride]);
-        }
-    }
-  } else {  // Calculate 'esq' values using 'vf' functions on the 16 sub-blocks.
-    const int f_index =
-        (bsize < BLOCK_SIZES) ? bsize - BLOCK_16X16 : bsize - BLOCK_8X16;
-    assert(f_index >= 0 && f_index < BLOCK_SIZES_ALL);
-    const BLOCK_SIZE subsize = (BLOCK_SIZE)f_index;
-    assert(block_size_wide[bsize] == 4 * block_size_wide[subsize]);
-    assert(block_size_high[bsize] == 4 * block_size_high[subsize]);
-    cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[0]);
-    cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
-                            &esq[1]);
-    cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
-                            &esq[2]);
-    cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
-                            dst_stride, &esq[3]);
-    src += bh / 4 * src_stride;
-    dst += bh / 4 * dst_stride;
-
-    cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[4]);
-    cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
-                            &esq[5]);
-    cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
-                            &esq[6]);
-    cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
-                            dst_stride, &esq[7]);
-    src += bh / 4 * src_stride;
-    dst += bh / 4 * dst_stride;
-
-    cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[8]);
-    cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
-                            &esq[9]);
-    cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
-                            &esq[10]);
-    cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
-                            dst_stride, &esq[11]);
-    src += bh / 4 * src_stride;
-    dst += bh / 4 * dst_stride;
-
-    cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[12]);
-    cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
-                            &esq[13]);
-    cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
-                            &esq[14]);
-    cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
-                            dst_stride, &esq[15]);
-  }
-
-  double total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] +
-                 esq[6] + esq[7] + esq[8] + esq[9] + esq[10] + esq[11] +
-                 esq[12] + esq[13] + esq[14] + esq[15];
-  if (total > 0) {
-    const double e_recip = 1.0 / total;
-    hordist[0] = ((double)esq[0] + esq[4] + esq[8] + esq[12]) * e_recip;
-    hordist[1] = ((double)esq[1] + esq[5] + esq[9] + esq[13]) * e_recip;
-    hordist[2] = ((double)esq[2] + esq[6] + esq[10] + esq[14]) * e_recip;
-    if (need_4th) {
-      hordist[3] = ((double)esq[3] + esq[7] + esq[11] + esq[15]) * e_recip;
-    }
-    verdist[0] = ((double)esq[0] + esq[1] + esq[2] + esq[3]) * e_recip;
-    verdist[1] = ((double)esq[4] + esq[5] + esq[6] + esq[7]) * e_recip;
-    verdist[2] = ((double)esq[8] + esq[9] + esq[10] + esq[11]) * e_recip;
-    if (need_4th) {
-      verdist[3] = ((double)esq[12] + esq[13] + esq[14] + esq[15]) * e_recip;
-    }
-  } else {
-    hordist[0] = verdist[0] = 0.25;
-    hordist[1] = verdist[1] = 0.25;
-    hordist[2] = verdist[2] = 0.25;
-    if (need_4th) {
-      hordist[3] = verdist[3] = 0.25;
-    }
-  }
-}
-
-static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize,
-                            const uint8_t *src, int src_stride,
-                            const uint8_t *dst, int dst_stride) {
-  int prune_bitmask = 0;
-  double svm_proj_h = 0, svm_proj_v = 0;
-  double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
-  get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride, 0,
-                               hdist, vdist);
-
-  svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
-               vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
-  svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
-               hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
-  if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
-    prune_bitmask |= 1 << FLIPADST_1D;
-  else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
-    prune_bitmask |= 1 << ADST_1D;
-
-  if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
-    prune_bitmask |= 1 << (FLIPADST_1D + 8);
-  else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
-    prune_bitmask |= 1 << (ADST_1D + 8);
-
-  return prune_bitmask;
-}
-
-static void get_horver_correlation(const int16_t *diff, int stride, int w,
-                                   int h, double *hcorr, double *vcorr) {
-  // Returns hor/ver correlation coefficient
-  const int num = (h - 1) * (w - 1);
-  double num_r;
-  int i, j;
-  int64_t xy_sum = 0, xz_sum = 0;
-  int64_t x_sum = 0, y_sum = 0, z_sum = 0;
-  int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
-  double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
-  *hcorr = *vcorr = 1;
-
-  assert(num > 0);
-  num_r = 1.0 / num;
-  for (i = 1; i < h; ++i) {
-    for (j = 1; j < w; ++j) {
-      const int16_t x = diff[i * stride + j];
-      const int16_t y = diff[i * stride + j - 1];
-      const int16_t z = diff[(i - 1) * stride + j];
-      xy_sum += x * y;
-      xz_sum += x * z;
-      x_sum += x;
-      y_sum += y;
-      z_sum += z;
-      x2_sum += x * x;
-      y2_sum += y * y;
-      z2_sum += z * z;
-    }
-  }
-  x_var_n = x2_sum - (x_sum * x_sum) * num_r;
-  y_var_n = y2_sum - (y_sum * y_sum) * num_r;
-  z_var_n = z2_sum - (z_sum * z_sum) * num_r;
-  xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
-  xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
-  if (x_var_n > 0 && y_var_n > 0) {
-    *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
-    *hcorr = *hcorr < 0 ? 0 : *hcorr;
-  }
-  if (x_var_n > 0 && z_var_n > 0) {
-    *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
-    *vcorr = *vcorr < 0 ? 0 : *vcorr;
-  }
-}
-
-static int dct_vs_idtx(const int16_t *diff, int stride, int w, int h) {
-  double hcorr, vcorr;
-  int prune_bitmask = 0;
-  get_horver_correlation(diff, stride, w, h, &hcorr, &vcorr);
-
-  if (vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
-    prune_bitmask |= 1 << IDTX_1D;
-  else if (vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
-    prune_bitmask |= 1 << DCT_1D;
-
-  if (hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
-    prune_bitmask |= 1 << (IDTX_1D + 8);
-  else if (hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
-    prune_bitmask |= 1 << (DCT_1D + 8);
-  return prune_bitmask;
-}
-
-// Performance drop: 0.5%, Speed improvement: 24%
-static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
-                             MACROBLOCK *x, const MACROBLOCKD *xd,
-                             int adst_flipadst, int dct_idtx) {
-  int prune = 0;
-
-  if (adst_flipadst) {
-    const struct macroblock_plane *const p = &x->plane[0];
-    const struct macroblockd_plane *const pd = &xd->plane[0];
-    prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
-                              pd->dst.buf, pd->dst.stride);
-  }
-  if (dct_idtx) {
-    av1_subtract_plane(x, bsize, 0);
-    const struct macroblock_plane *const p = &x->plane[0];
-    const int bw = block_size_wide[bsize];
-    const int bh = block_size_high[bsize];
-    prune |= dct_vs_idtx(p->src_diff, bw, bw, bh);
-  }
-
-  return prune;
-}
-
-// Performance drop: 0.3%, Speed improvement: 5%
-static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
-                             const MACROBLOCK *x, const MACROBLOCKD *xd) {
-  const struct macroblock_plane *const p = &x->plane[0];
-  const struct macroblockd_plane *const pd = &xd->plane[0];
-  return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
-                          pd->dst.stride);
-}
-
-// 1D Transforms used in inter set, this needs to be changed if
-// ext_tx_used_inter is changed
-static const int ext_tx_used_inter_1D[EXT_TX_SETS_INTER][TX_TYPES_1D] = {
-  { 1, 0, 0, 0 },
-  { 1, 1, 1, 1 },
-  { 1, 1, 1, 1 },
-  { 1, 0, 0, 1 },
-};
-
-static void get_energy_distribution_finer(const int16_t *diff, int stride,
-                                          int bw, int bh, float *hordist,
-                                          float *verdist) {
-  // First compute downscaled block energy values (esq); downscale factors
-  // are defined by w_shift and h_shift.
-  unsigned int esq[256];
-  const int w_shift = bw <= 8 ? 0 : 1;
-  const int h_shift = bh <= 8 ? 0 : 1;
-  const int esq_w = bw >> w_shift;
-  const int esq_h = bh >> h_shift;
-  const int esq_sz = esq_w * esq_h;
-  int i, j;
-  memset(esq, 0, esq_sz * sizeof(esq[0]));
-  if (w_shift) {
-    for (i = 0; i < bh; i++) {
-      unsigned int *cur_esq_row = esq + (i >> h_shift) * esq_w;
-      const int16_t *cur_diff_row = diff + i * stride;
-      for (j = 0; j < bw; j += 2) {
-        cur_esq_row[j >> 1] += (cur_diff_row[j] * cur_diff_row[j] +
-                                cur_diff_row[j + 1] * cur_diff_row[j + 1]);
-      }
-    }
-  } else {
-    for (i = 0; i < bh; i++) {
-      unsigned int *cur_esq_row = esq + (i >> h_shift) * esq_w;
-      const int16_t *cur_diff_row = diff + i * stride;
-      for (j = 0; j < bw; j++) {
-        cur_esq_row[j] += cur_diff_row[j] * cur_diff_row[j];
-      }
-    }
-  }
-
-  uint64_t total = 0;
-  for (i = 0; i < esq_sz; i++) total += esq[i];
-
-  // Output hordist and verdist arrays are normalized 1D projections of esq
-  if (total == 0) {
-    float hor_val = 1.0f / esq_w;
-    for (j = 0; j < esq_w - 1; j++) hordist[j] = hor_val;
-    float ver_val = 1.0f / esq_h;
-    for (i = 0; i < esq_h - 1; i++) verdist[i] = ver_val;
-    return;
-  }
-
-  const float e_recip = 1.0f / (float)total;
-  memset(hordist, 0, (esq_w - 1) * sizeof(hordist[0]));
-  memset(verdist, 0, (esq_h - 1) * sizeof(verdist[0]));
-  const unsigned int *cur_esq_row;
-  for (i = 0; i < esq_h - 1; i++) {
-    cur_esq_row = esq + i * esq_w;
-    for (j = 0; j < esq_w - 1; j++) {
-      hordist[j] += (float)cur_esq_row[j];
-      verdist[i] += (float)cur_esq_row[j];
-    }
-    verdist[i] += (float)cur_esq_row[j];
-  }
-  cur_esq_row = esq + i * esq_w;
-  for (j = 0; j < esq_w - 1; j++) hordist[j] += (float)cur_esq_row[j];
-
-  for (j = 0; j < esq_w - 1; j++) hordist[j] *= e_recip;
-  for (i = 0; i < esq_h - 1; i++) verdist[i] *= e_recip;
-}
-
-// Similar to get_horver_correlation, but also takes into account first
-// row/column, when computing horizontal/vertical correlation.
-static void get_horver_correlation_full(const int16_t *diff, int stride, int w,
-                                        int h, float *hcorr, float *vcorr) {
-  const float num_hor = (float)(h * (w - 1));
-  const float num_ver = (float)((h - 1) * w);
-  int i, j;
-
-  // The following notation is used:
-  // x - current pixel
-  // y - left neighbor pixel
-  // z - top neighbor pixel
-  int64_t xy_sum = 0, xz_sum = 0;
-  int64_t xhor_sum = 0, xver_sum = 0, y_sum = 0, z_sum = 0;
-  int64_t x2hor_sum = 0, x2ver_sum = 0, y2_sum = 0, z2_sum = 0;
-
-  int16_t x, y, z;
-  for (j = 1; j < w; ++j) {
-    x = diff[j];
-    y = diff[j - 1];
-    xy_sum += x * y;
-    xhor_sum += x;
-    y_sum += y;
-    x2hor_sum += x * x;
-    y2_sum += y * y;
-  }
-  for (i = 1; i < h; ++i) {
-    x = diff[i * stride];
-    z = diff[(i - 1) * stride];
-    xz_sum += x * z;
-    xver_sum += x;
-    z_sum += z;
-    x2ver_sum += x * x;
-    z2_sum += z * z;
-    for (j = 1; j < w; ++j) {
-      x = diff[i * stride + j];
-      y = diff[i * stride + j - 1];
-      z = diff[(i - 1) * stride + j];
-      xy_sum += x * y;
-      xz_sum += x * z;
-      xhor_sum += x;
-      xver_sum += x;
-      y_sum += y;
-      z_sum += z;
-      x2hor_sum += x * x;
-      x2ver_sum += x * x;
-      y2_sum += y * y;
-      z2_sum += z * z;
-    }
-  }
-  const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
-  const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
-  const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
-  const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
-  const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
-  const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
-
-  *hcorr = *vcorr = 1;
-  if (xhor_var_n > 0 && y_var_n > 0) {
-    *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
-    *hcorr = *hcorr < 0 ? 0 : *hcorr;
-  }
-  if (xver_var_n > 0 && z_var_n > 0) {
-    *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
-    *vcorr = *vcorr < 0 ? 0 : *vcorr;
-  }
-}
-
-// Transforms raw scores into a probability distribution across 16 TX types
-static void score_2D_transform_pow8(float *scores_2D, float shift) {
-  float sum = 0.0f;
-  int i;
-
-  for (i = 0; i < 16; i++) {
-    float v, v2, v4;
-    v = AOMMAX(scores_2D[i] + shift, 0.0f);
-    v2 = v * v;
-    v4 = v2 * v2;
-    scores_2D[i] = v4 * v4;
-    sum += scores_2D[i];
-  }
-  for (i = 0; i < 16; i++) scores_2D[i] /= sum;
-}
-
-// These thresholds were calibrated to provide a certain number of TX types
-// pruned by the model on average, i.e. selecting a threshold with index i
-// will lead to pruning i+1 TX types on average
-static const float *prune_2D_adaptive_thresholds[] = {
-  // TX_4X4
-  (float[]){ 0.00549f, 0.01306f, 0.02039f, 0.02747f, 0.03406f, 0.04065f,
-             0.04724f, 0.05383f, 0.06067f, 0.06799f, 0.07605f, 0.08533f,
-             0.09778f, 0.11780f },
-  // TX_8X8
-  (float[]){ 0.00037f, 0.00183f, 0.00525f, 0.01038f, 0.01697f, 0.02502f,
-             0.03381f, 0.04333f, 0.05286f, 0.06287f, 0.07434f, 0.08850f,
-             0.10803f, 0.14124f },
-  // TX_16X16
-  (float[]){ 0.01404f, 0.02820f, 0.04211f, 0.05164f, 0.05798f, 0.06335f,
-             0.06897f, 0.07629f, 0.08875f, 0.11169f },
-  // TX_32X32
-  NULL,
-  // TX_64X64
-  NULL,
-  // TX_4X8
-  (float[]){ 0.00183f, 0.00745f, 0.01428f, 0.02185f, 0.02966f, 0.03723f,
-             0.04456f, 0.05188f, 0.05920f, 0.06702f, 0.07605f, 0.08704f,
-             0.10168f, 0.12585f },
-  // TX_8X4
-  (float[]){ 0.00085f, 0.00476f, 0.01135f, 0.01892f, 0.02698f, 0.03528f,
-             0.04358f, 0.05164f, 0.05994f, 0.06848f, 0.07849f, 0.09021f,
-             0.10583f, 0.13123f },
-  // TX_8X16
-  (float[]){ 0.00037f, 0.00232f, 0.00671f, 0.01257f, 0.01965f, 0.02722f,
-             0.03552f, 0.04382f, 0.05237f, 0.06189f, 0.07336f, 0.08728f,
-             0.10730f, 0.14221f },
-  // TX_16X8
-  (float[]){ 0.00061f, 0.00330f, 0.00818f, 0.01453f, 0.02185f, 0.02966f,
-             0.03772f, 0.04578f, 0.05383f, 0.06262f, 0.07288f, 0.08582f,
-             0.10339f, 0.13464f },
-  // TX_16X32
-  NULL,
-  // TX_32X16
-  NULL,
-  // TX_32X64
-  NULL,
-  // TX_64X32
-  NULL,
-  // TX_4X16
-  (float[]){ 0.00232f, 0.00671f, 0.01257f, 0.01941f, 0.02673f, 0.03430f,
-             0.04211f, 0.04968f, 0.05750f, 0.06580f, 0.07507f, 0.08655f,
-             0.10242f, 0.12878f },
-  // TX_16X4
-  (float[]){ 0.00110f, 0.00525f, 0.01208f, 0.01990f, 0.02795f, 0.03601f,
-             0.04358f, 0.05115f, 0.05896f, 0.06702f, 0.07629f, 0.08752f,
-             0.10217f, 0.12610f },
-  // TX_8X32
-  NULL,
-  // TX_32X8
-  NULL,
-  // TX_16X64
-  NULL,
-  // TX_64X16
-  NULL,
-};
-
-static uint16_t prune_tx_2D(MACROBLOCK *x, BLOCK_SIZE bsize, TX_SIZE tx_size,
-                            int blk_row, int blk_col, TxSetType tx_set_type,
-                            TX_TYPE_PRUNE_MODE prune_mode) {
-  static const int tx_type_table_2D[16] = {
-    DCT_DCT,      DCT_ADST,      DCT_FLIPADST,      V_DCT,
-    ADST_DCT,     ADST_ADST,     ADST_FLIPADST,     V_ADST,
-    FLIPADST_DCT, FLIPADST_ADST, FLIPADST_FLIPADST, V_FLIPADST,
-    H_DCT,        H_ADST,        H_FLIPADST,        IDTX
-  };
-  if (tx_set_type != EXT_TX_SET_ALL16 &&
-      tx_set_type != EXT_TX_SET_DTT9_IDTX_1DDCT)
-    return 0;
-  const NN_CONFIG *nn_config_hor = av1_tx_type_nnconfig_map_hor[tx_size];
-  const NN_CONFIG *nn_config_ver = av1_tx_type_nnconfig_map_ver[tx_size];
-  if (!nn_config_hor || !nn_config_ver) return 0;  // Model not established yet.
-
-  aom_clear_system_state();
-  float hfeatures[16], vfeatures[16];
-  float hscores[4], vscores[4];
-  float scores_2D[16];
-  const int bw = tx_size_wide[tx_size];
-  const int bh = tx_size_high[tx_size];
-  const int hfeatures_num = bw <= 8 ? bw : bw / 2;
-  const int vfeatures_num = bh <= 8 ? bh : bh / 2;
-  assert(hfeatures_num <= 16);
-  assert(vfeatures_num <= 16);
-
-  const struct macroblock_plane *const p = &x->plane[0];
-  const int diff_stride = block_size_wide[bsize];
-  const int16_t *diff = p->src_diff + 4 * blk_row * diff_stride + 4 * blk_col;
-  get_energy_distribution_finer(diff, diff_stride, bw, bh, hfeatures,
-                                vfeatures);
-  get_horver_correlation_full(diff, diff_stride, bw, bh,
-                              &hfeatures[hfeatures_num - 1],
-                              &vfeatures[vfeatures_num - 1]);
-  av1_nn_predict(hfeatures, nn_config_hor, hscores);
-  av1_nn_predict(vfeatures, nn_config_ver, vscores);
-
-  float score_2D_average = 0.0f;
-  for (int i = 0; i < 4; i++) {
-    float *cur_scores_2D = scores_2D + i * 4;
-    cur_scores_2D[0] = vscores[i] * hscores[0];
-    cur_scores_2D[1] = vscores[i] * hscores[1];
-    cur_scores_2D[2] = vscores[i] * hscores[2];
-    cur_scores_2D[3] = vscores[i] * hscores[3];
-    score_2D_average += cur_scores_2D[0] + cur_scores_2D[1] + cur_scores_2D[2] +
-                        cur_scores_2D[3];
-  }
-  score_2D_average /= 16;
-
-  const int prune_aggr_table[2][2] = { { 6, 4 }, { 10, 7 } };
-  int pruning_aggressiveness = 1;
-  if (tx_set_type == EXT_TX_SET_ALL16) {
-    score_2D_transform_pow8(scores_2D, (10 - score_2D_average));
-    pruning_aggressiveness =
-        prune_aggr_table[prune_mode - PRUNE_2D_ACCURATE][0];
-  } else if (tx_set_type == EXT_TX_SET_DTT9_IDTX_1DDCT) {
-    score_2D_transform_pow8(scores_2D, (20 - score_2D_average));
-    pruning_aggressiveness =
-        prune_aggr_table[prune_mode - PRUNE_2D_ACCURATE][1];
-  }
-
-  // Always keep the TX type with the highest score, prune all others with
-  // score below score_thresh.
-  int max_score_i = 0;
-  float max_score = 0.0f;
-  for (int i = 0; i < 16; i++) {
-    if (scores_2D[i] > max_score &&
-        av1_ext_tx_used[tx_set_type][tx_type_table_2D[i]]) {
-      max_score = scores_2D[i];
-      max_score_i = i;
-    }
-  }
-
-  const float score_thresh =
-      prune_2D_adaptive_thresholds[tx_size][pruning_aggressiveness - 1];
-
-  uint16_t prune_bitmask = 0;
-  for (int i = 0; i < 16; i++) {
-    if (scores_2D[i] < score_thresh && i != max_score_i)
-      prune_bitmask |= (1 << tx_type_table_2D[i]);
-  }
-  return prune_bitmask;
-}
-
-// ((prune >> vtx_tab[tx_type]) & 1)
-static const uint16_t prune_v_mask[] = {
-  0x0000, 0x0425, 0x108a, 0x14af, 0x4150, 0x4575, 0x51da, 0x55ff,
-  0xaa00, 0xae25, 0xba8a, 0xbeaf, 0xeb50, 0xef75, 0xfbda, 0xffff,
-};
-
-// ((prune >> (htx_tab[tx_type] + 8)) & 1)
-static const uint16_t prune_h_mask[] = {
-  0x0000, 0x0813, 0x210c, 0x291f, 0x80e0, 0x88f3, 0xa1ec, 0xa9ff,
-  0x5600, 0x5e13, 0x770c, 0x7f1f, 0xd6e0, 0xdef3, 0xf7ec, 0xffff,
-};
-
-static INLINE uint16_t gen_tx_search_prune_mask(int tx_search_prune) {
-  uint8_t prune_v = tx_search_prune & 0x0F;
-  uint8_t prune_h = (tx_search_prune >> 8) & 0x0F;
-  return (prune_v_mask[prune_v] & prune_h_mask[prune_h]);
-}
-
-static void prune_tx(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
-                     const MACROBLOCKD *const xd, int tx_set_type) {
-  x->tx_search_prune[tx_set_type] = 0;
-  x->tx_split_prune_flag = 0;
-  const MB_MODE_INFO *mbmi = xd->mi[0];
-  if (!is_inter_block(mbmi) || cpi->sf.tx_type_search.prune_mode == NO_PRUNE ||
-      x->use_default_inter_tx_type || xd->lossless[mbmi->segment_id] ||
-      x->cb_partition_scan)
-    return;
-  int tx_set = ext_tx_set_index[1][tx_set_type];
-  assert(tx_set >= 0);
-  const int *tx_set_1D = ext_tx_used_inter_1D[tx_set];
-  int prune = 0;
-  switch (cpi->sf.tx_type_search.prune_mode) {
-    case NO_PRUNE: return;
-    case PRUNE_ONE:
-      if (!(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) return;
-      prune = prune_one_for_sby(cpi, bsize, x, xd);
-      x->tx_search_prune[tx_set_type] = gen_tx_search_prune_mask(prune);
-      break;
-    case PRUNE_TWO:
-      if (!(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
-        if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return;
-        prune = prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
-      } else if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) {
-        prune = prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
-      } else {
-        prune = prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
-      }
-      x->tx_search_prune[tx_set_type] = gen_tx_search_prune_mask(prune);
-      break;
-    case PRUNE_2D_ACCURATE:
-    case PRUNE_2D_FAST: break;
-    default: assert(0);
-  }
-}
-
-static void model_rd_from_sse(const AV1_COMP *const cpi,
-                              const MACROBLOCK *const x, BLOCK_SIZE plane_bsize,
-                              int plane, int64_t sse, int num_samples,
-                              int *rate, int64_t *dist) {
-  (void)num_samples;
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const int dequant_shift =
-      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3;
-
-  // Fast approximate the modelling function.
-  if (cpi->sf.simple_model_rd_from_var) {
-    const int64_t square_error = sse;
-    int quantizer = pd->dequant_Q3[1] >> dequant_shift;
-    if (quantizer < 120)
-      *rate = (int)AOMMIN(
-          (square_error * (280 - quantizer)) >> (16 - AV1_PROB_COST_SHIFT),
-          INT_MAX);
-    else
-      *rate = 0;
-    assert(*rate >= 0);
-    *dist = (square_error * quantizer) >> 8;
-  } else {
-    av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[plane_bsize],
-                                 pd->dequant_Q3[1] >> dequant_shift, rate,
-                                 dist);
-  }
-  *dist <<= 4;
-}
-
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x) {
-  const AV1_COMMON *cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  const MACROBLOCKD *xd = &x->e_mbd;
-  const MB_MODE_INFO *mbmi = xd->mi[0];
-  int64_t total_sse = 0;
-  for (int plane = 0; plane < num_planes; ++plane) {
-    const struct macroblock_plane *const p = &x->plane[plane];
-    const struct macroblockd_plane *const pd = &xd->plane[plane];
-    const BLOCK_SIZE bs = get_plane_block_size(mbmi->sb_type, pd->subsampling_x,
-                                               pd->subsampling_y);
-    unsigned int sse;
-
-    if (x->skip_chroma_rd && plane) continue;
-
-    cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
-                       &sse);
-    total_sse += sse;
-  }
-  total_sse <<= 4;
-  return total_sse;
-}
-#endif
-
-static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
-                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
-                            int plane_to, int mi_row, int mi_col,
-                            int *out_rate_sum, int64_t *out_dist_sum,
-                            int *skip_txfm_sb, int64_t *skip_sse_sb,
-                            int *plane_rate, int64_t *plane_sse,
-                            int64_t *plane_dist) {
-  // Note our transform coeffs are 8 times an orthogonal transform.
-  // Hence quantizer step is also 8 times. To get effective quantizer
-  // we need to divide by 8 before sending to modeling function.
-  int plane;
-  (void)mi_row;
-  (void)mi_col;
-  const int ref = xd->mi[0]->ref_frame[0];
-
-  int64_t rate_sum = 0;
-  int64_t dist_sum = 0;
-  int64_t total_sse = 0;
-
-  for (plane = plane_from; plane <= plane_to; ++plane) {
-    struct macroblock_plane *const p = &x->plane[plane];
-    struct macroblockd_plane *const pd = &xd->plane[plane];
-    const BLOCK_SIZE plane_bsize =
-        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
-    const int bw = block_size_wide[plane_bsize];
-    const int bh = block_size_high[plane_bsize];
-    int64_t sse;
-    int rate;
-    int64_t dist;
-
-    if (x->skip_chroma_rd && plane) continue;
-
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf,
-                           pd->dst.stride, bw, bh);
-    } else {
-      sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw,
-                    bh);
-    }
-    sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
-
-    model_rd_from_sse(cpi, x, plane_bsize, plane, sse, bw * bh, &rate, &dist);
-
-    if (plane == 0) x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX);
-
-    total_sse += sse;
-    rate_sum += rate;
-    dist_sum += dist;
-    if (plane_rate) plane_rate[plane] = rate;
-    if (plane_sse) plane_sse[plane] = sse;
-    if (plane_dist) plane_dist[plane] = dist;
-    assert(rate_sum >= 0);
-  }
-
-  if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0;
-  if (skip_sse_sb) *skip_sse_sb = total_sse << 4;
-  rate_sum = AOMMIN(rate_sum, INT_MAX);
-  *out_rate_sum = (int)rate_sum;
-  *out_dist_sum = dist_sum;
-}
-
-static void check_block_skip(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
-                             MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
-                             int plane_to, int *skip_txfm_sb) {
-  *skip_txfm_sb = 1;
-  for (int plane = plane_from; plane <= plane_to; ++plane) {
-    struct macroblock_plane *const p = &x->plane[plane];
-    struct macroblockd_plane *const pd = &xd->plane[plane];
-    const BLOCK_SIZE bs =
-        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
-    unsigned int sse;
-
-    if (x->skip_chroma_rd && plane) continue;
-
-    // Since fast HBD variance functions scale down sse by 4 bit, we first use
-    // fast vf implementation to rule out blocks with non-zero scaled sse. Then,
-    // only if the source is HBD and the scaled sse is 0, accurate sse
-    // computation is applied to determine if the sse is really 0. This step is
-    // necessary for HBD lossless coding.
-    cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
-                       &sse);
-    if (sse) {
-      *skip_txfm_sb = 0;
-      return;
-    } else if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      uint64_t sse64 = aom_highbd_sse_odd_size(
-          p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
-          block_size_wide[bs], block_size_high[bs]);
-
-      if (sse64) {
-        *skip_txfm_sb = 0;
-        return;
-      }
-    }
-  }
-  return;
-}
-
-int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
-                          intptr_t block_size, int64_t *ssz) {
-  int i;
-  int64_t error = 0, sqcoeff = 0;
-
-  for (i = 0; i < block_size; i++) {
-    const int diff = coeff[i] - dqcoeff[i];
-    error += diff * diff;
-    sqcoeff += coeff[i] * coeff[i];
-  }
-
-  *ssz = sqcoeff;
-  return error;
-}
-
-int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
-                                 const tran_low_t *dqcoeff, intptr_t block_size,
-                                 int64_t *ssz, int bd) {
-  int i;
-  int64_t error = 0, sqcoeff = 0;
-  int shift = 2 * (bd - 8);
-  int rounding = shift > 0 ? 1 << (shift - 1) : 0;
-
-  for (i = 0; i < block_size; i++) {
-    const int64_t diff = coeff[i] - dqcoeff[i];
-    error += diff * diff;
-    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
-  }
-  assert(error >= 0 && sqcoeff >= 0);
-  error = (error + rounding) >> shift;
-  sqcoeff = (sqcoeff + rounding) >> shift;
-
-  *ssz = sqcoeff;
-  return error;
-}
-
-// Get transform block visible dimensions cropped to the MI units.
-static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
-                               BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
-                               BLOCK_SIZE tx_bsize, int *width, int *height,
-                               int *visible_width, int *visible_height) {
-  assert(tx_bsize <= plane_bsize);
-  int txb_height = block_size_high[tx_bsize];
-  int txb_width = block_size_wide[tx_bsize];
-  const int block_height = block_size_high[plane_bsize];
-  const int block_width = block_size_wide[plane_bsize];
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  // TODO(aconverse@google.com): Investigate using crop_width/height here rather
-  // than the MI size
-  const int block_rows =
-      (xd->mb_to_bottom_edge >= 0)
-          ? block_height
-          : (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + block_height;
-  const int block_cols =
-      (xd->mb_to_right_edge >= 0)
-          ? block_width
-          : (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + block_width;
-  const int tx_unit_size = tx_size_wide_log2[0];
-  if (width) *width = txb_width;
-  if (height) *height = txb_height;
-  *visible_width = clamp(block_cols - (blk_col << tx_unit_size), 0, txb_width);
-  *visible_height =
-      clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height);
-}
-
-// Compute the pixel domain distortion from src and dst on all visible 4x4s in
-// the
-// transform block.
-static unsigned pixel_dist(const AV1_COMP *const cpi, const MACROBLOCK *x,
-                           int plane, const uint8_t *src, const int src_stride,
-                           const uint8_t *dst, const int dst_stride,
-                           int blk_row, int blk_col,
-                           const BLOCK_SIZE plane_bsize,
-                           const BLOCK_SIZE tx_bsize) {
-  int txb_rows, txb_cols, visible_rows, visible_cols;
-  const MACROBLOCKD *xd = &x->e_mbd;
-
-  get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize,
-                     &txb_cols, &txb_rows, &visible_cols, &visible_rows);
-  assert(visible_rows > 0);
-  assert(visible_cols > 0);
-
-#if CONFIG_DIST_8X8
-  if (x->using_dist_8x8 && plane == 0)
-    return (unsigned)av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride,
-                                  tx_bsize, txb_cols, txb_rows, visible_cols,
-                                  visible_rows, x->qindex);
-#endif  // CONFIG_DIST_8X8
-
-  unsigned sse = pixel_dist_visible_only(cpi, x, src, src_stride, dst,
-                                         dst_stride, tx_bsize, txb_rows,
-                                         txb_cols, visible_rows, visible_cols);
-
-  return sse;
-}
-
-// Compute the pixel domain distortion from diff on all visible 4x4s in the
-// transform block.
-static INLINE int64_t pixel_diff_dist(const MACROBLOCK *x, int plane,
-                                      int blk_row, int blk_col,
-                                      const BLOCK_SIZE plane_bsize,
-                                      const BLOCK_SIZE tx_bsize) {
-  int visible_rows, visible_cols;
-  const MACROBLOCKD *xd = &x->e_mbd;
-  get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL,
-                     NULL, &visible_cols, &visible_rows);
-  const int diff_stride = block_size_wide[plane_bsize];
-  const int16_t *diff = x->plane[plane].src_diff;
-#if CONFIG_DIST_8X8
-  int txb_height = block_size_high[tx_bsize];
-  int txb_width = block_size_wide[tx_bsize];
-  if (x->using_dist_8x8 && plane == 0) {
-    const int src_stride = x->plane[plane].src.stride;
-    const int src_idx = (blk_row * src_stride + blk_col)
-                        << tx_size_wide_log2[0];
-    const int diff_idx = (blk_row * diff_stride + blk_col)
-                         << tx_size_wide_log2[0];
-    const uint8_t *src = &x->plane[plane].src.buf[src_idx];
-    return dist_8x8_diff(x, src, src_stride, diff + diff_idx, diff_stride,
-                         txb_width, txb_height, visible_cols, visible_rows,
-                         x->qindex);
-  }
-#endif
-  diff += ((blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]);
-  return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, visible_rows);
-}
-
-int av1_count_colors(const uint8_t *src, int stride, int rows, int cols,
-                     int *val_count) {
-  const int max_pix_val = 1 << 8;
-  memset(val_count, 0, max_pix_val * sizeof(val_count[0]));
-  for (int r = 0; r < rows; ++r) {
-    for (int c = 0; c < cols; ++c) {
-      const int this_val = src[r * stride + c];
-      assert(this_val < max_pix_val);
-      ++val_count[this_val];
-    }
-  }
-  int n = 0;
-  for (int i = 0; i < max_pix_val; ++i) {
-    if (val_count[i]) ++n;
-  }
-  return n;
-}
-
-int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
-                            int bit_depth, int *val_count) {
-  assert(bit_depth <= 12);
-  const int max_pix_val = 1 << bit_depth;
-  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-  memset(val_count, 0, max_pix_val * sizeof(val_count[0]));
-  for (int r = 0; r < rows; ++r) {
-    for (int c = 0; c < cols; ++c) {
-      const int this_val = src[r * stride + c];
-      assert(this_val < max_pix_val);
-      if (this_val >= max_pix_val) return 0;
-      ++val_count[this_val];
-    }
-  }
-  int n = 0;
-  for (int i = 0; i < max_pix_val; ++i) {
-    if (val_count[i]) ++n;
-  }
-  return n;
-}
-
-static void inverse_transform_block_facade(MACROBLOCKD *xd, int plane,
-                                           int block, int blk_row, int blk_col,
-                                           int eob, int reduced_tx_set) {
-  struct macroblockd_plane *const pd = &xd->plane[plane];
-  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-  const PLANE_TYPE plane_type = get_plane_type(plane);
-  const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
-  const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col,
-                                          tx_size, reduced_tx_set);
-  const int dst_stride = pd->dst.stride;
-  uint8_t *dst =
-      &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
-  av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, dst,
-                              dst_stride, eob, reduced_tx_set);
-}
-
-static int find_tx_size_rd_info(TXB_RD_RECORD *cur_record, const uint32_t hash);
-
-static uint32_t get_intra_txb_hash(MACROBLOCK *x, int plane, int blk_row,
-                                   int blk_col, BLOCK_SIZE plane_bsize,
-                                   TX_SIZE tx_size) {
-  int16_t tmp_data[64 * 64];
-  const int diff_stride = block_size_wide[plane_bsize];
-  const int16_t *diff = x->plane[plane].src_diff;
-  const int16_t *cur_diff_row = diff + 4 * blk_row * diff_stride + 4 * blk_col;
-  const int txb_w = tx_size_wide[tx_size];
-  const int txb_h = tx_size_high[tx_size];
-  uint8_t *hash_data = (uint8_t *)cur_diff_row;
-  if (txb_w != diff_stride) {
-    int16_t *cur_hash_row = tmp_data;
-    for (int i = 0; i < txb_h; i++) {
-      memcpy(cur_hash_row, cur_diff_row, sizeof(*diff) * txb_w);
-      cur_hash_row += txb_w;
-      cur_diff_row += diff_stride;
-    }
-    hash_data = (uint8_t *)tmp_data;
-  }
-  CRC32C *crc = &x->mb_rd_record.crc_calculator;
-  const uint32_t hash = av1_get_crc32c_value(crc, hash_data, 2 * txb_w * txb_h);
-  return (hash << 5) + tx_size;
-}
-
-static INLINE void dist_block_tx_domain(MACROBLOCK *x, int plane, int block,
-                                        TX_SIZE tx_size, int64_t *out_dist,
-                                        int64_t *out_sse) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const struct macroblock_plane *const p = &x->plane[plane];
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  // Transform domain distortion computation is more efficient as it does
-  // not involve an inverse transform, but it is less accurate.
-  const int buffer_length = av1_get_max_eob(tx_size);
-  int64_t this_sse;
-  // TX-domain results need to shift down to Q2/D10 to match pixel
-  // domain distortion values which are in Q2^2
-  int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
-  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
-  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
-    *out_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse,
-                                       xd->bd);
-  else
-    *out_dist = av1_block_error(coeff, dqcoeff, buffer_length, &this_sse);
-
-  *out_dist = RIGHT_SIGNED_SHIFT(*out_dist, shift);
-  *out_sse = RIGHT_SIGNED_SHIFT(this_sse, shift);
-}
-
-static INLINE int64_t dist_block_px_domain(const AV1_COMP *cpi, MACROBLOCK *x,
-                                           int plane, BLOCK_SIZE plane_bsize,
-                                           int block, int blk_row, int blk_col,
-                                           TX_SIZE tx_size) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const struct macroblock_plane *const p = &x->plane[plane];
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const uint16_t eob = p->eobs[block];
-  const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
-  const int bsw = block_size_wide[tx_bsize];
-  const int bsh = block_size_high[tx_bsize];
-  const int src_stride = x->plane[plane].src.stride;
-  const int dst_stride = xd->plane[plane].dst.stride;
-  // Scale the transform block index to pixel unit.
-  const int src_idx = (blk_row * src_stride + blk_col) << tx_size_wide_log2[0];
-  const int dst_idx = (blk_row * dst_stride + blk_col) << tx_size_wide_log2[0];
-  const uint8_t *src = &x->plane[plane].src.buf[src_idx];
-  const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx];
-  const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-
-  assert(cpi != NULL);
-  assert(tx_size_wide_log2[0] == tx_size_high_log2[0]);
-
-  uint8_t *recon;
-  DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);
-
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    recon = CONVERT_TO_BYTEPTR(recon16);
-    av1_highbd_convolve_2d_copy_sr(CONVERT_TO_SHORTPTR(dst), dst_stride,
-                                   CONVERT_TO_SHORTPTR(recon), MAX_TX_SIZE, bsw,
-                                   bsh, NULL, NULL, 0, 0, NULL, xd->bd);
-  } else {
-    recon = (uint8_t *)recon16;
-    av1_convolve_2d_copy_sr(dst, dst_stride, recon, MAX_TX_SIZE, bsw, bsh, NULL,
-                            NULL, 0, 0, NULL);
-  }
-
-  const PLANE_TYPE plane_type = get_plane_type(plane);
-  TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, tx_size,
-                                    cpi->common.reduced_tx_set_used);
-  av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, recon,
-                              MAX_TX_SIZE, eob,
-                              cpi->common.reduced_tx_set_used);
-
-  return 16 * pixel_dist(cpi, x, plane, src, src_stride, recon, MAX_TX_SIZE,
-                         blk_row, blk_col, plane_bsize, tx_bsize);
-}
-
-static double get_mean(const int16_t *diff, int stride, int w, int h) {
-  double sum = 0.0;
-  for (int j = 0; j < h; ++j) {
-    for (int i = 0; i < w; ++i) {
-      sum += diff[j * stride + i];
-    }
-  }
-  assert(w > 0 && h > 0);
-  return sum / (w * h);
-}
-
-static double get_sse_norm(const int16_t *diff, int stride, int w, int h) {
-  double sum = 0.0;
-  for (int j = 0; j < h; ++j) {
-    for (int i = 0; i < w; ++i) {
-      const int err = diff[j * stride + i];
-      sum += err * err;
-    }
-  }
-  assert(w > 0 && h > 0);
-  return sum / (w * h);
-}
-
-static double get_sad_norm(const int16_t *diff, int stride, int w, int h) {
-  double sum = 0.0;
-  for (int j = 0; j < h; ++j) {
-    for (int i = 0; i < w; ++i) {
-      sum += abs(diff[j * stride + i]);
-    }
-  }
-  assert(w > 0 && h > 0);
-  return sum / (w * h);
-}
-
-static void get_2x2_normalized_sses_and_sads(
-    const AV1_COMP *const cpi, BLOCK_SIZE tx_bsize, const uint8_t *const src,
-    int src_stride, const uint8_t *const dst, int dst_stride,
-    const int16_t *const src_diff, int diff_stride, double *const sse_norm_arr,
-    double *const sad_norm_arr) {
-  const BLOCK_SIZE tx_bsize_half =
-      get_partition_subsize(tx_bsize, PARTITION_SPLIT);
-  if (tx_bsize_half == BLOCK_INVALID) {  // manually calculate stats
-    const int half_width = block_size_wide[tx_bsize] / 2;
-    const int half_height = block_size_high[tx_bsize] / 2;
-    for (int row = 0; row < 2; ++row) {
-      for (int col = 0; col < 2; ++col) {
-        const int16_t *const this_src_diff =
-            src_diff + row * half_height * diff_stride + col * half_width;
-        if (sse_norm_arr) {
-          sse_norm_arr[row * 2 + col] =
-              get_sse_norm(this_src_diff, diff_stride, half_width, half_height);
-        }
-        if (sad_norm_arr) {
-          sad_norm_arr[row * 2 + col] =
-              get_sad_norm(this_src_diff, diff_stride, half_width, half_height);
-        }
-      }
-    }
-  } else {  // use function pointers to calculate stats
-    const int half_width = block_size_wide[tx_bsize_half];
-    const int half_height = block_size_high[tx_bsize_half];
-    const int num_samples_half = half_width * half_height;
-    for (int row = 0; row < 2; ++row) {
-      for (int col = 0; col < 2; ++col) {
-        const uint8_t *const this_src =
-            src + row * half_height * src_stride + col * half_width;
-        const uint8_t *const this_dst =
-            dst + row * half_height * dst_stride + col * half_width;
-
-        if (sse_norm_arr) {
-          unsigned int this_sse;
-          cpi->fn_ptr[tx_bsize_half].vf(this_src, src_stride, this_dst,
-                                        dst_stride, &this_sse);
-          sse_norm_arr[row * 2 + col] = (double)this_sse / num_samples_half;
-        }
-
-        if (sad_norm_arr) {
-          const unsigned int this_sad = cpi->fn_ptr[tx_bsize_half].sdf(
-              this_src, src_stride, this_dst, dst_stride);
-          sad_norm_arr[row * 2 + col] = (double)this_sad / num_samples_half;
-        }
-      }
-    }
-  }
-}
-
-// NOTE: CONFIG_COLLECT_RD_STATS has 3 possible values
-// 0: Do not collect any RD stats
-// 1: Collect RD stats for transform units
-// 2: Collect RD stats for partition units
-#if CONFIG_COLLECT_RD_STATS
-
-#if CONFIG_COLLECT_RD_STATS == 1
-static void PrintTransformUnitStats(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                    const RD_STATS *const rd_stats, int blk_row,
-                                    int blk_col, BLOCK_SIZE plane_bsize,
-                                    TX_SIZE tx_size, TX_TYPE tx_type,
-                                    int64_t rd) {
-  if (rd_stats->rate == INT_MAX || rd_stats->dist == INT64_MAX) return;
-
-  // Generate small sample to restrict output size.
-  static unsigned int seed = 21743;
-  if (lcg_rand16(&seed) % 256 > 0) return;
-
-  const char output_file[] = "tu_stats.txt";
-  FILE *fout = fopen(output_file, "a");
-  if (!fout) return;
-
-  const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const int plane = 0;
-  struct macroblock_plane *const p = &x->plane[plane];
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const int txw = tx_size_wide[tx_size];
-  const int txh = tx_size_high[tx_size];
-  const int dequant_shift =
-      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3;
-  const int q_step = pd->dequant_Q3[1] >> dequant_shift;
-  const double num_samples = txw * txh;
-
-  const double rate_norm = (double)rd_stats->rate / num_samples;
-  const double dist_norm = (double)rd_stats->dist / num_samples;
-
-  fprintf(fout, "%g %g", rate_norm, dist_norm);
-
-  const int src_stride = p->src.stride;
-  const uint8_t *const src =
-      &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
-  const int dst_stride = pd->dst.stride;
-  const uint8_t *const dst =
-      &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
-  unsigned int sse;
-  cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
-  const double sse_norm = (double)sse / num_samples;
-
-  const unsigned int sad =
-      cpi->fn_ptr[tx_bsize].sdf(src, src_stride, dst, dst_stride);
-  const double sad_norm = (double)sad / num_samples;
-
-  fprintf(fout, " %g %g", sse_norm, sad_norm);
-
-  const int diff_stride = block_size_wide[plane_bsize];
-  const int16_t *const src_diff =
-      &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
-
-  double sse_norm_arr[4], sad_norm_arr[4];
-  get_2x2_normalized_sses_and_sads(cpi, tx_bsize, src, src_stride, dst,
-                                   dst_stride, src_diff, diff_stride,
-                                   sse_norm_arr, sad_norm_arr);
-  for (int i = 0; i < 4; ++i) {
-    fprintf(fout, " %g", sse_norm_arr[i]);
-  }
-  for (int i = 0; i < 4; ++i) {
-    fprintf(fout, " %g", sad_norm_arr[i]);
-  }
-
-  const TX_TYPE_1D tx_type_1d_row = htx_tab[tx_type];
-  const TX_TYPE_1D tx_type_1d_col = vtx_tab[tx_type];
-
-  fprintf(fout, " %d %d %d %d %d", q_step, tx_size_wide[tx_size],
-          tx_size_high[tx_size], tx_type_1d_row, tx_type_1d_col);
-
-  int model_rate;
-  int64_t model_dist;
-  model_rd_sse_fn[MODELRD_CURVFIT](cpi, x, tx_bsize, plane, sse, num_samples,
-                                   &model_rate, &model_dist);
-  const double model_rate_norm = (double)model_rate / num_samples;
-  const double model_dist_norm = (double)model_dist / num_samples;
-  fprintf(fout, " %g %g", model_rate_norm, model_dist_norm);
-
-  const double mean = get_mean(src_diff, diff_stride, txw, txh);
-  double hor_corr, vert_corr;
-  get_horver_correlation(src_diff, diff_stride, txw, txh, &hor_corr,
-                         &vert_corr);
-  fprintf(fout, " %g %g %g", mean, hor_corr, vert_corr);
-
-  double hdist[4] = { 0 }, vdist[4] = { 0 };
-  get_energy_distribution_fine(cpi, tx_bsize, src, src_stride, dst, dst_stride,
-                               1, hdist, vdist);
-  fprintf(fout, " %g %g %g %g %g %g %g %g", hdist[0], hdist[1], hdist[2],
-          hdist[3], vdist[0], vdist[1], vdist[2], vdist[3]);
-
-  fprintf(fout, " %d %" PRId64, x->rdmult, rd);
-
-  fprintf(fout, "\n");
-  fclose(fout);
-}
-#endif  // CONFIG_COLLECT_RD_STATS == 1
-
-#if CONFIG_COLLECT_RD_STATS >= 2
-static void PrintPredictionUnitStats(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                     const RD_STATS *const rd_stats,
-                                     BLOCK_SIZE plane_bsize) {
-  if (rd_stats->invalid_rate) return;
-  if (rd_stats->rate == INT_MAX || rd_stats->dist == INT64_MAX) return;
-
-  // Generate small sample to restrict output size.
-  static unsigned int seed = 95014;
-  if (lcg_rand16(&seed) % 256 > 0) return;
-
-  const char output_file[] = "pu_stats.txt";
-  FILE *fout = fopen(output_file, "a");
-  if (!fout) return;
-
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const int plane = 0;
-  struct macroblock_plane *const p = &x->plane[plane];
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const int diff_stride = block_size_wide[plane_bsize];
-  int bw, bh;
-  get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL, &bw,
-                     &bh);
-  const int num_samples = bw * bh;
-  const int dequant_shift =
-      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3;
-  const int q_step = pd->dequant_Q3[1] >> dequant_shift;
-
-  const double rate_norm = (double)rd_stats->rate / num_samples;
-  const double dist_norm = (double)rd_stats->dist / num_samples;
-  const double rdcost_norm =
-      (double)RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) / num_samples;
-
-  fprintf(fout, "%g %g %g", rate_norm, dist_norm, rdcost_norm);
-
-  const int src_stride = p->src.stride;
-  const uint8_t *const src = p->src.buf;
-  const int dst_stride = pd->dst.stride;
-  const uint8_t *const dst = pd->dst.buf;
-  const int16_t *const src_diff = p->src_diff;
-  const int shift = (xd->bd - 8);
-
-  int64_t sse = aom_sum_squares_2d_i16(src_diff, diff_stride, bw, bh);
-  sse = ROUND_POWER_OF_TWO(sse, shift * 2);
-  const double sse_norm = (double)sse / num_samples;
-
-  const unsigned int sad =
-      cpi->fn_ptr[plane_bsize].sdf(src, src_stride, dst, dst_stride);
-  const double sad_norm =
-      (double)sad / (1 << num_pels_log2_lookup[plane_bsize]);
-
-  fprintf(fout, " %g %g", sse_norm, sad_norm);
-
-  double sse_norm_arr[4], sad_norm_arr[4];
-  get_2x2_normalized_sses_and_sads(cpi, plane_bsize, src, src_stride, dst,
-                                   dst_stride, src_diff, diff_stride,
-                                   sse_norm_arr, sad_norm_arr);
-  if (shift) {
-    for (int k = 0; k < 4; ++k) sse_norm_arr[k] /= (1 << (2 * shift));
-    for (int k = 0; k < 4; ++k) sad_norm_arr[k] /= (1 << shift);
-  }
-  for (int i = 0; i < 4; ++i) {
-    fprintf(fout, " %g", sse_norm_arr[i]);
-  }
-  for (int i = 0; i < 4; ++i) {
-    fprintf(fout, " %g", sad_norm_arr[i]);
-  }
-
-  fprintf(fout, " %d %d %d %d", q_step, x->rdmult, bw, bh);
-
-  int model_rate;
-  int64_t model_dist;
-  model_rd_sse_fn[MODELRD_CURVFIT](cpi, x, plane_bsize, plane, sse, num_samples,
-                                   &model_rate, &model_dist);
-  const double model_rdcost_norm =
-      (double)RDCOST(x->rdmult, model_rate, model_dist) / num_samples;
-  const double model_rate_norm = (double)model_rate / num_samples;
-  const double model_dist_norm = (double)model_dist / num_samples;
-  fprintf(fout, " %g %g %g", model_rate_norm, model_dist_norm,
-          model_rdcost_norm);
-
-  double mean = get_mean(src_diff, diff_stride, bw, bh);
-  mean /= (1 << shift);
-  double hor_corr, vert_corr;
-  get_horver_correlation(src_diff, diff_stride, bw, bh, &hor_corr, &vert_corr);
-  fprintf(fout, " %g %g %g", mean, hor_corr, vert_corr);
-
-  double hdist[4] = { 0 }, vdist[4] = { 0 };
-  get_energy_distribution_fine(cpi, plane_bsize, src, src_stride, dst,
-                               dst_stride, 1, hdist, vdist);
-  fprintf(fout, " %g %g %g %g %g %g %g %g", hdist[0], hdist[1], hdist[2],
-          hdist[3], vdist[0], vdist[1], vdist[2], vdist[3]);
-
-  fprintf(fout, "\n");
-  fclose(fout);
-}
-#endif  // CONFIG_COLLECT_RD_STATS >= 2
-#endif  // CONFIG_COLLECT_RD_STATS
-
-static void model_rd_with_dnn(const AV1_COMP *const cpi,
-                              const MACROBLOCK *const x, BLOCK_SIZE plane_bsize,
-                              int plane, int64_t sse, int num_samples,
-                              int *rate, int64_t *dist) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const int log_numpels = num_pels_log2_lookup[plane_bsize];
-
-  const int dequant_shift =
-      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3;
-  const int q_step = AOMMAX(pd->dequant_Q3[1] >> dequant_shift, 1);
-
-  const struct macroblock_plane *const p = &x->plane[plane];
-  int bw, bh;
-  get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL, &bw,
-                     &bh);
-  const int src_stride = p->src.stride;
-  const uint8_t *const src = p->src.buf;
-  const int dst_stride = pd->dst.stride;
-  const uint8_t *const dst = pd->dst.buf;
-  const int16_t *const src_diff = p->src_diff;
-  const int diff_stride = block_size_wide[plane_bsize];
-  const int shift = (xd->bd - 8);
-
-  if (sse == 0) {
-    if (rate) *rate = 0;
-    if (dist) *dist = 0;
-    return;
-  }
-  if (plane) {
-    int model_rate;
-    int64_t model_dist;
-    model_rd_with_curvfit(cpi, x, plane_bsize, plane, sse, num_samples,
-                          &model_rate, &model_dist);
-    if (rate) *rate = model_rate;
-    if (dist) *dist = model_dist;
-    return;
-  }
-
-  aom_clear_system_state();
-  const double sse_norm = (double)sse / num_samples;
-
-  double sse_norm_arr[4];
-  get_2x2_normalized_sses_and_sads(cpi, plane_bsize, src, src_stride, dst,
-                                   dst_stride, src_diff, diff_stride,
-                                   sse_norm_arr, NULL);
-  double mean = get_mean(src_diff, bw, bw, bh);
-  if (shift) {
-    for (int k = 0; k < 4; ++k) sse_norm_arr[k] /= (1 << (2 * shift));
-    mean /= (1 << shift);
-  }
-  double sse_norm_sum = 0.0, sse_frac_arr[3];
-  for (int k = 0; k < 4; ++k) sse_norm_sum += sse_norm_arr[k];
-  for (int k = 0; k < 3; ++k)
-    sse_frac_arr[k] =
-        sse_norm_sum > 0.0 ? sse_norm_arr[k] / sse_norm_sum : 0.25;
-  const double q_sqr = (double)(q_step * q_step);
-  const double q_sqr_by_sse_norm = q_sqr / (sse_norm + 1.0);
-  const double mean_sqr_by_sse_norm = mean * mean / (sse_norm + 1.0);
-  double hor_corr, vert_corr;
-  get_horver_correlation(src_diff, diff_stride, bw, bh, &hor_corr, &vert_corr);
-
-  float features[NUM_FEATURES_PUSTATS];
-  features[0] = (float)hor_corr;
-  features[1] = (float)log_numpels;
-  features[2] = (float)mean_sqr_by_sse_norm;
-  features[3] = (float)q_sqr_by_sse_norm;
-  features[4] = (float)sse_frac_arr[0];
-  features[5] = (float)sse_frac_arr[1];
-  features[6] = (float)sse_frac_arr[2];
-  features[7] = (float)vert_corr;
-
-  float rate_f, dist_by_sse_norm_f;
-  av1_nn_predict(features, &av1_pustats_dist_nnconfig, &dist_by_sse_norm_f);
-  av1_nn_predict(features, &av1_pustats_rate_nnconfig, &rate_f);
-  const float dist_f = (float)((double)dist_by_sse_norm_f * (1.0 + sse_norm));
-  int rate_i = (int)(AOMMAX(0.0, rate_f * num_samples) + 0.5);
-  int64_t dist_i = (int64_t)(AOMMAX(0.0, dist_f * num_samples) + 0.5);
-  aom_clear_system_state();
-
-  // Check if skip is better
-  if (rate_i == 0) {
-    dist_i = sse << 4;
-  } else if (RDCOST(x->rdmult, rate_i, dist_i) >=
-             RDCOST(x->rdmult, 0, sse << 4)) {
-    rate_i = 0;
-    dist_i = sse << 4;
-  }
-
-  if (rate) *rate = rate_i;
-  if (dist) *dist = dist_i;
-  return;
-}
-
-static void model_rd_for_sb_with_dnn(
-    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
-    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
-    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
-    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) {
-  (void)mi_row;
-  (void)mi_col;
-  // Note our transform coeffs are 8 times an orthogonal transform.
-  // Hence quantizer step is also 8 times. To get effective quantizer
-  // we need to divide by 8 before sending to modeling function.
-  const int ref = xd->mi[0]->ref_frame[0];
-
-  int64_t rate_sum = 0;
-  int64_t dist_sum = 0;
-  int64_t total_sse = 0;
-
-  for (int plane = plane_from; plane <= plane_to; ++plane) {
-    struct macroblockd_plane *const pd = &xd->plane[plane];
-    const BLOCK_SIZE plane_bsize =
-        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
-    int64_t dist, sse;
-    int rate;
-
-    if (x->skip_chroma_rd && plane) continue;
-
-    const struct macroblock_plane *const p = &x->plane[plane];
-    const int shift = (xd->bd - 8);
-    int bw, bh;
-    get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL,
-                       &bw, &bh);
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf,
-                           pd->dst.stride, bw, bh);
-    } else {
-      sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw,
-                    bh);
-    }
-    sse = ROUND_POWER_OF_TWO(sse, shift * 2);
-
-    model_rd_with_dnn(cpi, x, plane_bsize, plane, sse, bw * bh, &rate, &dist);
-
-    if (plane == 0) x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX);
-
-    total_sse += sse;
-    rate_sum += rate;
-    dist_sum += dist;
-
-    if (plane_rate) plane_rate[plane] = rate;
-    if (plane_sse) plane_sse[plane] = sse;
-    if (plane_dist) plane_dist[plane] = dist;
-  }
-
-  if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0;
-  if (skip_sse_sb) *skip_sse_sb = total_sse << 4;
-  *out_rate_sum = (int)rate_sum;
-  *out_dist_sum = dist_sum;
-}
-
-// Fits a surface for rate and distortion using as features:
-// log2(sse_norm + 1) and log2(sse_norm/qstep^2)
-static void model_rd_with_surffit(const AV1_COMP *const cpi,
-                                  const MACROBLOCK *const x,
-                                  BLOCK_SIZE plane_bsize, int plane,
-                                  int64_t sse, int num_samples, int *rate,
-                                  int64_t *dist) {
-  (void)cpi;
-  (void)plane_bsize;
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const int dequant_shift =
-      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3;
-  const int qstep = AOMMAX(pd->dequant_Q3[1] >> dequant_shift, 1);
-  if (sse == 0) {
-    if (rate) *rate = 0;
-    if (dist) *dist = 0;
-    return;
-  }
-  aom_clear_system_state();
-  const double sse_norm = (double)sse / num_samples;
-  const double qstepsqr = (double)qstep * qstep;
-  const double xm = log(sse_norm + 1.0) / log(2.0);
-  const double yl = log(sse_norm / qstepsqr) / log(2.0);
-  double rate_f, dist_by_sse_norm_f;
-
-  av1_model_rd_surffit(xm, yl, &rate_f, &dist_by_sse_norm_f);
-
-  const double dist_f = dist_by_sse_norm_f * sse_norm;
-  int rate_i = (int)(AOMMAX(0.0, rate_f * num_samples) + 0.5);
-  int64_t dist_i = (int64_t)(AOMMAX(0.0, dist_f * num_samples) + 0.5);
-  aom_clear_system_state();
-
-  // Check if skip is better
-  if (rate_i == 0) {
-    dist_i = sse << 4;
-  } else if (RDCOST(x->rdmult, rate_i, dist_i) >=
-             RDCOST(x->rdmult, 0, sse << 4)) {
-    rate_i = 0;
-    dist_i = sse << 4;
-  }
-
-  if (rate) *rate = rate_i;
-  if (dist) *dist = dist_i;
-}
-
-static void model_rd_for_sb_with_surffit(
-    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
-    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
-    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
-    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) {
-  (void)mi_row;
-  (void)mi_col;
-  // Note our transform coeffs are 8 times an orthogonal transform.
-  // Hence quantizer step is also 8 times. To get effective quantizer
-  // we need to divide by 8 before sending to modeling function.
-  const int ref = xd->mi[0]->ref_frame[0];
-
-  int64_t rate_sum = 0;
-  int64_t dist_sum = 0;
-  int64_t total_sse = 0;
-
-  for (int plane = plane_from; plane <= plane_to; ++plane) {
-    struct macroblockd_plane *const pd = &xd->plane[plane];
-    const BLOCK_SIZE plane_bsize =
-        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
-    int64_t dist, sse;
-    int rate;
-
-    if (x->skip_chroma_rd && plane) continue;
-
-    int bw, bh;
-    const struct macroblock_plane *const p = &x->plane[plane];
-    const int shift = (xd->bd - 8);
-    get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL,
-                       &bw, &bh);
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf,
-                           pd->dst.stride, bw, bh);
-    } else {
-      sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw,
-                    bh);
-    }
-    sse = ROUND_POWER_OF_TWO(sse, shift * 2);
-
-    model_rd_with_surffit(cpi, x, plane_bsize, plane, sse, bw * bh, &rate,
-                          &dist);
-
-    if (plane == 0) x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX);
-
-    total_sse += sse;
-    rate_sum += rate;
-    dist_sum += dist;
-
-    if (plane_rate) plane_rate[plane] = rate;
-    if (plane_sse) plane_sse[plane] = sse;
-    if (plane_dist) plane_dist[plane] = dist;
-  }
-
-  if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0;
-  if (skip_sse_sb) *skip_sse_sb = total_sse << 4;
-  *out_rate_sum = (int)rate_sum;
-  *out_dist_sum = dist_sum;
-}
-
-// Fits a curve for rate and distortion using as feature:
-// log2(sse_norm/qstep^2)
-static void model_rd_with_curvfit(const AV1_COMP *const cpi,
-                                  const MACROBLOCK *const x,
-                                  BLOCK_SIZE plane_bsize, int plane,
-                                  int64_t sse, int num_samples, int *rate,
-                                  int64_t *dist) {
-  (void)cpi;
-  (void)plane_bsize;
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const int dequant_shift =
-      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3;
-  const int qstep = AOMMAX(pd->dequant_Q3[1] >> dequant_shift, 1);
-
-  if (sse == 0) {
-    if (rate) *rate = 0;
-    if (dist) *dist = 0;
-    return;
-  }
-  aom_clear_system_state();
-  const double sse_norm = (double)sse / num_samples;
-  const double qstepsqr = (double)qstep * qstep;
-  const double xqr = log(sse_norm / qstepsqr) / log(2.0);
-
-  double rate_f, dist_by_sse_norm_f;
-  av1_model_rd_curvfit(xqr, &rate_f, &dist_by_sse_norm_f);
-
-  const double dist_f = dist_by_sse_norm_f * sse_norm;
-  int rate_i = (int)(AOMMAX(0.0, rate_f * num_samples) + 0.5);
-  int64_t dist_i = (int64_t)(AOMMAX(0.0, dist_f * num_samples) + 0.5);
-  aom_clear_system_state();
-
-  // Check if skip is better
-  if (rate_i == 0) {
-    dist_i = sse << 4;
-  } else if (RDCOST(x->rdmult, rate_i, dist_i) >=
-             RDCOST(x->rdmult, 0, sse << 4)) {
-    rate_i = 0;
-    dist_i = sse << 4;
-  }
-
-  if (rate) *rate = rate_i;
-  if (dist) *dist = dist_i;
-}
-
-static void model_rd_for_sb_with_curvfit(
-    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
-    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
-    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
-    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) {
-  (void)mi_row;
-  (void)mi_col;
-  // Note our transform coeffs are 8 times an orthogonal transform.
-  // Hence quantizer step is also 8 times. To get effective quantizer
-  // we need to divide by 8 before sending to modeling function.
-  const int ref = xd->mi[0]->ref_frame[0];
-
-  int64_t rate_sum = 0;
-  int64_t dist_sum = 0;
-  int64_t total_sse = 0;
-
-  for (int plane = plane_from; plane <= plane_to; ++plane) {
-    struct macroblockd_plane *const pd = &xd->plane[plane];
-    const BLOCK_SIZE plane_bsize =
-        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
-    int64_t dist, sse;
-    int rate;
-
-    if (x->skip_chroma_rd && plane) continue;
-
-    int bw, bh;
-    const struct macroblock_plane *const p = &x->plane[plane];
-    const int shift = (xd->bd - 8);
-    get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL,
-                       &bw, &bh);
-
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf,
-                           pd->dst.stride, bw, bh);
-    } else {
-      sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw,
-                    bh);
-    }
-
-    sse = ROUND_POWER_OF_TWO(sse, shift * 2);
-    model_rd_with_curvfit(cpi, x, plane_bsize, plane, sse, bw * bh, &rate,
-                          &dist);
-
-    if (plane == 0) x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX);
-
-    total_sse += sse;
-    rate_sum += rate;
-    dist_sum += dist;
-
-    if (plane_rate) plane_rate[plane] = rate;
-    if (plane_sse) plane_sse[plane] = sse;
-    if (plane_dist) plane_dist[plane] = dist;
-  }
-
-  if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0;
-  if (skip_sse_sb) *skip_sse_sb = total_sse << 4;
-  *out_rate_sum = (int)rate_sum;
-  *out_dist_sum = dist_sum;
-}
-
-static void model_rd_for_sb_with_fullrdy(
-    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
-    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
-    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
-    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) {
-  const int ref = xd->mi[0]->ref_frame[0];
-
-  int64_t rate_sum = 0;
-  int64_t dist_sum = 0;
-  int64_t total_sse = 0;
-
-  for (int plane = plane_from; plane <= plane_to; ++plane) {
-    struct macroblock_plane *const p = &x->plane[plane];
-    struct macroblockd_plane *const pd = &xd->plane[plane];
-    const BLOCK_SIZE plane_bsize =
-        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
-    const int bw = block_size_wide[plane_bsize];
-    const int bh = block_size_high[plane_bsize];
-    int64_t sse;
-    int rate;
-    int64_t dist;
-
-    if (x->skip_chroma_rd && plane) continue;
-
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf,
-                           pd->dst.stride, bw, bh);
-    } else {
-      sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw,
-                    bh);
-    }
-    sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
-
-    RD_STATS rd_stats;
-    if (plane == 0) {
-      select_tx_type_yrd(cpi, x, &rd_stats, bsize, mi_row, mi_col, INT64_MAX);
-      if (rd_stats.invalid_rate) {
-        rate = 0;
-        dist = sse << 4;
-      } else {
-        rate = rd_stats.rate;
-        dist = rd_stats.dist;
-      }
-    } else {
-      model_rd_with_curvfit(cpi, x, plane_bsize, plane, sse, bw * bh, &rate,
-                            &dist);
-    }
-
-    if (plane == 0) x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX);
-
-    total_sse += sse;
-    rate_sum += rate;
-    dist_sum += dist;
-
-    if (plane_rate) plane_rate[plane] = rate;
-    if (plane_sse) plane_sse[plane] = sse;
-    if (plane_dist) plane_dist[plane] = dist;
-  }
-
-  if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0;
-  if (skip_sse_sb) *skip_sse_sb = total_sse << 4;
-  *out_rate_sum = (int)rate_sum;
-  *out_dist_sum = dist_sum;
-}
-
-static int64_t search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
-                               int block, int blk_row, int blk_col,
-                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
-                               const TXB_CTX *const txb_ctx,
-                               FAST_TX_SEARCH_MODE ftxs_mode,
-                               int use_fast_coef_costing, int64_t ref_best_rd,
-                               RD_STATS *best_rd_stats) {
-  const AV1_COMMON *cm = &cpi->common;
-  MACROBLOCKD *xd = &x->e_mbd;
-  struct macroblockd_plane *const pd = &xd->plane[plane];
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  const int is_inter = is_inter_block(mbmi);
-  int64_t best_rd = INT64_MAX;
-  uint16_t best_eob = 0;
-  TX_TYPE best_tx_type = DCT_DCT;
-  TX_TYPE last_tx_type = TX_TYPES;
-  const int fast_tx_search = ftxs_mode & FTXS_DCT_AND_1D_DCT_ONLY;
-  // The buffer used to swap dqcoeff in macroblockd_plane so we can keep dqcoeff
-  // of the best tx_type
-  DECLARE_ALIGNED(32, tran_low_t, this_dqcoeff[MAX_SB_SQUARE]);
-  tran_low_t *orig_dqcoeff = pd->dqcoeff;
-  tran_low_t *best_dqcoeff = this_dqcoeff;
-  const int txk_type_idx =
-      av1_get_txk_type_index(plane_bsize, blk_row, blk_col);
-  av1_invalid_rd_stats(best_rd_stats);
-
-  TXB_RD_INFO *intra_txb_rd_info = NULL;
-  uint16_t cur_joint_ctx = 0;
-  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
-  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
-  const int within_border =
-      mi_row >= xd->tile.mi_row_start &&
-      (mi_row + mi_size_high[plane_bsize] < xd->tile.mi_row_end) &&
-      mi_col >= xd->tile.mi_col_start &&
-      (mi_col + mi_size_wide[plane_bsize] < xd->tile.mi_col_end);
-  if (within_border && cpi->sf.use_intra_txb_hash && frame_is_intra_only(cm) &&
-      !is_inter && plane == 0 &&
-      tx_size_wide[tx_size] == tx_size_high[tx_size]) {
-    const uint32_t intra_hash =
-        get_intra_txb_hash(x, plane, blk_row, blk_col, plane_bsize, tx_size);
-    const int intra_hash_idx =
-        find_tx_size_rd_info(&x->txb_rd_record_intra, intra_hash);
-    intra_txb_rd_info = &x->txb_rd_record_intra.tx_rd_info[intra_hash_idx];
-
-    cur_joint_ctx = (txb_ctx->dc_sign_ctx << 8) + txb_ctx->txb_skip_ctx;
-    if (intra_txb_rd_info->entropy_context == cur_joint_ctx &&
-        x->txb_rd_record_intra.tx_rd_info[intra_hash_idx].valid) {
-      mbmi->txk_type[txk_type_idx] = intra_txb_rd_info->tx_type;
-      const TX_TYPE ref_tx_type =
-          av1_get_tx_type(get_plane_type(plane), &x->e_mbd, blk_row, blk_col,
-                          tx_size, cpi->common.reduced_tx_set_used);
-      if (ref_tx_type == intra_txb_rd_info->tx_type) {
-        best_rd_stats->rate = intra_txb_rd_info->rate;
-        best_rd_stats->dist = intra_txb_rd_info->dist;
-        best_rd_stats->sse = intra_txb_rd_info->sse;
-        best_rd_stats->skip = intra_txb_rd_info->eob == 0;
-        x->plane[plane].eobs[block] = intra_txb_rd_info->eob;
-        x->plane[plane].txb_entropy_ctx[block] =
-            intra_txb_rd_info->txb_entropy_ctx;
-        best_rd = RDCOST(x->rdmult, best_rd_stats->rate, best_rd_stats->dist);
-        best_eob = intra_txb_rd_info->eob;
-        best_tx_type = intra_txb_rd_info->tx_type;
-        update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
-                         best_tx_type);
-        goto RECON_INTRA;
-      }
-    }
-  }
-
-  int rate_cost = 0;
-  TX_TYPE txk_start = DCT_DCT;
-  TX_TYPE txk_end = TX_TYPES - 1;
-  if ((!is_inter && x->use_default_intra_tx_type) ||
-      (is_inter && x->use_default_inter_tx_type)) {
-    txk_start = txk_end = get_default_tx_type(0, xd, tx_size);
-  } else if (x->rd_model == LOW_TXFM_RD || x->cb_partition_scan) {
-    if (plane == 0) txk_end = DCT_DCT;
-  }
-
-  uint8_t best_txb_ctx = 0;
-  const TxSetType tx_set_type =
-      av1_get_ext_tx_set_type(tx_size, is_inter, cm->reduced_tx_set_used);
-
-  TX_TYPE uv_tx_type = DCT_DCT;
-  if (plane) {
-    // tx_type of PLANE_TYPE_UV should be the same as PLANE_TYPE_Y
-    uv_tx_type = txk_start = txk_end =
-        av1_get_tx_type(get_plane_type(plane), xd, blk_row, blk_col, tx_size,
-                        cm->reduced_tx_set_used);
-  }
-  const uint16_t ext_tx_used_flag = av1_ext_tx_used_flag[tx_set_type];
-  if (xd->lossless[mbmi->segment_id] || txsize_sqr_up_map[tx_size] > TX_32X32 ||
-      ext_tx_used_flag == 0x0001) {
-    txk_start = txk_end = DCT_DCT;
-  }
-  uint16_t allowed_tx_mask = 0;  // 1: allow; 0: skip.
-  if (txk_start == txk_end) {
-    allowed_tx_mask = 1 << txk_start;
-    allowed_tx_mask &= ext_tx_used_flag;
-  } else if (fast_tx_search) {
-    allowed_tx_mask = 0x0c01;  // V_DCT, H_DCT, DCT_DCT
-    allowed_tx_mask &= ext_tx_used_flag;
-  } else {
-    assert(plane == 0);
-    allowed_tx_mask = ext_tx_used_flag;
-    // !fast_tx_search && txk_end != txk_start && plane == 0
-    const int do_prune = cpi->sf.tx_type_search.prune_mode > NO_PRUNE;
-    if (do_prune && is_inter) {
-      if (cpi->sf.tx_type_search.prune_mode >= PRUNE_2D_ACCURATE) {
-        const uint16_t prune =
-            prune_tx_2D(x, plane_bsize, tx_size, blk_row, blk_col, tx_set_type,
-                        cpi->sf.tx_type_search.prune_mode);
-        allowed_tx_mask &= (~prune);
-      } else {
-        allowed_tx_mask &= (~x->tx_search_prune[tx_set_type]);
-      }
-    }
-  }
-  // Need to have at least one transform type allowed.
-  if (allowed_tx_mask == 0) {
-    txk_start = txk_end = (plane ? uv_tx_type : DCT_DCT);
-    allowed_tx_mask = (1 << txk_start);
-  }
-
-  int use_transform_domain_distortion =
-      (cpi->sf.use_transform_domain_distortion > 0) &&
-      // Any 64-pt transforms only preserves half the coefficients.
-      // Therefore transform domain distortion is not valid for these
-      // transform sizes.
-      txsize_sqr_up_map[tx_size] != TX_64X64;
-#if CONFIG_DIST_8X8
-  if (x->using_dist_8x8) use_transform_domain_distortion = 0;
-#endif
-  int calc_pixel_domain_distortion_final =
-      cpi->sf.use_transform_domain_distortion == 1 &&
-      use_transform_domain_distortion && x->rd_model != LOW_TXFM_RD &&
-      !x->cb_partition_scan;
-  if (calc_pixel_domain_distortion_final &&
-      (txk_start == txk_end || allowed_tx_mask == 0x0001))
-    calc_pixel_domain_distortion_final = use_transform_domain_distortion = 0;
-
-  const uint16_t *eobs_ptr = x->plane[plane].eobs;
-
-  const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
-  int64_t block_sse =
-      pixel_diff_dist(x, plane, blk_row, blk_col, plane_bsize, tx_bsize);
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
-    block_sse = ROUND_POWER_OF_TWO(block_sse, (xd->bd - 8) * 2);
-  block_sse *= 16;
-
-  for (TX_TYPE tx_type = txk_start; tx_type <= txk_end; ++tx_type) {
-    if (!(allowed_tx_mask & (1 << tx_type))) continue;
-    if (plane == 0) mbmi->txk_type[txk_type_idx] = tx_type;
-    RD_STATS this_rd_stats;
-    av1_invalid_rd_stats(&this_rd_stats);
-
-    if (!cpi->optimize_seg_arr[mbmi->segment_id]) {
-      av1_xform_quant(
-          cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, tx_type,
-          USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
-      rate_cost = av1_cost_coeffs(cm, x, plane, block, tx_size, tx_type,
-                                  txb_ctx, use_fast_coef_costing);
-    } else {
-      av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
-                      tx_size, tx_type, AV1_XFORM_QUANT_FP);
-      if (cpi->sf.optimize_b_precheck && best_rd < INT64_MAX &&
-          eobs_ptr[block] >= 4) {
-        // Calculate distortion quickly in transform domain.
-        dist_block_tx_domain(x, plane, block, tx_size, &this_rd_stats.dist,
-                             &this_rd_stats.sse);
-
-        const int64_t best_rd_ = AOMMIN(best_rd, ref_best_rd);
-        const int64_t dist_cost_estimate =
-            RDCOST(x->rdmult, 0, AOMMIN(this_rd_stats.dist, this_rd_stats.sse));
-        if (dist_cost_estimate - (dist_cost_estimate >> 3) > best_rd_) continue;
-
-        rate_cost = av1_cost_coeffs(cm, x, plane, block, tx_size, tx_type,
-                                    txb_ctx, use_fast_coef_costing);
-        const int64_t rd_estimate =
-            AOMMIN(RDCOST(x->rdmult, rate_cost, this_rd_stats.dist),
-                   RDCOST(x->rdmult, 0, this_rd_stats.sse));
-        if (rd_estimate - (rd_estimate >> 3) > best_rd_) continue;
-      }
-      av1_optimize_b(cpi, x, plane, block, tx_size, tx_type, txb_ctx, 1,
-                     &rate_cost);
-    }
-    if (eobs_ptr[block] == 0) {
-      // When eob is 0, pixel domain distortion is more efficient and accurate.
-      this_rd_stats.dist = this_rd_stats.sse = block_sse;
-    } else if (use_transform_domain_distortion) {
-      dist_block_tx_domain(x, plane, block, tx_size, &this_rd_stats.dist,
-                           &this_rd_stats.sse);
-    } else {
-      this_rd_stats.dist = dist_block_px_domain(
-          cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size);
-      this_rd_stats.sse = block_sse;
-    }
-
-    this_rd_stats.rate = rate_cost;
-
-    const int64_t rd =
-        RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist);
-
-    if (rd < best_rd) {
-      best_rd = rd;
-      *best_rd_stats = this_rd_stats;
-      best_tx_type = tx_type;
-      best_txb_ctx = x->plane[plane].txb_entropy_ctx[block];
-      best_eob = x->plane[plane].eobs[block];
-      last_tx_type = best_tx_type;
-
-      // Swap qcoeff and dqcoeff buffers
-      tran_low_t *const tmp_dqcoeff = best_dqcoeff;
-      best_dqcoeff = pd->dqcoeff;
-      pd->dqcoeff = tmp_dqcoeff;
-    }
-
-#if CONFIG_COLLECT_RD_STATS == 1
-    if (plane == 0) {
-      PrintTransformUnitStats(cpi, x, &this_rd_stats, blk_row, blk_col,
-                              plane_bsize, tx_size, tx_type, rd);
-    }
-#endif  // CONFIG_COLLECT_RD_STATS == 1
-
-    if (cpi->sf.adaptive_txb_search_level) {
-      if ((best_rd - (best_rd >> cpi->sf.adaptive_txb_search_level)) >
-          ref_best_rd) {
-        break;
-      }
-    }
-
-    // Skip transform type search when we found the block has been quantized to
-    // all zero and at the same time, it has better rdcost than doing transform.
-    if (cpi->sf.tx_type_search.skip_tx_search && !best_eob) break;
-  }
-
-  assert(best_rd != INT64_MAX);
-
-  best_rd_stats->skip = best_eob == 0;
-  if (plane == 0) {
-    update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
-                     best_tx_type);
-  }
-  x->plane[plane].txb_entropy_ctx[block] = best_txb_ctx;
-  x->plane[plane].eobs[block] = best_eob;
-
-  pd->dqcoeff = best_dqcoeff;
-
-  if (calc_pixel_domain_distortion_final && best_eob) {
-    best_rd_stats->dist = dist_block_px_domain(
-        cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size);
-    best_rd_stats->sse = block_sse;
-  }
-
-  if (intra_txb_rd_info != NULL) {
-    intra_txb_rd_info->valid = 1;
-    intra_txb_rd_info->entropy_context = cur_joint_ctx;
-    intra_txb_rd_info->rate = best_rd_stats->rate;
-    intra_txb_rd_info->dist = best_rd_stats->dist;
-    intra_txb_rd_info->sse = best_rd_stats->sse;
-    intra_txb_rd_info->eob = best_eob;
-    intra_txb_rd_info->txb_entropy_ctx = best_txb_ctx;
-    if (plane == 0) intra_txb_rd_info->tx_type = best_tx_type;
-  }
-
-RECON_INTRA:
-  if (!is_inter && best_eob &&
-      (blk_row + tx_size_high_unit[tx_size] < mi_size_high[plane_bsize] ||
-       blk_col + tx_size_wide_unit[tx_size] < mi_size_wide[plane_bsize])) {
-    // intra mode needs decoded result such that the next transform block
-    // can use it for prediction.
-    // if the last search tx_type is the best tx_type, we don't need to
-    // do this again
-    if (best_tx_type != last_tx_type) {
-      if (!cpi->optimize_seg_arr[mbmi->segment_id]) {
-        av1_xform_quant(
-            cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
-            best_tx_type,
-            USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
-      } else {
-        av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
-                        tx_size, best_tx_type, AV1_XFORM_QUANT_FP);
-        av1_optimize_b(cpi, x, plane, block, tx_size, best_tx_type, txb_ctx, 1,
-                       &rate_cost);
-      }
-    }
-
-    inverse_transform_block_facade(xd, plane, block, blk_row, blk_col,
-                                   x->plane[plane].eobs[block],
-                                   cm->reduced_tx_set_used);
-
-    // This may happen because of hash collision. The eob stored in the hash
-    // table is non-zero, but the real eob is zero. We need to make sure tx_type
-    // is DCT_DCT in this case.
-    if (plane == 0 && x->plane[plane].eobs[block] == 0 &&
-        best_tx_type != DCT_DCT) {
-      update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
-                       DCT_DCT);
-    }
-  }
-  pd->dqcoeff = orig_dqcoeff;
-
-  return best_rd;
-}
-
-static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
-                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
-  struct rdcost_block_args *args = arg;
-  MACROBLOCK *const x = args->x;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const AV1_COMP *cpi = args->cpi;
-  ENTROPY_CONTEXT *a = args->t_above + blk_col;
-  ENTROPY_CONTEXT *l = args->t_left + blk_row;
-  const AV1_COMMON *cm = &cpi->common;
-  int64_t rd1, rd2, rd;
-  RD_STATS this_rd_stats;
-
-  av1_init_rd_stats(&this_rd_stats);
-
-  if (args->exit_early) {
-    args->incomplete_exit = 1;
-    return;
-  }
-
-  if (!is_inter_block(mbmi)) {
-    av1_predict_intra_block_facade(cm, xd, plane, blk_col, blk_row, tx_size);
-    av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
-  }
-  TXB_CTX txb_ctx;
-  get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
-  search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
-                  &txb_ctx, args->ftxs_mode, args->use_fast_coef_costing,
-                  args->best_rd - args->this_rd, &this_rd_stats);
-
-  if (plane == AOM_PLANE_Y && xd->cfl.store_y) {
-    assert(!is_inter_block(mbmi) || plane_bsize < BLOCK_8X8);
-    cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize);
-  }
-
-#if CONFIG_RD_DEBUG
-  av1_update_txb_coeff_cost(&this_rd_stats, plane, tx_size, blk_row, blk_col,
-                            this_rd_stats.rate);
-#endif  // CONFIG_RD_DEBUG
-  av1_set_txb_context(x, plane, block, tx_size, a, l);
-
-  const int blk_idx =
-      blk_row * (block_size_wide[plane_bsize] >> tx_size_wide_log2[0]) +
-      blk_col;
-
-  if (plane == 0)
-    set_blk_skip(x, plane, blk_idx, x->plane[plane].eobs[block] == 0);
-  else
-    set_blk_skip(x, plane, blk_idx, 0);
-
-  rd1 = RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist);
-  rd2 = RDCOST(x->rdmult, 0, this_rd_stats.sse);
-
-  // TODO(jingning): temporarily enabled only for luma component
-  rd = AOMMIN(rd1, rd2);
-
-  this_rd_stats.skip &= !x->plane[plane].eobs[block];
-
-  av1_merge_rd_stats(&args->rd_stats, &this_rd_stats);
-
-  args->this_rd += rd;
-
-  if (args->this_rd > args->best_rd) {
-    args->exit_early = 1;
-    return;
-  }
-}
-
-static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
-                             RD_STATS *rd_stats, int64_t ref_best_rd, int plane,
-                             BLOCK_SIZE bsize, TX_SIZE tx_size,
-                             int use_fast_coef_casting,
-                             FAST_TX_SEARCH_MODE ftxs_mode) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  struct rdcost_block_args args;
-  av1_zero(args);
-  args.x = x;
-  args.cpi = cpi;
-  args.best_rd = ref_best_rd;
-  args.use_fast_coef_costing = use_fast_coef_casting;
-  args.ftxs_mode = ftxs_mode;
-  av1_init_rd_stats(&args.rd_stats);
-
-  if (plane == 0) xd->mi[0]->tx_size = tx_size;
-
-  av1_get_entropy_contexts(bsize, pd, args.t_above, args.t_left);
-
-  av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
-                                         &args);
-
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int is_inter = is_inter_block(mbmi);
-  const int invalid_rd = is_inter ? args.incomplete_exit : args.exit_early;
-
-  if (invalid_rd) {
-    av1_invalid_rd_stats(rd_stats);
-  } else {
-    *rd_stats = args.rd_stats;
-  }
-}
-
-static int tx_size_cost(const AV1_COMMON *const cm, const MACROBLOCK *const x,
-                        BLOCK_SIZE bsize, TX_SIZE tx_size) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-
-  if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type)) {
-    const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize);
-    const int depth = tx_size_to_depth(tx_size, bsize);
-    const int tx_size_ctx = get_tx_size_context(xd);
-    int r_tx_size = x->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
-    return r_tx_size;
-  } else {
-    return 0;
-  }
-}
-
-static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
-                        RD_STATS *rd_stats, int64_t ref_best_rd, BLOCK_SIZE bs,
-                        TX_SIZE tx_size, FAST_TX_SEARCH_MODE ftxs_mode) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  int64_t rd = INT64_MAX;
-  const int skip_ctx = av1_get_skip_context(xd);
-  int s0, s1;
-  const int is_inter = is_inter_block(mbmi);
-  const int tx_select =
-      cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type);
-  int ctx = txfm_partition_context(
-      xd->above_txfm_context, xd->left_txfm_context, mbmi->sb_type, tx_size);
-  const int r_tx_size = is_inter ? x->txfm_partition_cost[ctx][0]
-                                 : tx_size_cost(cm, x, bs, tx_size);
-
-  assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs)));
-
-  s0 = x->skip_cost[skip_ctx][0];
-  s1 = x->skip_cost[skip_ctx][1];
-
-  mbmi->tx_size = tx_size;
-  txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, AOM_PLANE_Y, bs, tx_size,
-                   cpi->sf.use_fast_coef_costing, ftxs_mode);
-  if (rd_stats->rate == INT_MAX) return INT64_MAX;
-
-  if (rd_stats->skip) {
-    if (is_inter) {
-      rd = RDCOST(x->rdmult, s1, rd_stats->sse);
-    } else {
-      rd = RDCOST(x->rdmult, s1 + r_tx_size * tx_select, rd_stats->sse);
-    }
-  } else {
-    rd = RDCOST(x->rdmult, rd_stats->rate + s0 + r_tx_size * tx_select,
-                rd_stats->dist);
-  }
-
-  if (tx_select) rd_stats->rate += r_tx_size;
-
-  if (is_inter && !xd->lossless[xd->mi[0]->segment_id] && !(rd_stats->skip))
-    rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse));
-
-  return rd;
-}
-
-static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
-                                   MACROBLOCK *x, int *r, int64_t *d, int *s,
-                                   int64_t *sse, int64_t ref_best_rd) {
-  RD_STATS rd_stats;
-  av1_subtract_plane(x, bs, 0);
-  x->rd_model = LOW_TXFM_RD;
-  int64_t rd = txfm_yrd(cpi, x, &rd_stats, ref_best_rd, bs,
-                        max_txsize_rect_lookup[bs], FTXS_NONE);
-  x->rd_model = FULL_TXFM_RD;
-  *r = rd_stats.rate;
-  *d = rd_stats.dist;
-  *s = rd_stats.skip;
-  *sse = rd_stats.sse;
-  return rd;
-}
-
-static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                   RD_STATS *rd_stats, int64_t ref_best_rd,
-                                   BLOCK_SIZE bs) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int is_inter = is_inter_block(mbmi);
-  mbmi->tx_size = tx_size_from_tx_mode(bs, cm->tx_mode);
-  const TxSetType tx_set_type =
-      av1_get_ext_tx_set_type(mbmi->tx_size, is_inter, cm->reduced_tx_set_used);
-  prune_tx(cpi, bs, x, xd, tx_set_type);
-  txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, AOM_PLANE_Y, bs,
-                   mbmi->tx_size, cpi->sf.use_fast_coef_costing, FTXS_NONE);
-  // Reset the pruning flags.
-  av1_zero(x->tx_search_prune);
-  x->tx_split_prune_flag = 0;
-}
-
-static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                    RD_STATS *rd_stats, int64_t ref_best_rd,
-                                    BLOCK_SIZE bs) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-
-  mbmi->tx_size = TX_4X4;
-  txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
-                   cpi->sf.use_fast_coef_costing, FTXS_NONE);
-}
-
-static INLINE int bsize_to_num_blk(BLOCK_SIZE bsize) {
-  int num_blk = 1 << (num_pels_log2_lookup[bsize] - 2 * tx_size_wide_log2[0]);
-  return num_blk;
-}
-
-static int get_search_init_depth(int mi_width, int mi_height, int is_inter,
-                                 const SPEED_FEATURES *sf) {
-  if (sf->tx_size_search_method == USE_LARGESTALL) return MAX_VARTX_DEPTH;
-
-  if (sf->tx_size_search_lgr_block) {
-    if (mi_width > mi_size_wide[BLOCK_64X64] ||
-        mi_height > mi_size_high[BLOCK_64X64])
-      return MAX_VARTX_DEPTH;
-  }
-
-  if (is_inter) {
-    return (mi_height != mi_width) ? sf->inter_tx_size_search_init_depth_rect
-                                   : sf->inter_tx_size_search_init_depth_sqr;
-  } else {
-    return (mi_height != mi_width) ? sf->intra_tx_size_search_init_depth_rect
-                                   : sf->intra_tx_size_search_init_depth_sqr;
-  }
-}
-
-static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
-                                        MACROBLOCK *x, RD_STATS *rd_stats,
-                                        int64_t ref_best_rd, BLOCK_SIZE bs) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  int64_t rd = INT64_MAX;
-  int n;
-  int start_tx;
-  int depth;
-  int64_t best_rd = INT64_MAX;
-  const TX_SIZE max_rect_tx_size = max_txsize_rect_lookup[bs];
-  TX_SIZE best_tx_size = max_rect_tx_size;
-  TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN];
-  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
-  const int n4 = bsize_to_num_blk(bs);
-  const int tx_select = cm->tx_mode == TX_MODE_SELECT;
-
-  av1_invalid_rd_stats(rd_stats);
-
-  if (tx_select) {
-    start_tx = max_rect_tx_size;
-    depth = get_search_init_depth(mi_size_wide[bs], mi_size_high[bs],
-                                  is_inter_block(mbmi), &cpi->sf);
-  } else {
-    const TX_SIZE chosen_tx_size = tx_size_from_tx_mode(bs, cm->tx_mode);
-    start_tx = chosen_tx_size;
-    depth = MAX_TX_DEPTH;
-  }
-
-  prune_tx(cpi, bs, x, xd, EXT_TX_SET_ALL16);
-
-  for (n = start_tx; depth <= MAX_TX_DEPTH; depth++, n = sub_tx_size_map[n]) {
-#if CONFIG_DIST_8X8
-    if (x->using_dist_8x8) {
-      if (tx_size_wide[n] < 8 || tx_size_high[n] < 8) continue;
-    }
-#endif
-    RD_STATS this_rd_stats;
-    if (mbmi->ref_mv_idx > 0) x->rd_model = LOW_TXFM_RD;
-    rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, n, FTXS_NONE);
-    x->rd_model = FULL_TXFM_RD;
-
-    if (rd < best_rd) {
-      memcpy(best_txk_type, mbmi->txk_type,
-             sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN);
-      memcpy(best_blk_skip, x->blk_skip, sizeof(best_blk_skip[0]) * n4);
-      best_tx_size = n;
-      best_rd = rd;
-      *rd_stats = this_rd_stats;
-    }
-    if (n == TX_4X4) break;
-  }
-
-  if (rd_stats->rate != INT_MAX) {
-    mbmi->tx_size = best_tx_size;
-    memcpy(mbmi->txk_type, best_txk_type,
-           sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN);
-    memcpy(x->blk_skip, best_blk_skip, sizeof(best_blk_skip[0]) * n4);
-  }
-
-  // Reset the pruning flags.
-  av1_zero(x->tx_search_prune);
-  x->tx_split_prune_flag = 0;
-}
-
-static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
-                            RD_STATS *rd_stats, BLOCK_SIZE bs,
-                            int64_t ref_best_rd) {
-  MACROBLOCKD *xd = &x->e_mbd;
-  av1_init_rd_stats(rd_stats);
-
-  assert(bs == xd->mi[0]->sb_type);
-
-  if (xd->lossless[xd->mi[0]->segment_id]) {
-    choose_smallest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
-  } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
-    choose_largest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
-  } else {
-    choose_tx_size_type_from_rd(cpi, x, rd_stats, ref_best_rd, bs);
-  }
-}
-
-// Return the rate cost for luma prediction mode info. of intra blocks.
-static int intra_mode_info_cost_y(const AV1_COMP *cpi, const MACROBLOCK *x,
-                                  const MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
-                                  int mode_cost) {
-  int total_rate = mode_cost;
-  const int use_palette = mbmi->palette_mode_info.palette_size[0] > 0;
-  const int use_filter_intra = mbmi->filter_intra_mode_info.use_filter_intra;
-  const int use_intrabc = mbmi->use_intrabc;
-  // Can only activate one mode.
-  assert(((mbmi->mode != DC_PRED) + use_palette + use_intrabc +
-          use_filter_intra) <= 1);
-  const int try_palette =
-      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
-  if (try_palette && mbmi->mode == DC_PRED) {
-    const MACROBLOCKD *xd = &x->e_mbd;
-    const int bsize_ctx = av1_get_palette_bsize_ctx(bsize);
-    const int mode_ctx = av1_get_palette_mode_ctx(xd);
-    total_rate += x->palette_y_mode_cost[bsize_ctx][mode_ctx][use_palette];
-    if (use_palette) {
-      const uint8_t *const color_map = xd->plane[0].color_index_map;
-      int block_width, block_height, rows, cols;
-      av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
-                               &cols);
-      const int plt_size = mbmi->palette_mode_info.palette_size[0];
-      int palette_mode_cost =
-          x->palette_y_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE] +
-          write_uniform_cost(plt_size, color_map[0]);
-      uint16_t color_cache[2 * PALETTE_MAX_SIZE];
-      const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
-      palette_mode_cost +=
-          av1_palette_color_cost_y(&mbmi->palette_mode_info, color_cache,
-                                   n_cache, cpi->common.seq_params.bit_depth);
-      palette_mode_cost +=
-          av1_cost_color_map(x, 0, bsize, mbmi->tx_size, PALETTE_MAP);
-      total_rate += palette_mode_cost;
-    }
-  }
-  if (av1_filter_intra_allowed(&cpi->common, mbmi)) {
-    total_rate += x->filter_intra_cost[mbmi->sb_type][use_filter_intra];
-    if (use_filter_intra) {
-      total_rate += x->filter_intra_mode_cost[mbmi->filter_intra_mode_info
-                                                  .filter_intra_mode];
-    }
-  }
-  if (av1_is_directional_mode(mbmi->mode)) {
-    if (av1_use_angle_delta(bsize)) {
-      total_rate += x->angle_delta_cost[mbmi->mode - V_PRED]
-                                       [MAX_ANGLE_DELTA +
-                                        mbmi->angle_delta[PLANE_TYPE_Y]];
-    }
-  }
-  if (av1_allow_intrabc(&cpi->common))
-    total_rate += x->intrabc_cost[use_intrabc];
-  return total_rate;
-}
-
-// Return the rate cost for chroma prediction mode info. of intra blocks.
-static int intra_mode_info_cost_uv(const AV1_COMP *cpi, const MACROBLOCK *x,
-                                   const MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
-                                   int mode_cost) {
-  int total_rate = mode_cost;
-  const int use_palette = mbmi->palette_mode_info.palette_size[1] > 0;
-  const UV_PREDICTION_MODE mode = mbmi->uv_mode;
-  // Can only activate one mode.
-  assert(((mode != UV_DC_PRED) + use_palette + mbmi->use_intrabc) <= 1);
-
-  const int try_palette =
-      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
-  if (try_palette && mode == UV_DC_PRED) {
-    const PALETTE_MODE_INFO *pmi = &mbmi->palette_mode_info;
-    total_rate +=
-        x->palette_uv_mode_cost[pmi->palette_size[0] > 0][use_palette];
-    if (use_palette) {
-      const int bsize_ctx = av1_get_palette_bsize_ctx(bsize);
-      const int plt_size = pmi->palette_size[1];
-      const MACROBLOCKD *xd = &x->e_mbd;
-      const uint8_t *const color_map = xd->plane[1].color_index_map;
-      int palette_mode_cost =
-          x->palette_uv_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE] +
-          write_uniform_cost(plt_size, color_map[0]);
-      uint16_t color_cache[2 * PALETTE_MAX_SIZE];
-      const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
-      palette_mode_cost += av1_palette_color_cost_uv(
-          pmi, color_cache, n_cache, cpi->common.seq_params.bit_depth);
-      palette_mode_cost +=
-          av1_cost_color_map(x, 1, bsize, mbmi->tx_size, PALETTE_MAP);
-      total_rate += palette_mode_cost;
-    }
-  }
-  if (av1_is_directional_mode(get_uv_mode(mode))) {
-    if (av1_use_angle_delta(bsize)) {
-      total_rate +=
-          x->angle_delta_cost[mode - V_PRED][mbmi->angle_delta[PLANE_TYPE_UV] +
-                                             MAX_ANGLE_DELTA];
-    }
-  }
-  return total_rate;
-}
-
-static int conditional_skipintra(PREDICTION_MODE mode,
-                                 PREDICTION_MODE best_intra_mode) {
-  if (mode == D113_PRED && best_intra_mode != V_PRED &&
-      best_intra_mode != D135_PRED)
-    return 1;
-  if (mode == D67_PRED && best_intra_mode != V_PRED &&
-      best_intra_mode != D45_PRED)
-    return 1;
-  if (mode == D203_PRED && best_intra_mode != H_PRED &&
-      best_intra_mode != D45_PRED)
-    return 1;
-  if (mode == D157_PRED && best_intra_mode != H_PRED &&
-      best_intra_mode != D135_PRED)
-    return 1;
-  return 0;
-}
-
-// Model based RD estimation for luma intra blocks.
-static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
-                               BLOCK_SIZE bsize, int mode_cost, int mi_row,
-                               int mi_col) {
-  const AV1_COMMON *cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  assert(!is_inter_block(mbmi));
-  RD_STATS this_rd_stats;
-  int row, col;
-  int64_t temp_sse, this_rd;
-  TX_SIZE tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode);
-  const int stepr = tx_size_high_unit[tx_size];
-  const int stepc = tx_size_wide_unit[tx_size];
-  const int max_blocks_wide = max_block_wide(xd, bsize, 0);
-  const int max_blocks_high = max_block_high(xd, bsize, 0);
-  mbmi->tx_size = tx_size;
-  // Prediction.
-  for (row = 0; row < max_blocks_high; row += stepr) {
-    for (col = 0; col < max_blocks_wide; col += stepc) {
-      av1_predict_intra_block_facade(cm, xd, 0, col, row, tx_size);
-    }
-  }
-  // RD estimation.
-  model_rd_sb_fn[MODELRD_TYPE_INTRA](
-      cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &this_rd_stats.rate,
-      &this_rd_stats.dist, &this_rd_stats.skip, &temp_sse, NULL, NULL, NULL);
-  if (av1_is_directional_mode(mbmi->mode) && av1_use_angle_delta(bsize)) {
-    mode_cost +=
-        x->angle_delta_cost[mbmi->mode - V_PRED]
-                           [MAX_ANGLE_DELTA + mbmi->angle_delta[PLANE_TYPE_Y]];
-  }
-  if (mbmi->mode == DC_PRED &&
-      av1_filter_intra_allowed_bsize(cm, mbmi->sb_type)) {
-    if (mbmi->filter_intra_mode_info.use_filter_intra) {
-      const int mode = mbmi->filter_intra_mode_info.filter_intra_mode;
-      mode_cost += x->filter_intra_cost[mbmi->sb_type][1] +
-                   x->filter_intra_mode_cost[mode];
-    } else {
-      mode_cost += x->filter_intra_cost[mbmi->sb_type][0];
-    }
-  }
-  this_rd =
-      RDCOST(x->rdmult, this_rd_stats.rate + mode_cost, this_rd_stats.dist);
-  return this_rd;
-}
-
-// Extends 'color_map' array from 'orig_width x orig_height' to 'new_width x
-// new_height'. Extra rows and columns are filled in by copying last valid
-// row/column.
-static void extend_palette_color_map(uint8_t *const color_map, int orig_width,
-                                     int orig_height, int new_width,
-                                     int new_height) {
-  int j;
-  assert(new_width >= orig_width);
-  assert(new_height >= orig_height);
-  if (new_width == orig_width && new_height == orig_height) return;
-
-  for (j = orig_height - 1; j >= 0; --j) {
-    memmove(color_map + j * new_width, color_map + j * orig_width, orig_width);
-    // Copy last column to extra columns.
-    memset(color_map + j * new_width + orig_width,
-           color_map[j * new_width + orig_width - 1], new_width - orig_width);
-  }
-  // Copy last row to extra rows.
-  for (j = orig_height; j < new_height; ++j) {
-    memcpy(color_map + j * new_width, color_map + (orig_height - 1) * new_width,
-           new_width);
-  }
-}
-
-// Bias toward using colors in the cache.
-// TODO(huisu): Try other schemes to improve compression.
-static void optimize_palette_colors(uint16_t *color_cache, int n_cache,
-                                    int n_colors, int stride, int *centroids) {
-  if (n_cache <= 0) return;
-  for (int i = 0; i < n_colors * stride; i += stride) {
-    int min_diff = abs(centroids[i] - (int)color_cache[0]);
-    int idx = 0;
-    for (int j = 1; j < n_cache; ++j) {
-      const int this_diff = abs(centroids[i] - color_cache[j]);
-      if (this_diff < min_diff) {
-        min_diff = this_diff;
-        idx = j;
-      }
-    }
-    if (min_diff <= 1) centroids[i] = color_cache[idx];
-  }
-}
-
-// Given the base colors as specified in centroids[], calculate the RD cost
-// of palette mode.
-static void palette_rd_y(const AV1_COMP *const cpi, MACROBLOCK *x,
-                         MB_MODE_INFO *mbmi, BLOCK_SIZE bsize, int mi_row,
-                         int mi_col, int dc_mode_cost, const int *data,
-                         int *centroids, int n, uint16_t *color_cache,
-                         int n_cache, MB_MODE_INFO *best_mbmi,
-                         uint8_t *best_palette_color_map, int64_t *best_rd,
-                         int64_t *best_model_rd, int *rate, int *rate_tokenonly,
-                         int *rate_overhead, int64_t *distortion,
-                         int *skippable, PICK_MODE_CONTEXT *ctx,
-                         uint8_t *blk_skip) {
-  optimize_palette_colors(color_cache, n_cache, n, 1, centroids);
-  int k = av1_remove_duplicates(centroids, n);
-  if (k < PALETTE_MIN_SIZE) {
-    // Too few unique colors to create a palette. And DC_PRED will work
-    // well for that case anyway. So skip.
-    return;
-  }
-  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-  if (cpi->common.seq_params.use_highbitdepth)
-    for (int i = 0; i < k; ++i)
-      pmi->palette_colors[i] = clip_pixel_highbd(
-          (int)centroids[i], cpi->common.seq_params.bit_depth);
-  else
-    for (int i = 0; i < k; ++i)
-      pmi->palette_colors[i] = clip_pixel(centroids[i]);
-  pmi->palette_size[0] = k;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  uint8_t *const color_map = xd->plane[0].color_index_map;
-  int block_width, block_height, rows, cols;
-  av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
-                           &cols);
-  av1_calc_indices(data, centroids, color_map, rows * cols, k, 1);
-  extend_palette_color_map(color_map, cols, rows, block_width, block_height);
-  const int palette_mode_cost =
-      intra_mode_info_cost_y(cpi, x, mbmi, bsize, dc_mode_cost);
-  int64_t this_model_rd =
-      intra_model_yrd(cpi, x, bsize, palette_mode_cost, mi_row, mi_col);
-  if (*best_model_rd != INT64_MAX &&
-      this_model_rd > *best_model_rd + (*best_model_rd >> 1))
-    return;
-  if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
-  RD_STATS tokenonly_rd_stats;
-  super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
-  if (tokenonly_rd_stats.rate == INT_MAX) return;
-  int this_rate = tokenonly_rd_stats.rate + palette_mode_cost;
-  int64_t this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
-  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->sb_type)) {
-    tokenonly_rd_stats.rate -=
-        tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size);
-  }
-  if (this_rd < *best_rd) {
-    *best_rd = this_rd;
-    memcpy(best_palette_color_map, color_map,
-           block_width * block_height * sizeof(color_map[0]));
-    *best_mbmi = *mbmi;
-    memcpy(blk_skip, x->blk_skip, sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
-    *rate_overhead = this_rate - tokenonly_rd_stats.rate;
-    if (rate) *rate = this_rate;
-    if (rate_tokenonly) *rate_tokenonly = tokenonly_rd_stats.rate;
-    if (distortion) *distortion = tokenonly_rd_stats.dist;
-    if (skippable) *skippable = tokenonly_rd_stats.skip;
-  }
-}
-
-static int rd_pick_palette_intra_sby(
-    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
-    int mi_col, int dc_mode_cost, MB_MODE_INFO *best_mbmi,
-    uint8_t *best_palette_color_map, int64_t *best_rd, int64_t *best_model_rd,
-    int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable,
-    PICK_MODE_CONTEXT *ctx, uint8_t *best_blk_skip) {
-  int rate_overhead = 0;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  assert(!is_inter_block(mbmi));
-  assert(av1_allow_palette(cpi->common.allow_screen_content_tools, bsize));
-  const SequenceHeader *const seq_params = &cpi->common.seq_params;
-  int colors, n;
-  const int src_stride = x->plane[0].src.stride;
-  const uint8_t *const src = x->plane[0].src.buf;
-  uint8_t *const color_map = xd->plane[0].color_index_map;
-  int block_width, block_height, rows, cols;
-  av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
-                           &cols);
-
-  int count_buf[1 << 12];  // Maximum (1 << 12) color levels.
-  if (seq_params->use_highbitdepth)
-    colors = av1_count_colors_highbd(src, src_stride, rows, cols,
-                                     seq_params->bit_depth, count_buf);
-  else
-    colors = av1_count_colors(src, src_stride, rows, cols, count_buf);
-  mbmi->filter_intra_mode_info.use_filter_intra = 0;
-
-  if (colors > 1 && colors <= 64) {
-    int r, c, i;
-    const int max_itr = 50;
-    int *const data = x->palette_buffer->kmeans_data_buf;
-    int centroids[PALETTE_MAX_SIZE];
-    int lb, ub, val;
-    uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
-    if (seq_params->use_highbitdepth)
-      lb = ub = src16[0];
-    else
-      lb = ub = src[0];
-
-    if (seq_params->use_highbitdepth) {
-      for (r = 0; r < rows; ++r) {
-        for (c = 0; c < cols; ++c) {
-          val = src16[r * src_stride + c];
-          data[r * cols + c] = val;
-          if (val < lb)
-            lb = val;
-          else if (val > ub)
-            ub = val;
-        }
-      }
-    } else {
-      for (r = 0; r < rows; ++r) {
-        for (c = 0; c < cols; ++c) {
-          val = src[r * src_stride + c];
-          data[r * cols + c] = val;
-          if (val < lb)
-            lb = val;
-          else if (val > ub)
-            ub = val;
-        }
-      }
-    }
-
-    mbmi->mode = DC_PRED;
-    mbmi->filter_intra_mode_info.use_filter_intra = 0;
-
-    uint16_t color_cache[2 * PALETTE_MAX_SIZE];
-    const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
-
-    // Find the dominant colors, stored in top_colors[].
-    int top_colors[PALETTE_MAX_SIZE] = { 0 };
-    for (i = 0; i < AOMMIN(colors, PALETTE_MAX_SIZE); ++i) {
-      int max_count = 0;
-      for (int j = 0; j < (1 << seq_params->bit_depth); ++j) {
-        if (count_buf[j] > max_count) {
-          max_count = count_buf[j];
-          top_colors[i] = j;
-        }
-      }
-      assert(max_count > 0);
-      count_buf[top_colors[i]] = 0;
-    }
-
-    // Try the dominant colors directly.
-    // TODO(huisu@google.com): Try to avoid duplicate computation in cases
-    // where the dominant colors and the k-means results are similar.
-    for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) {
-      for (i = 0; i < n; ++i) centroids[i] = top_colors[i];
-      palette_rd_y(cpi, x, mbmi, bsize, mi_row, mi_col, dc_mode_cost, data,
-                   centroids, n, color_cache, n_cache, best_mbmi,
-                   best_palette_color_map, best_rd, best_model_rd, rate,
-                   rate_tokenonly, &rate_overhead, distortion, skippable, ctx,
-                   best_blk_skip);
-    }
-
-    // K-means clustering.
-    for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) {
-      if (colors == PALETTE_MIN_SIZE) {
-        // Special case: These colors automatically become the centroids.
-        assert(colors == n);
-        assert(colors == 2);
-        centroids[0] = lb;
-        centroids[1] = ub;
-      } else {
-        for (i = 0; i < n; ++i) {
-          centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
-        }
-        av1_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr);
-      }
-      palette_rd_y(cpi, x, mbmi, bsize, mi_row, mi_col, dc_mode_cost, data,
-                   centroids, n, color_cache, n_cache, best_mbmi,
-                   best_palette_color_map, best_rd, best_model_rd, rate,
-                   rate_tokenonly, &rate_overhead, distortion, skippable, ctx,
-                   best_blk_skip);
-    }
-  }
-
-  if (best_mbmi->palette_mode_info.palette_size[0] > 0) {
-    memcpy(color_map, best_palette_color_map,
-           block_width * block_height * sizeof(best_palette_color_map[0]));
-  }
-  *mbmi = *best_mbmi;
-  return rate_overhead;
-}
-
-// Return 1 if an filter intra mode is selected; return 0 otherwise.
-static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                    int mi_row, int mi_col, int *rate,
-                                    int *rate_tokenonly, int64_t *distortion,
-                                    int *skippable, BLOCK_SIZE bsize,
-                                    int mode_cost, int64_t *best_rd,
-                                    int64_t *best_model_rd,
-                                    PICK_MODE_CONTEXT *ctx) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  int filter_intra_selected_flag = 0;
-  FILTER_INTRA_MODE mode;
-  TX_SIZE best_tx_size = TX_8X8;
-  FILTER_INTRA_MODE_INFO filter_intra_mode_info;
-  TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN];
-  (void)ctx;
-  av1_zero(filter_intra_mode_info);
-  mbmi->filter_intra_mode_info.use_filter_intra = 1;
-  mbmi->mode = DC_PRED;
-  mbmi->palette_mode_info.palette_size[0] = 0;
-
-  for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
-    int64_t this_rd, this_model_rd;
-    RD_STATS tokenonly_rd_stats;
-    mbmi->filter_intra_mode_info.filter_intra_mode = mode;
-    this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost, mi_row, mi_col);
-    if (*best_model_rd != INT64_MAX &&
-        this_model_rd > *best_model_rd + (*best_model_rd >> 1))
-      continue;
-    if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
-    super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
-    if (tokenonly_rd_stats.rate == INT_MAX) continue;
-    const int this_rate =
-        tokenonly_rd_stats.rate +
-        intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost);
-    this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
-
-    if (this_rd < *best_rd) {
-      *best_rd = this_rd;
-      best_tx_size = mbmi->tx_size;
-      filter_intra_mode_info = mbmi->filter_intra_mode_info;
-      memcpy(best_txk_type, mbmi->txk_type,
-             sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN);
-      memcpy(ctx->blk_skip, x->blk_skip,
-             sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
-      *rate = this_rate;
-      *rate_tokenonly = tokenonly_rd_stats.rate;
-      *distortion = tokenonly_rd_stats.dist;
-      *skippable = tokenonly_rd_stats.skip;
-      filter_intra_selected_flag = 1;
-    }
-  }
-
-  if (filter_intra_selected_flag) {
-    mbmi->mode = DC_PRED;
-    mbmi->tx_size = best_tx_size;
-    mbmi->filter_intra_mode_info = filter_intra_mode_info;
-    memcpy(mbmi->txk_type, best_txk_type,
-           sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN);
-    return 1;
-  } else {
-    return 0;
-  }
-}
-
-// Run RD calculation with given luma intra prediction angle., and return
-// the RD cost. Update the best mode info. if the RD cost is the best so far.
-static int64_t calc_rd_given_intra_angle(
-    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
-    int mi_col, int mode_cost, int64_t best_rd_in, int8_t angle_delta,
-    int max_angle_delta, int *rate, RD_STATS *rd_stats, int *best_angle_delta,
-    TX_SIZE *best_tx_size, int64_t *best_rd, int64_t *best_model_rd,
-    TX_TYPE *best_txk_type, uint8_t *best_blk_skip) {
-  RD_STATS tokenonly_rd_stats;
-  int64_t this_rd, this_model_rd;
-  MB_MODE_INFO *mbmi = x->e_mbd.mi[0];
-  const int n4 = bsize_to_num_blk(bsize);
-  assert(!is_inter_block(mbmi));
-  mbmi->angle_delta[PLANE_TYPE_Y] = angle_delta;
-  this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost, mi_row, mi_col);
-  if (*best_model_rd != INT64_MAX &&
-      this_model_rd > *best_model_rd + (*best_model_rd >> 1))
-    return INT64_MAX;
-  if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
-  super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in);
-  if (tokenonly_rd_stats.rate == INT_MAX) return INT64_MAX;
-
-  int this_rate =
-      mode_cost + tokenonly_rd_stats.rate +
-      x->angle_delta_cost[mbmi->mode - V_PRED][max_angle_delta + angle_delta];
-  this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
-
-  if (this_rd < *best_rd) {
-    memcpy(best_txk_type, mbmi->txk_type,
-           sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
-    memcpy(best_blk_skip, x->blk_skip, sizeof(best_blk_skip[0]) * n4);
-    *best_rd = this_rd;
-    *best_angle_delta = mbmi->angle_delta[PLANE_TYPE_Y];
-    *best_tx_size = mbmi->tx_size;
-    *rate = this_rate;
-    rd_stats->rate = tokenonly_rd_stats.rate;
-    rd_stats->dist = tokenonly_rd_stats.dist;
-    rd_stats->skip = tokenonly_rd_stats.skip;
-  }
-  return this_rd;
-}
-
-// With given luma directional intra prediction mode, pick the best angle delta
-// Return the RD cost corresponding to the best angle delta.
-static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                       int mi_row, int mi_col, int *rate,
-                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
-                                       int mode_cost, int64_t best_rd,
-                                       int64_t *best_model_rd) {
-  MB_MODE_INFO *mbmi = x->e_mbd.mi[0];
-  assert(!is_inter_block(mbmi));
-
-  int best_angle_delta = 0;
-  int64_t rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
-  TX_SIZE best_tx_size = mbmi->tx_size;
-  TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN];
-  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
-
-  for (int i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
-
-  int first_try = 1;
-  for (int angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
-    for (int i = 0; i < 2; ++i) {
-      const int64_t best_rd_in =
-          (best_rd == INT64_MAX) ? INT64_MAX
-                                 : (best_rd + (best_rd >> (first_try ? 3 : 5)));
-      const int64_t this_rd = calc_rd_given_intra_angle(
-          cpi, x, bsize, mi_row, mi_col, mode_cost, best_rd_in,
-          (1 - 2 * i) * angle_delta, MAX_ANGLE_DELTA, rate, rd_stats,
-          &best_angle_delta, &best_tx_size, &best_rd, best_model_rd,
-          best_txk_type, best_blk_skip);
-      rd_cost[2 * angle_delta + i] = this_rd;
-      if (first_try && this_rd == INT64_MAX) return best_rd;
-      first_try = 0;
-      if (angle_delta == 0) {
-        rd_cost[1] = this_rd;
-        break;
-      }
-    }
-  }
-
-  assert(best_rd != INT64_MAX);
-  for (int angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
-    for (int i = 0; i < 2; ++i) {
-      int skip_search = 0;
-      const int64_t rd_thresh = best_rd + (best_rd >> 5);
-      if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
-          rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
-        skip_search = 1;
-      if (!skip_search) {
-        calc_rd_given_intra_angle(cpi, x, bsize, mi_row, mi_col, mode_cost,
-                                  best_rd, (1 - 2 * i) * angle_delta,
-                                  MAX_ANGLE_DELTA, rate, rd_stats,
-                                  &best_angle_delta, &best_tx_size, &best_rd,
-                                  best_model_rd, best_txk_type, best_blk_skip);
-      }
-    }
-  }
-
-  if (rd_stats->rate != INT_MAX) {
-    mbmi->tx_size = best_tx_size;
-    mbmi->angle_delta[PLANE_TYPE_Y] = best_angle_delta;
-    memcpy(mbmi->txk_type, best_txk_type,
-           sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
-    memcpy(x->blk_skip, best_blk_skip,
-           sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize));
-  }
-  return best_rd;
-}
-
-// Indices are sign, integer, and fractional part of the gradient value
-static const uint8_t gradient_to_angle_bin[2][7][16] = {
-  {
-      { 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0 },
-      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },
-      { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-      { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-      { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-      { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
-      { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
-  },
-  {
-      { 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4 },
-      { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3 },
-      { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
-      { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
-      { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
-      { 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
-      { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
-  },
-};
-
-/* clang-format off */
-static const uint8_t mode_to_angle_bin[INTRA_MODES] = {
-  0, 2, 6, 0, 4, 3, 5, 7, 1, 0,
-  0,
-};
-/* clang-format on */
-
-static void angle_estimation(const uint8_t *src, int src_stride, int rows,
-                             int cols, BLOCK_SIZE bsize,
-                             uint8_t *directional_mode_skip_mask) {
-  memset(directional_mode_skip_mask, 0,
-         INTRA_MODES * sizeof(*directional_mode_skip_mask));
-  // Check if angle_delta is used
-  if (!av1_use_angle_delta(bsize)) return;
-  uint64_t hist[DIRECTIONAL_MODES];
-  memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
-  src += src_stride;
-  int r, c, dx, dy;
-  for (r = 1; r < rows; ++r) {
-    for (c = 1; c < cols; ++c) {
-      dx = src[c] - src[c - 1];
-      dy = src[c] - src[c - src_stride];
-      int index;
-      const int temp = dx * dx + dy * dy;
-      if (dy == 0) {
-        index = 2;
-      } else {
-        const int sn = (dx > 0) ^ (dy > 0);
-        dx = abs(dx);
-        dy = abs(dy);
-        const int remd = (dx % dy) * 16 / dy;
-        const int quot = dx / dy;
-        index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
-      }
-      hist[index] += temp;
-    }
-    src += src_stride;
-  }
-
-  int i;
-  uint64_t hist_sum = 0;
-  for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
-  for (i = 0; i < INTRA_MODES; ++i) {
-    if (av1_is_directional_mode(i)) {
-      const uint8_t angle_bin = mode_to_angle_bin[i];
-      uint64_t score = 2 * hist[angle_bin];
-      int weight = 2;
-      if (angle_bin > 0) {
-        score += hist[angle_bin - 1];
-        ++weight;
-      }
-      if (angle_bin < DIRECTIONAL_MODES - 1) {
-        score += hist[angle_bin + 1];
-        ++weight;
-      }
-      if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
-        directional_mode_skip_mask[i] = 1;
-    }
-  }
-}
-
-static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
-                                    int rows, int cols, BLOCK_SIZE bsize,
-                                    uint8_t *directional_mode_skip_mask) {
-  memset(directional_mode_skip_mask, 0,
-         INTRA_MODES * sizeof(*directional_mode_skip_mask));
-  // Check if angle_delta is used
-  if (!av1_use_angle_delta(bsize)) return;
-  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-  uint64_t hist[DIRECTIONAL_MODES];
-  memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
-  src += src_stride;
-  int r, c, dx, dy;
-  for (r = 1; r < rows; ++r) {
-    for (c = 1; c < cols; ++c) {
-      dx = src[c] - src[c - 1];
-      dy = src[c] - src[c - src_stride];
-      int index;
-      const int temp = dx * dx + dy * dy;
-      if (dy == 0) {
-        index = 2;
-      } else {
-        const int sn = (dx > 0) ^ (dy > 0);
-        dx = abs(dx);
-        dy = abs(dy);
-        const int remd = (dx % dy) * 16 / dy;
-        const int quot = dx / dy;
-        index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
-      }
-      hist[index] += temp;
-    }
-    src += src_stride;
-  }
-
-  int i;
-  uint64_t hist_sum = 0;
-  for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
-  for (i = 0; i < INTRA_MODES; ++i) {
-    if (av1_is_directional_mode(i)) {
-      const uint8_t angle_bin = mode_to_angle_bin[i];
-      uint64_t score = 2 * hist[angle_bin];
-      int weight = 2;
-      if (angle_bin > 0) {
-        score += hist[angle_bin - 1];
-        ++weight;
-      }
-      if (angle_bin < DIRECTIONAL_MODES - 1) {
-        score += hist[angle_bin + 1];
-        ++weight;
-      }
-      if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
-        directional_mode_skip_mask[i] = 1;
-    }
-  }
-}
-
-// Given selected prediction mode, search for the best tx type and size.
-static void intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
-                            BLOCK_SIZE bsize, const int *bmode_costs,
-                            int64_t *best_rd, int *rate, int *rate_tokenonly,
-                            int64_t *distortion, int *skippable,
-                            MB_MODE_INFO *best_mbmi, PICK_MODE_CONTEXT *ctx) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  RD_STATS rd_stats;
-  super_block_yrd(cpi, x, &rd_stats, bsize, *best_rd);
-  if (rd_stats.rate == INT_MAX) return;
-  int this_rate_tokenonly = rd_stats.rate;
-  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->sb_type)) {
-    // super_block_yrd above includes the cost of the tx_size in the
-    // tokenonly rate, but for intra blocks, tx_size is always coded
-    // (prediction granularity), so we account for it in the full rate,
-    // not the tokenonly rate.
-    this_rate_tokenonly -= tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size);
-  }
-  const int this_rate =
-      rd_stats.rate +
-      intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode]);
-  const int64_t this_rd = RDCOST(x->rdmult, this_rate, rd_stats.dist);
-  if (this_rd < *best_rd) {
-    *best_mbmi = *mbmi;
-    *best_rd = this_rd;
-    *rate = this_rate;
-    *rate_tokenonly = this_rate_tokenonly;
-    *distortion = rd_stats.dist;
-    *skippable = rd_stats.skip;
-    memcpy(ctx->blk_skip, x->blk_skip,
-           sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
-  }
-}
-
-// This function is used only for intra_only frames
-static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                      int mi_row, int mi_col, int *rate,
-                                      int *rate_tokenonly, int64_t *distortion,
-                                      int *skippable, BLOCK_SIZE bsize,
-                                      int64_t best_rd, PICK_MODE_CONTEXT *ctx) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  assert(!is_inter_block(mbmi));
-  int64_t best_model_rd = INT64_MAX;
-  const int rows = block_size_high[bsize];
-  const int cols = block_size_wide[bsize];
-  int is_directional_mode;
-  uint8_t directional_mode_skip_mask[INTRA_MODES];
-  const int src_stride = x->plane[0].src.stride;
-  const uint8_t *src = x->plane[0].src.buf;
-  int beat_best_rd = 0;
-  const int *bmode_costs;
-  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-  const int try_palette =
-      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
-  uint8_t *best_palette_color_map =
-      try_palette ? x->palette_buffer->best_palette_color_map : NULL;
-  const MB_MODE_INFO *above_mi = xd->above_mbmi;
-  const MB_MODE_INFO *left_mi = xd->left_mbmi;
-  const PREDICTION_MODE A = av1_above_block_mode(above_mi);
-  const PREDICTION_MODE L = av1_left_block_mode(left_mi);
-  const int above_ctx = intra_mode_context[A];
-  const int left_ctx = intra_mode_context[L];
-  bmode_costs = x->y_mode_costs[above_ctx][left_ctx];
-
-  mbmi->angle_delta[PLANE_TYPE_Y] = 0;
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
-    highbd_angle_estimation(src, src_stride, rows, cols, bsize,
-                            directional_mode_skip_mask);
-  else
-    angle_estimation(src, src_stride, rows, cols, bsize,
-                     directional_mode_skip_mask);
-  mbmi->filter_intra_mode_info.use_filter_intra = 0;
-  pmi->palette_size[0] = 0;
-
-  if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
-    x->use_default_intra_tx_type = 1;
-  else
-    x->use_default_intra_tx_type = 0;
-
-  MB_MODE_INFO best_mbmi = *mbmi;
-  /* Y Search for intra prediction mode */
-  for (int mode_idx = INTRA_MODE_START; mode_idx < INTRA_MODE_END; ++mode_idx) {
-    RD_STATS this_rd_stats;
-    int this_rate, this_rate_tokenonly, s;
-    int64_t this_distortion, this_rd, this_model_rd;
-    mbmi->mode = intra_rd_search_mode_order[mode_idx];
-    mbmi->angle_delta[PLANE_TYPE_Y] = 0;
-    this_model_rd =
-        intra_model_yrd(cpi, x, bsize, bmode_costs[mbmi->mode], mi_row, mi_col);
-    if (best_model_rd != INT64_MAX &&
-        this_model_rd > best_model_rd + (best_model_rd >> 1))
-      continue;
-    if (this_model_rd < best_model_rd) best_model_rd = this_model_rd;
-    is_directional_mode = av1_is_directional_mode(mbmi->mode);
-    if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
-    if (is_directional_mode && av1_use_angle_delta(bsize)) {
-      this_rd_stats.rate = INT_MAX;
-      rd_pick_intra_angle_sby(cpi, x, mi_row, mi_col, &this_rate,
-                              &this_rd_stats, bsize, bmode_costs[mbmi->mode],
-                              best_rd, &best_model_rd);
-    } else {
-      super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
-    }
-    this_rate_tokenonly = this_rd_stats.rate;
-    this_distortion = this_rd_stats.dist;
-    s = this_rd_stats.skip;
-
-    if (this_rate_tokenonly == INT_MAX) continue;
-
-    if (!xd->lossless[mbmi->segment_id] &&
-        block_signals_txsize(mbmi->sb_type)) {
-      // super_block_yrd above includes the cost of the tx_size in the
-      // tokenonly rate, but for intra blocks, tx_size is always coded
-      // (prediction granularity), so we account for it in the full rate,
-      // not the tokenonly rate.
-      this_rate_tokenonly -=
-          tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size);
-    }
-    this_rate =
-        this_rd_stats.rate +
-        intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode]);
-    this_rd = RDCOST(x->rdmult, this_rate, this_distortion);
-    if (this_rd < best_rd) {
-      best_mbmi = *mbmi;
-      best_rd = this_rd;
-      beat_best_rd = 1;
-      *rate = this_rate;
-      *rate_tokenonly = this_rate_tokenonly;
-      *distortion = this_distortion;
-      *skippable = s;
-      memcpy(ctx->blk_skip, x->blk_skip,
-             sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
-    }
-  }
-
-  if (try_palette) {
-    rd_pick_palette_intra_sby(
-        cpi, x, bsize, mi_row, mi_col, bmode_costs[DC_PRED], &best_mbmi,
-        best_palette_color_map, &best_rd, &best_model_rd, rate, rate_tokenonly,
-        distortion, skippable, ctx, ctx->blk_skip);
-  }
-
-  if (beat_best_rd && av1_filter_intra_allowed_bsize(&cpi->common, bsize)) {
-    if (rd_pick_filter_intra_sby(
-            cpi, x, mi_row, mi_col, rate, rate_tokenonly, distortion, skippable,
-            bsize, bmode_costs[DC_PRED], &best_rd, &best_model_rd, ctx)) {
-      best_mbmi = *mbmi;
-    }
-  }
-
-  // If previous searches use only the default tx type, do an extra search for
-  // the best tx type.
-  if (x->use_default_intra_tx_type) {
-    *mbmi = best_mbmi;
-    x->use_default_intra_tx_type = 0;
-    intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate, rate_tokenonly,
-                    distortion, skippable, &best_mbmi, ctx);
-  }
-
-  *mbmi = best_mbmi;
-  return best_rd;
-}
-
-// Return value 0: early termination triggered, no valid rd cost available;
-//              1: rd cost values are valid.
-static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x,
-                            RD_STATS *rd_stats, BLOCK_SIZE bsize,
-                            int64_t ref_best_rd) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_U];
-  const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
-  int plane;
-  int is_cost_valid = 1;
-  av1_init_rd_stats(rd_stats);
-
-  if (ref_best_rd < 0) is_cost_valid = 0;
-
-  if (x->skip_chroma_rd) return is_cost_valid;
-
-  bsize = scale_chroma_bsize(bsize, pd->subsampling_x, pd->subsampling_y);
-
-  if (is_inter_block(mbmi) && is_cost_valid) {
-    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
-      av1_subtract_plane(x, bsize, plane);
-  }
-
-  if (is_cost_valid) {
-    for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
-      RD_STATS pn_rd_stats;
-      txfm_rd_in_plane(x, cpi, &pn_rd_stats, ref_best_rd, plane, bsize,
-                       uv_tx_size, cpi->sf.use_fast_coef_costing, FTXS_NONE);
-      if (pn_rd_stats.rate == INT_MAX) {
-        is_cost_valid = 0;
-        break;
-      }
-      av1_merge_rd_stats(rd_stats, &pn_rd_stats);
-      if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) > ref_best_rd &&
-          RDCOST(x->rdmult, 0, rd_stats->sse) > ref_best_rd) {
-        is_cost_valid = 0;
-        break;
-      }
-    }
-  }
-
-  if (!is_cost_valid) {
-    // reset cost value
-    av1_invalid_rd_stats(rd_stats);
-  }
-
-  return is_cost_valid;
-}
-
-static void tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
-                          int blk_row, int blk_col, int plane, int block,
-                          int plane_bsize, TXB_CTX *txb_ctx, RD_STATS *rd_stats,
-                          FAST_TX_SEARCH_MODE ftxs_mode, int64_t ref_rdcost,
-                          TXB_RD_INFO *rd_info_array) {
-  const struct macroblock_plane *const p = &x->plane[plane];
-  const uint16_t cur_joint_ctx =
-      (txb_ctx->dc_sign_ctx << 8) + txb_ctx->txb_skip_ctx;
-  const int txk_type_idx =
-      av1_get_txk_type_index(plane_bsize, blk_row, blk_col);
-  // Look up RD and terminate early in case when we've already processed exactly
-  // the same residual with exactly the same entropy context.
-  if (rd_info_array != NULL && rd_info_array->valid &&
-      rd_info_array->entropy_context == cur_joint_ctx) {
-    if (plane == 0)
-      x->e_mbd.mi[0]->txk_type[txk_type_idx] = rd_info_array->tx_type;
-    const TX_TYPE ref_tx_type =
-        av1_get_tx_type(get_plane_type(plane), &x->e_mbd, blk_row, blk_col,
-                        tx_size, cpi->common.reduced_tx_set_used);
-    if (ref_tx_type == rd_info_array->tx_type) {
-      rd_stats->rate += rd_info_array->rate;
-      rd_stats->dist += rd_info_array->dist;
-      rd_stats->sse += rd_info_array->sse;
-      rd_stats->skip &= rd_info_array->eob == 0;
-      p->eobs[block] = rd_info_array->eob;
-      p->txb_entropy_ctx[block] = rd_info_array->txb_entropy_ctx;
-      return;
-    }
-  }
-
-  RD_STATS this_rd_stats;
-  search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
-                  txb_ctx, ftxs_mode, 0, ref_rdcost, &this_rd_stats);
-
-  av1_merge_rd_stats(rd_stats, &this_rd_stats);
-
-  // Save RD results for possible reuse in future.
-  if (rd_info_array != NULL) {
-    rd_info_array->valid = 1;
-    rd_info_array->entropy_context = cur_joint_ctx;
-    rd_info_array->rate = this_rd_stats.rate;
-    rd_info_array->dist = this_rd_stats.dist;
-    rd_info_array->sse = this_rd_stats.sse;
-    rd_info_array->eob = p->eobs[block];
-    rd_info_array->txb_entropy_ctx = p->txb_entropy_ctx[block];
-    if (plane == 0) {
-      rd_info_array->tx_type = x->e_mbd.mi[0]->txk_type[txk_type_idx];
-    }
-  }
-}
-
-static void get_mean_and_dev(const int16_t *data, int stride, int bw, int bh,
-                             float *mean, float *dev) {
-  int x_sum = 0;
-  uint64_t x2_sum = 0;
-  for (int i = 0; i < bh; ++i) {
-    for (int j = 0; j < bw; ++j) {
-      const int val = data[j];
-      x_sum += val;
-      x2_sum += val * val;
-    }
-    data += stride;
-  }
-
-  const int num = bw * bh;
-  const float e_x = (float)x_sum / num;
-  const float e_x2 = (float)((double)x2_sum / num);
-  const float diff = e_x2 - e_x * e_x;
-  *dev = (diff > 0) ? sqrtf(diff) : 0;
-  *mean = e_x;
-}
-
-static void get_mean_and_dev_float(const float *data, int stride, int bw,
-                                   int bh, float *mean, float *dev) {
-  float x_sum = 0;
-  float x2_sum = 0;
-  for (int i = 0; i < bh; ++i) {
-    for (int j = 0; j < bw; ++j) {
-      const float val = data[j];
-      x_sum += val;
-      x2_sum += val * val;
-    }
-    data += stride;
-  }
-
-  const int num = bw * bh;
-  const float e_x = x_sum / num;
-  const float e_x2 = x2_sum / num;
-  const float diff = e_x2 - e_x * e_x;
-  *dev = (diff > 0) ? sqrtf(diff) : 0;
-  *mean = e_x;
-}
-
-// Feature used by the model to predict tx split: the mean and standard
-// deviation values of the block and sub-blocks.
-static void get_mean_dev_features(const int16_t *data, int stride, int bw,
-                                  int bh, int levels, float *feature) {
-  int feature_idx = 0;
-  int width = bw;
-  int height = bh;
-  const int16_t *const data_ptr = &data[0];
-  for (int lv = 0; lv < levels; ++lv) {
-    if (width < 2 || height < 2) break;
-    float mean_buf[16];
-    float dev_buf[16];
-    int blk_idx = 0;
-    for (int row = 0; row < bh; row += height) {
-      for (int col = 0; col < bw; col += width) {
-        float mean, dev;
-        get_mean_and_dev(data_ptr + row * stride + col, stride, width, height,
-                         &mean, &dev);
-        feature[feature_idx++] = mean;
-        feature[feature_idx++] = dev;
-        mean_buf[blk_idx] = mean;
-        dev_buf[blk_idx++] = dev;
-      }
-    }
-    if (blk_idx > 1) {
-      float mean, dev;
-      // Deviation of means.
-      get_mean_and_dev_float(mean_buf, 1, 1, blk_idx, &mean, &dev);
-      feature[feature_idx++] = dev;
-      // Mean of deviations.
-      get_mean_and_dev_float(dev_buf, 1, 1, blk_idx, &mean, &dev);
-      feature[feature_idx++] = mean;
-    }
-    // Reduce the block size when proceeding to the next level.
-    if (height == width) {
-      height = height >> 1;
-      width = width >> 1;
-    } else if (height > width) {
-      height = height >> 1;
-    } else {
-      width = width >> 1;
-    }
-  }
-}
-
-static int ml_predict_tx_split(MACROBLOCK *x, BLOCK_SIZE bsize, int blk_row,
-                               int blk_col, TX_SIZE tx_size) {
-  const NN_CONFIG *nn_config = av1_tx_split_nnconfig_map[tx_size];
-  if (!nn_config) return -1;
-
-  const int diff_stride = block_size_wide[bsize];
-  const int16_t *diff =
-      x->plane[0].src_diff + 4 * blk_row * diff_stride + 4 * blk_col;
-  const int bw = tx_size_wide[tx_size];
-  const int bh = tx_size_high[tx_size];
-  aom_clear_system_state();
-
-  float features[64] = { 0.0f };
-  get_mean_dev_features(diff, diff_stride, bw, bh, 2, features);
-
-  float score = 0.0f;
-  av1_nn_predict(features, nn_config, &score);
-  if (score > 8.0f) return 100;
-  if (score < -8.0f) return 0;
-  score = 1.0f / (1.0f + (float)exp(-score));
-  return (int)(score * 100);
-}
-
-typedef struct {
-  int64_t rd;
-  int txb_entropy_ctx;
-  TX_TYPE tx_type;
-} TxCandidateInfo;
-
-static void try_tx_block_no_split(
-    const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, int blk_col, int block,
-    TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize,
-    const ENTROPY_CONTEXT *ta, const ENTROPY_CONTEXT *tl,
-    int txfm_partition_ctx, RD_STATS *rd_stats, int64_t ref_best_rd,
-    FAST_TX_SEARCH_MODE ftxs_mode, TXB_RD_INFO_NODE *rd_info_node,
-    TxCandidateInfo *no_split) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  struct macroblock_plane *const p = &x->plane[0];
-  const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-
-  no_split->rd = INT64_MAX;
-  no_split->txb_entropy_ctx = 0;
-  no_split->tx_type = TX_TYPES;
-
-  const ENTROPY_CONTEXT *const pta = ta + blk_col;
-  const ENTROPY_CONTEXT *const ptl = tl + blk_row;
-
-  const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
-  TXB_CTX txb_ctx;
-  get_txb_ctx(plane_bsize, tx_size, 0, pta, ptl, &txb_ctx);
-  const int zero_blk_rate = x->coeff_costs[txs_ctx][PLANE_TYPE_Y]
-                                .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
-
-  rd_stats->ref_rdcost = ref_best_rd;
-  rd_stats->zero_rate = zero_blk_rate;
-  const int index = av1_get_txb_size_index(plane_bsize, blk_row, blk_col);
-  mbmi->inter_tx_size[index] = tx_size;
-  tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, 0, block, plane_bsize,
-                &txb_ctx, rd_stats, ftxs_mode, ref_best_rd,
-                rd_info_node != NULL ? rd_info_node->rd_info_array : NULL);
-  assert(rd_stats->rate < INT_MAX);
-
-  if ((RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
-           RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
-       rd_stats->skip == 1) &&
-      !xd->lossless[mbmi->segment_id]) {
-#if CONFIG_RD_DEBUG
-    av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col,
-                              zero_blk_rate - rd_stats->rate);
-#endif  // CONFIG_RD_DEBUG
-    rd_stats->rate = zero_blk_rate;
-    rd_stats->dist = rd_stats->sse;
-    rd_stats->skip = 1;
-    set_blk_skip(x, 0, blk_row * bw + blk_col, 1);
-    p->eobs[block] = 0;
-    update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
-                     DCT_DCT);
-  } else {
-    set_blk_skip(x, 0, blk_row * bw + blk_col, 0);
-    rd_stats->skip = 0;
-  }
-
-  if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
-    rd_stats->rate += x->txfm_partition_cost[txfm_partition_ctx][0];
-
-  no_split->rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
-  no_split->txb_entropy_ctx = p->txb_entropy_ctx[block];
-  const int txk_type_idx =
-      av1_get_txk_type_index(plane_bsize, blk_row, blk_col);
-  no_split->tx_type = mbmi->txk_type[txk_type_idx];
-}
-
-static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
-                            int blk_col, int block, TX_SIZE tx_size, int depth,
-                            BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta,
-                            ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above,
-                            TXFM_CONTEXT *tx_left, RD_STATS *rd_stats,
-                            int64_t ref_best_rd, int *is_cost_valid,
-                            FAST_TX_SEARCH_MODE ftxs_mode,
-                            TXB_RD_INFO_NODE *rd_info_node);
-
-static void try_tx_block_split(
-    const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, int blk_col, int block,
-    TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta,
-    ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
-    int txfm_partition_ctx, int64_t no_split_rd, int64_t ref_best_rd,
-    FAST_TX_SEARCH_MODE ftxs_mode, TXB_RD_INFO_NODE *rd_info_node,
-    RD_STATS *split_rd_stats, int64_t *split_rd) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const int max_blocks_high = max_block_high(xd, plane_bsize, 0);
-  const int max_blocks_wide = max_block_wide(xd, plane_bsize, 0);
-  const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
-  const int bsw = tx_size_wide_unit[sub_txs];
-  const int bsh = tx_size_high_unit[sub_txs];
-  const int sub_step = bsw * bsh;
-  RD_STATS this_rd_stats;
-  int this_cost_valid = 1;
-  int64_t tmp_rd = 0;
-
-  split_rd_stats->rate = x->txfm_partition_cost[txfm_partition_ctx][1];
-
-  assert(tx_size < TX_SIZES_ALL);
-
-  int blk_idx = 0;
-  for (int r = 0; r < tx_size_high_unit[tx_size]; r += bsh) {
-    for (int c = 0; c < tx_size_wide_unit[tx_size]; c += bsw, ++blk_idx) {
-      const int offsetr = blk_row + r;
-      const int offsetc = blk_col + c;
-      if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
-      assert(blk_idx < 4);
-      select_tx_block(
-          cpi, x, offsetr, offsetc, block, sub_txs, depth + 1, plane_bsize, ta,
-          tl, tx_above, tx_left, &this_rd_stats, ref_best_rd - tmp_rd,
-          &this_cost_valid, ftxs_mode,
-          (rd_info_node != NULL) ? rd_info_node->children[blk_idx] : NULL);
-
-      if (!this_cost_valid) goto LOOP_EXIT;
-
-      av1_merge_rd_stats(split_rd_stats, &this_rd_stats);
-
-      tmp_rd = RDCOST(x->rdmult, split_rd_stats->rate, split_rd_stats->dist);
-
-      if (no_split_rd < tmp_rd) {
-        this_cost_valid = 0;
-        goto LOOP_EXIT;
-      }
-      block += sub_step;
-    }
-  }
-
-LOOP_EXIT : {}
-
-  if (this_cost_valid) *split_rd = tmp_rd;
-}
-
-// Search for the best tx partition/type for a given luma block.
-static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
-                            int blk_col, int block, TX_SIZE tx_size, int depth,
-                            BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta,
-                            ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above,
-                            TXFM_CONTEXT *tx_left, RD_STATS *rd_stats,
-                            int64_t ref_best_rd, int *is_cost_valid,
-                            FAST_TX_SEARCH_MODE ftxs_mode,
-                            TXB_RD_INFO_NODE *rd_info_node) {
-  assert(tx_size < TX_SIZES_ALL);
-  av1_init_rd_stats(rd_stats);
-  if (ref_best_rd < 0) {
-    *is_cost_valid = 0;
-    return;
-  }
-
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const int max_blocks_high = max_block_high(xd, plane_bsize, 0);
-  const int max_blocks_wide = max_block_wide(xd, plane_bsize, 0);
-  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
-
-  const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
-                                         mbmi->sb_type, tx_size);
-  struct macroblock_plane *const p = &x->plane[0];
-
-  const int try_no_split = 1;
-  int try_split = tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH;
-#if CONFIG_DIST_8X8
-  if (x->using_dist_8x8)
-    try_split &= tx_size_wide[tx_size] >= 16 && tx_size_high[tx_size] >= 16;
-#endif
-  TxCandidateInfo no_split = { INT64_MAX, 0, TX_TYPES };
-
-  // TX no split
-  if (try_no_split) {
-    try_tx_block_no_split(cpi, x, blk_row, blk_col, block, tx_size, depth,
-                          plane_bsize, ta, tl, ctx, rd_stats, ref_best_rd,
-                          ftxs_mode, rd_info_node, &no_split);
-
-    if (cpi->sf.adaptive_txb_search_level &&
-        (no_split.rd -
-         (no_split.rd >> (1 + cpi->sf.adaptive_txb_search_level))) >
-            ref_best_rd) {
-      *is_cost_valid = 0;
-      return;
-    }
-
-    if (cpi->sf.txb_split_cap) {
-      if (p->eobs[block] == 0) try_split = 0;
-    }
-  }
-
-  if (x->e_mbd.bd == 8 && !x->cb_partition_scan && try_split) {
-    const int threshold = cpi->sf.tx_type_search.ml_tx_split_thresh;
-    if (threshold >= 0) {
-      const int split_score =
-          ml_predict_tx_split(x, plane_bsize, blk_row, blk_col, tx_size);
-      if (split_score >= 0 && split_score < threshold) try_split = 0;
-    }
-  }
-
-  // TX split
-  int64_t split_rd = INT64_MAX;
-  RD_STATS split_rd_stats;
-  av1_init_rd_stats(&split_rd_stats);
-  if (try_split) {
-    try_tx_block_split(cpi, x, blk_row, blk_col, block, tx_size, depth,
-                       plane_bsize, ta, tl, tx_above, tx_left, ctx, no_split.rd,
-                       AOMMIN(no_split.rd, ref_best_rd), ftxs_mode,
-                       rd_info_node, &split_rd_stats, &split_rd);
-  }
-
-  if (no_split.rd < split_rd) {
-    ENTROPY_CONTEXT *pta = ta + blk_col;
-    ENTROPY_CONTEXT *ptl = tl + blk_row;
-    const TX_SIZE tx_size_selected = tx_size;
-    p->txb_entropy_ctx[block] = no_split.txb_entropy_ctx;
-    av1_set_txb_context(x, 0, block, tx_size_selected, pta, ptl);
-    txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
-                          tx_size);
-    for (int idy = 0; idy < tx_size_high_unit[tx_size]; ++idy) {
-      for (int idx = 0; idx < tx_size_wide_unit[tx_size]; ++idx) {
-        const int index =
-            av1_get_txb_size_index(plane_bsize, blk_row + idy, blk_col + idx);
-        mbmi->inter_tx_size[index] = tx_size_selected;
-      }
-    }
-    mbmi->tx_size = tx_size_selected;
-    update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
-                     no_split.tx_type);
-    set_blk_skip(x, 0, blk_row * bw + blk_col, rd_stats->skip);
-  } else {
-    *rd_stats = split_rd_stats;
-    if (split_rd == INT64_MAX) *is_cost_valid = 0;
-  }
-}
-
-static void select_inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
-                                   RD_STATS *rd_stats, BLOCK_SIZE bsize,
-                                   int64_t ref_best_rd,
-                                   FAST_TX_SEARCH_MODE ftxs_mode,
-                                   TXB_RD_INFO_NODE *rd_info_tree) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  int is_cost_valid = 1;
-  int64_t this_rd = 0, skip_rd = 0;
-
-  if (ref_best_rd < 0) is_cost_valid = 0;
-
-  av1_init_rd_stats(rd_stats);
-
-  if (is_cost_valid) {
-    const struct macroblockd_plane *const pd = &xd->plane[0];
-    const BLOCK_SIZE plane_bsize =
-        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
-    const int mi_width = mi_size_wide[plane_bsize];
-    const int mi_height = mi_size_high[plane_bsize];
-    const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
-    const int bh = tx_size_high_unit[max_tx_size];
-    const int bw = tx_size_wide_unit[max_tx_size];
-    int idx, idy;
-    int block = 0;
-    int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
-    ENTROPY_CONTEXT ctxa[MAX_MIB_SIZE];
-    ENTROPY_CONTEXT ctxl[MAX_MIB_SIZE];
-    TXFM_CONTEXT tx_above[MAX_MIB_SIZE];
-    TXFM_CONTEXT tx_left[MAX_MIB_SIZE];
-
-    RD_STATS pn_rd_stats;
-    const int init_depth =
-        get_search_init_depth(mi_width, mi_height, 1, &cpi->sf);
-    av1_init_rd_stats(&pn_rd_stats);
-
-    av1_get_entropy_contexts(bsize, pd, ctxa, ctxl);
-    memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width);
-    memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height);
-    const int skip_ctx = av1_get_skip_context(xd);
-    const int s0 = x->skip_cost[skip_ctx][0];
-    const int s1 = x->skip_cost[skip_ctx][1];
-
-    skip_rd = RDCOST(x->rdmult, s1, 0);
-    this_rd = RDCOST(x->rdmult, s0, 0);
-    for (idy = 0; idy < mi_height; idy += bh) {
-      for (idx = 0; idx < mi_width; idx += bw) {
-        int64_t best_rd_sofar = (ref_best_rd - (AOMMIN(skip_rd, this_rd)));
-        select_tx_block(cpi, x, idy, idx, block, max_tx_size, init_depth,
-                        plane_bsize, ctxa, ctxl, tx_above, tx_left,
-                        &pn_rd_stats, best_rd_sofar, &is_cost_valid, ftxs_mode,
-                        rd_info_tree);
-        if (!is_cost_valid || pn_rd_stats.rate == INT_MAX) {
-          av1_invalid_rd_stats(rd_stats);
-          return;
-        }
-        av1_merge_rd_stats(rd_stats, &pn_rd_stats);
-        skip_rd = RDCOST(x->rdmult, s1, rd_stats->sse);
-        this_rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist);
-        block += step;
-        if (rd_info_tree != NULL) rd_info_tree += 1;
-      }
-    }
-    if (skip_rd <= this_rd) {
-      rd_stats->rate = 0;
-      rd_stats->dist = rd_stats->sse;
-      rd_stats->skip = 1;
-    } else {
-      rd_stats->skip = 0;
-    }
-  }
-
-  if (!is_cost_valid) {
-    // reset cost value
-    av1_invalid_rd_stats(rd_stats);
-  }
-}
-
-static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
-                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
-                                       int64_t ref_best_rd,
-                                       TXB_RD_INFO_NODE *rd_info_tree) {
-  const int fast_tx_search = cpi->sf.tx_size_search_method > USE_FULL_RD;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int is_inter = is_inter_block(mbmi);
-  const int skip_ctx = av1_get_skip_context(xd);
-  int s0 = x->skip_cost[skip_ctx][0];
-  int s1 = x->skip_cost[skip_ctx][1];
-  int64_t rd;
-
-  // TODO(debargha): enable this as a speed feature where the
-  // select_inter_block_yrd() function above will use a simplified search
-  // such as not using full optimize, but the inter_block_yrd() function
-  // will use more complex search given that the transform partitions have
-  // already been decided.
-
-  int64_t rd_thresh = ref_best_rd;
-  if (fast_tx_search && rd_thresh < INT64_MAX) {
-    if (INT64_MAX - rd_thresh > (rd_thresh >> 3)) rd_thresh += (rd_thresh >> 3);
-  }
-  assert(rd_thresh > 0);
-
-  FAST_TX_SEARCH_MODE ftxs_mode =
-      fast_tx_search ? FTXS_DCT_AND_1D_DCT_ONLY : FTXS_NONE;
-  select_inter_block_yrd(cpi, x, rd_stats, bsize, rd_thresh, ftxs_mode,
-                         rd_info_tree);
-  if (rd_stats->rate == INT_MAX) return INT64_MAX;
-
-  // If fast_tx_search is true, only DCT and 1D DCT were tested in
-  // select_inter_block_yrd() above. Do a better search for tx type with
-  // tx sizes already decided.
-  if (fast_tx_search) {
-    if (!inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd, FTXS_NONE))
-      return INT64_MAX;
-  }
-
-  if (rd_stats->skip)
-    rd = RDCOST(x->rdmult, s1, rd_stats->sse);
-  else
-    rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist);
-
-  if (is_inter && !xd->lossless[xd->mi[0]->segment_id] && !(rd_stats->skip))
-    rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse));
-
-  return rd;
-}
-
-// Finds rd cost for a y block, given the transform size partitions
-static void tx_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
-                         int blk_col, int block, TX_SIZE tx_size,
-                         BLOCK_SIZE plane_bsize, int depth,
-                         ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx,
-                         TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
-                         int64_t ref_best_rd, RD_STATS *rd_stats,
-                         FAST_TX_SEARCH_MODE ftxs_mode) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int max_blocks_high = max_block_high(xd, plane_bsize, 0);
-  const int max_blocks_wide = max_block_wide(xd, plane_bsize, 0);
-
-  assert(tx_size < TX_SIZES_ALL);
-
-  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
-
-  const TX_SIZE plane_tx_size = mbmi->inter_tx_size[av1_get_txb_size_index(
-      plane_bsize, blk_row, blk_col)];
-
-  int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
-                                   mbmi->sb_type, tx_size);
-
-  av1_init_rd_stats(rd_stats);
-  if (tx_size == plane_tx_size) {
-    ENTROPY_CONTEXT *ta = above_ctx + blk_col;
-    ENTROPY_CONTEXT *tl = left_ctx + blk_row;
-    const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
-    TXB_CTX txb_ctx;
-    get_txb_ctx(plane_bsize, tx_size, 0, ta, tl, &txb_ctx);
-
-    const int zero_blk_rate = x->coeff_costs[txs_ctx][get_plane_type(0)]
-                                  .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
-    rd_stats->zero_rate = zero_blk_rate;
-    rd_stats->ref_rdcost = ref_best_rd;
-    tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, 0, block, plane_bsize,
-                  &txb_ctx, rd_stats, ftxs_mode, ref_best_rd, NULL);
-    const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-    if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
-            RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
-        rd_stats->skip == 1) {
-      rd_stats->rate = zero_blk_rate;
-      rd_stats->dist = rd_stats->sse;
-      rd_stats->skip = 1;
-      set_blk_skip(x, 0, blk_row * mi_width + blk_col, 1);
-      x->plane[0].eobs[block] = 0;
-      x->plane[0].txb_entropy_ctx[block] = 0;
-      update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size,
-                       DCT_DCT);
-    } else {
-      rd_stats->skip = 0;
-      set_blk_skip(x, 0, blk_row * mi_width + blk_col, 0);
-    }
-    if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
-      rd_stats->rate += x->txfm_partition_cost[ctx][0];
-    av1_set_txb_context(x, 0, block, tx_size, ta, tl);
-    txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
-                          tx_size);
-  } else {
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
-    const int bsw = tx_size_wide_unit[sub_txs];
-    const int bsh = tx_size_high_unit[sub_txs];
-    const int step = bsh * bsw;
-    RD_STATS pn_rd_stats;
-    int64_t this_rd = 0;
-    assert(bsw > 0 && bsh > 0);
-
-    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
-      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
-        const int offsetr = blk_row + row;
-        const int offsetc = blk_col + col;
-
-        if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
-
-        av1_init_rd_stats(&pn_rd_stats);
-        tx_block_yrd(cpi, x, offsetr, offsetc, block, sub_txs, plane_bsize,
-                     depth + 1, above_ctx, left_ctx, tx_above, tx_left,
-                     ref_best_rd - this_rd, &pn_rd_stats, ftxs_mode);
-        if (pn_rd_stats.rate == INT_MAX) {
-          av1_invalid_rd_stats(rd_stats);
-          return;
-        }
-        av1_merge_rd_stats(rd_stats, &pn_rd_stats);
-        this_rd += RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist);
-        block += step;
-      }
-    }
-
-    if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
-      rd_stats->rate += x->txfm_partition_cost[ctx][1];
-  }
-}
-
-// Return value 0: early termination triggered, no valid rd cost available;
-//              1: rd cost values are valid.
-static int inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
-                           RD_STATS *rd_stats, BLOCK_SIZE bsize,
-                           int64_t ref_best_rd, FAST_TX_SEARCH_MODE ftxs_mode) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  int is_cost_valid = 1;
-  int64_t this_rd = 0;
-
-  if (ref_best_rd < 0) is_cost_valid = 0;
-
-  av1_init_rd_stats(rd_stats);
-
-  if (is_cost_valid) {
-    const struct macroblockd_plane *const pd = &xd->plane[0];
-    const BLOCK_SIZE plane_bsize =
-        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
-    const int mi_width = mi_size_wide[plane_bsize];
-    const int mi_height = mi_size_high[plane_bsize];
-    const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
-    const int bh = tx_size_high_unit[max_tx_size];
-    const int bw = tx_size_wide_unit[max_tx_size];
-    const int init_depth =
-        get_search_init_depth(mi_width, mi_height, 1, &cpi->sf);
-    int idx, idy;
-    int block = 0;
-    int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
-    ENTROPY_CONTEXT ctxa[MAX_MIB_SIZE];
-    ENTROPY_CONTEXT ctxl[MAX_MIB_SIZE];
-    TXFM_CONTEXT tx_above[MAX_MIB_SIZE];
-    TXFM_CONTEXT tx_left[MAX_MIB_SIZE];
-    RD_STATS pn_rd_stats;
-
-    av1_get_entropy_contexts(bsize, pd, ctxa, ctxl);
-    memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width);
-    memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height);
-
-    for (idy = 0; idy < mi_height; idy += bh) {
-      for (idx = 0; idx < mi_width; idx += bw) {
-        av1_init_rd_stats(&pn_rd_stats);
-        tx_block_yrd(cpi, x, idy, idx, block, max_tx_size, plane_bsize,
-                     init_depth, ctxa, ctxl, tx_above, tx_left,
-                     ref_best_rd - this_rd, &pn_rd_stats, ftxs_mode);
-        if (pn_rd_stats.rate == INT_MAX) {
-          av1_invalid_rd_stats(rd_stats);
-          return 0;
-        }
-        av1_merge_rd_stats(rd_stats, &pn_rd_stats);
-        this_rd +=
-            AOMMIN(RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist),
-                   RDCOST(x->rdmult, pn_rd_stats.zero_rate, pn_rd_stats.sse));
-        block += step;
-      }
-    }
-  }
-
-  const int skip_ctx = av1_get_skip_context(xd);
-  const int s0 = x->skip_cost[skip_ctx][0];
-  const int s1 = x->skip_cost[skip_ctx][1];
-  int64_t skip_rd = RDCOST(x->rdmult, s1, rd_stats->sse);
-  this_rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist);
-  if (skip_rd < this_rd) {
-    this_rd = skip_rd;
-    rd_stats->rate = 0;
-    rd_stats->dist = rd_stats->sse;
-    rd_stats->skip = 1;
-  }
-  if (this_rd > ref_best_rd) is_cost_valid = 0;
-
-  if (!is_cost_valid) {
-    // reset cost value
-    av1_invalid_rd_stats(rd_stats);
-  }
-  return is_cost_valid;
-}
-
-static INLINE uint32_t get_block_residue_hash(MACROBLOCK *x, BLOCK_SIZE bsize) {
-  const int rows = block_size_high[bsize];
-  const int cols = block_size_wide[bsize];
-  const int16_t *diff = x->plane[0].src_diff;
-  const uint32_t hash = av1_get_crc32c_value(&x->mb_rd_record.crc_calculator,
-                                             (uint8_t *)diff, 2 * rows * cols);
-  return (hash << 5) + bsize;
-}
-
-static void save_tx_rd_info(int n4, uint32_t hash, const MACROBLOCK *const x,
-                            const RD_STATS *const rd_stats,
-                            MB_RD_RECORD *tx_rd_record) {
-  int index;
-  if (tx_rd_record->num < RD_RECORD_BUFFER_LEN) {
-    index =
-        (tx_rd_record->index_start + tx_rd_record->num) % RD_RECORD_BUFFER_LEN;
-    ++tx_rd_record->num;
-  } else {
-    index = tx_rd_record->index_start;
-    tx_rd_record->index_start =
-        (tx_rd_record->index_start + 1) % RD_RECORD_BUFFER_LEN;
-  }
-  MB_RD_INFO *const tx_rd_info = &tx_rd_record->tx_rd_info[index];
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  tx_rd_info->hash_value = hash;
-  tx_rd_info->tx_size = mbmi->tx_size;
-  memcpy(tx_rd_info->blk_skip, x->blk_skip,
-         sizeof(tx_rd_info->blk_skip[0]) * n4);
-  av1_copy(tx_rd_info->inter_tx_size, mbmi->inter_tx_size);
-  av1_copy(tx_rd_info->txk_type, mbmi->txk_type);
-  tx_rd_info->rd_stats = *rd_stats;
-}
-
-static void fetch_tx_rd_info(int n4, const MB_RD_INFO *const tx_rd_info,
-                             RD_STATS *const rd_stats, MACROBLOCK *const x) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  mbmi->tx_size = tx_rd_info->tx_size;
-  memcpy(x->blk_skip, tx_rd_info->blk_skip,
-         sizeof(tx_rd_info->blk_skip[0]) * n4);
-  av1_copy(mbmi->inter_tx_size, tx_rd_info->inter_tx_size);
-  av1_copy(mbmi->txk_type, tx_rd_info->txk_type);
-  *rd_stats = tx_rd_info->rd_stats;
-}
-
-static int find_tx_size_rd_info(TXB_RD_RECORD *cur_record,
-                                const uint32_t hash) {
-  // Linear search through the circular buffer to find matching hash.
-  for (int i = cur_record->index_start - 1; i >= 0; i--) {
-    if (cur_record->hash_vals[i] == hash) return i;
-  }
-  for (int i = cur_record->num - 1; i >= cur_record->index_start; i--) {
-    if (cur_record->hash_vals[i] == hash) return i;
-  }
-  int index;
-  // If not found - add new RD info into the buffer and return its index
-  if (cur_record->num < TX_SIZE_RD_RECORD_BUFFER_LEN) {
-    index = (cur_record->index_start + cur_record->num) %
-            TX_SIZE_RD_RECORD_BUFFER_LEN;
-    cur_record->num++;
-  } else {
-    index = cur_record->index_start;
-    cur_record->index_start =
-        (cur_record->index_start + 1) % TX_SIZE_RD_RECORD_BUFFER_LEN;
-  }
-
-  cur_record->hash_vals[index] = hash;
-  av1_zero(cur_record->tx_rd_info[index]);
-  return index;
-}
-
-typedef struct {
-  int leaf;
-  int8_t children[4];
-} RD_RECORD_IDX_NODE;
-
-static const RD_RECORD_IDX_NODE rd_record_tree_8x8[] = {
-  { 1, { 0 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_8x16[] = {
-  { 0, { 1, 2, -1, -1 } },
-  { 1, { 0, 0, 0, 0 } },
-  { 1, { 0, 0, 0, 0 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_16x8[] = {
-  { 0, { 1, 2, -1, -1 } },
-  { 1, { 0 } },
-  { 1, { 0 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_16x16[] = {
-  { 0, { 1, 2, 3, 4 } }, { 1, { 0 } }, { 1, { 0 } }, { 1, { 0 } }, { 1, { 0 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_1_2[] = {
-  { 0, { 1, 2, -1, -1 } },
-  { 0, { 3, 4, 5, 6 } },
-  { 0, { 7, 8, 9, 10 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_2_1[] = {
-  { 0, { 1, 2, -1, -1 } },
-  { 0, { 3, 4, 7, 8 } },
-  { 0, { 5, 6, 9, 10 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_sqr[] = {
-  { 0, { 1, 2, 3, 4 } },     { 0, { 5, 6, 9, 10 } },    { 0, { 7, 8, 11, 12 } },
-  { 0, { 13, 14, 17, 18 } }, { 0, { 15, 16, 19, 20 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_64x128[] = {
-  { 0, { 2, 3, 4, 5 } },     { 0, { 6, 7, 8, 9 } },
-  { 0, { 10, 11, 14, 15 } }, { 0, { 12, 13, 16, 17 } },
-  { 0, { 18, 19, 22, 23 } }, { 0, { 20, 21, 24, 25 } },
-  { 0, { 26, 27, 30, 31 } }, { 0, { 28, 29, 32, 33 } },
-  { 0, { 34, 35, 38, 39 } }, { 0, { 36, 37, 40, 41 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_128x64[] = {
-  { 0, { 2, 3, 6, 7 } },     { 0, { 4, 5, 8, 9 } },
-  { 0, { 10, 11, 18, 19 } }, { 0, { 12, 13, 20, 21 } },
-  { 0, { 14, 15, 22, 23 } }, { 0, { 16, 17, 24, 25 } },
-  { 0, { 26, 27, 34, 35 } }, { 0, { 28, 29, 36, 37 } },
-  { 0, { 30, 31, 38, 39 } }, { 0, { 32, 33, 40, 41 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_128x128[] = {
-  { 0, { 4, 5, 8, 9 } },     { 0, { 6, 7, 10, 11 } },
-  { 0, { 12, 13, 16, 17 } }, { 0, { 14, 15, 18, 19 } },
-  { 0, { 20, 21, 28, 29 } }, { 0, { 22, 23, 30, 31 } },
-  { 0, { 24, 25, 32, 33 } }, { 0, { 26, 27, 34, 35 } },
-  { 0, { 36, 37, 44, 45 } }, { 0, { 38, 39, 46, 47 } },
-  { 0, { 40, 41, 48, 49 } }, { 0, { 42, 43, 50, 51 } },
-  { 0, { 52, 53, 60, 61 } }, { 0, { 54, 55, 62, 63 } },
-  { 0, { 56, 57, 64, 65 } }, { 0, { 58, 59, 66, 67 } },
-  { 0, { 68, 69, 76, 77 } }, { 0, { 70, 71, 78, 79 } },
-  { 0, { 72, 73, 80, 81 } }, { 0, { 74, 75, 82, 83 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_1_4[] = {
-  { 0, { 1, -1, 2, -1 } },
-  { 0, { 3, 4, -1, -1 } },
-  { 0, { 5, 6, -1, -1 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_4_1[] = {
-  { 0, { 1, 2, -1, -1 } },
-  { 0, { 3, 4, -1, -1 } },
-  { 0, { 5, 6, -1, -1 } },
-};
-
-static const RD_RECORD_IDX_NODE *rd_record_tree[BLOCK_SIZES_ALL] = {
-  NULL,                    // BLOCK_4X4
-  NULL,                    // BLOCK_4X8
-  NULL,                    // BLOCK_8X4
-  rd_record_tree_8x8,      // BLOCK_8X8
-  rd_record_tree_8x16,     // BLOCK_8X16
-  rd_record_tree_16x8,     // BLOCK_16X8
-  rd_record_tree_16x16,    // BLOCK_16X16
-  rd_record_tree_1_2,      // BLOCK_16X32
-  rd_record_tree_2_1,      // BLOCK_32X16
-  rd_record_tree_sqr,      // BLOCK_32X32
-  rd_record_tree_1_2,      // BLOCK_32X64
-  rd_record_tree_2_1,      // BLOCK_64X32
-  rd_record_tree_sqr,      // BLOCK_64X64
-  rd_record_tree_64x128,   // BLOCK_64X128
-  rd_record_tree_128x64,   // BLOCK_128X64
-  rd_record_tree_128x128,  // BLOCK_128X128
-  NULL,                    // BLOCK_4X16
-  NULL,                    // BLOCK_16X4
-  rd_record_tree_1_4,      // BLOCK_8X32
-  rd_record_tree_4_1,      // BLOCK_32X8
-  rd_record_tree_1_4,      // BLOCK_16X64
-  rd_record_tree_4_1,      // BLOCK_64X16
-};
-
-static const int rd_record_tree_size[BLOCK_SIZES_ALL] = {
-  0,                                                            // BLOCK_4X4
-  0,                                                            // BLOCK_4X8
-  0,                                                            // BLOCK_8X4
-  sizeof(rd_record_tree_8x8) / sizeof(RD_RECORD_IDX_NODE),      // BLOCK_8X8
-  sizeof(rd_record_tree_8x16) / sizeof(RD_RECORD_IDX_NODE),     // BLOCK_8X16
-  sizeof(rd_record_tree_16x8) / sizeof(RD_RECORD_IDX_NODE),     // BLOCK_16X8
-  sizeof(rd_record_tree_16x16) / sizeof(RD_RECORD_IDX_NODE),    // BLOCK_16X16
-  sizeof(rd_record_tree_1_2) / sizeof(RD_RECORD_IDX_NODE),      // BLOCK_16X32
-  sizeof(rd_record_tree_2_1) / sizeof(RD_RECORD_IDX_NODE),      // BLOCK_32X16
-  sizeof(rd_record_tree_sqr) / sizeof(RD_RECORD_IDX_NODE),      // BLOCK_32X32
-  sizeof(rd_record_tree_1_2) / sizeof(RD_RECORD_IDX_NODE),      // BLOCK_32X64
-  sizeof(rd_record_tree_2_1) / sizeof(RD_RECORD_IDX_NODE),      // BLOCK_64X32
-  sizeof(rd_record_tree_sqr) / sizeof(RD_RECORD_IDX_NODE),      // BLOCK_64X64
-  sizeof(rd_record_tree_64x128) / sizeof(RD_RECORD_IDX_NODE),   // BLOCK_64X128
-  sizeof(rd_record_tree_128x64) / sizeof(RD_RECORD_IDX_NODE),   // BLOCK_128X64
-  sizeof(rd_record_tree_128x128) / sizeof(RD_RECORD_IDX_NODE),  // BLOCK_128X128
-  0,                                                            // BLOCK_4X16
-  0,                                                            // BLOCK_16X4
-  sizeof(rd_record_tree_1_4) / sizeof(RD_RECORD_IDX_NODE),      // BLOCK_8X32
-  sizeof(rd_record_tree_4_1) / sizeof(RD_RECORD_IDX_NODE),      // BLOCK_32X8
-  sizeof(rd_record_tree_1_4) / sizeof(RD_RECORD_IDX_NODE),      // BLOCK_16X64
-  sizeof(rd_record_tree_4_1) / sizeof(RD_RECORD_IDX_NODE),      // BLOCK_64X16
-};
-
-static INLINE void init_rd_record_tree(TXB_RD_INFO_NODE *tree,
-                                       BLOCK_SIZE bsize) {
-  const RD_RECORD_IDX_NODE *rd_record = rd_record_tree[bsize];
-  const int size = rd_record_tree_size[bsize];
-  for (int i = 0; i < size; ++i) {
-    if (rd_record[i].leaf) {
-      av1_zero(tree[i].children);
-    } else {
-      for (int j = 0; j < 4; ++j) {
-        const int8_t idx = rd_record[i].children[j];
-        tree[i].children[j] = idx > 0 ? &tree[idx] : NULL;
-      }
-    }
-  }
-}
-
-// Go through all TX blocks that could be used in TX size search, compute
-// residual hash values for them and find matching RD info that stores previous
-// RD search results for these TX blocks. The idea is to prevent repeated
-// rate/distortion computations that happen because of the combination of
-// partition and TX size search. The resulting RD info records are returned in
-// the form of a quadtree for easier access in actual TX size search.
-static int find_tx_size_rd_records(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
-                                   int mi_col, TXB_RD_INFO_NODE *dst_rd_info) {
-  TXB_RD_RECORD *rd_records_table[4] = { x->txb_rd_record_8X8,
-                                         x->txb_rd_record_16X16,
-                                         x->txb_rd_record_32X32,
-                                         x->txb_rd_record_64X64 };
-  const TX_SIZE max_square_tx_size = max_txsize_lookup[bsize];
-  const int bw = block_size_wide[bsize];
-  const int bh = block_size_high[bsize];
-
-  // Hashing is performed only for square TX sizes larger than TX_4X4
-  if (max_square_tx_size < TX_8X8) return 0;
-  const int diff_stride = bw;
-  const struct macroblock_plane *const p = &x->plane[0];
-  const int16_t *diff = &p->src_diff[0];
-  init_rd_record_tree(dst_rd_info, bsize);
-  // Coordinates of the top-left corner of current block within the superblock
-  // measured in pixels:
-  const int mi_row_in_sb = (mi_row % MAX_MIB_SIZE) << MI_SIZE_LOG2;
-  const int mi_col_in_sb = (mi_col % MAX_MIB_SIZE) << MI_SIZE_LOG2;
-  int cur_rd_info_idx = 0;
-  int cur_tx_depth = 0;
-  TX_SIZE cur_tx_size = max_txsize_rect_lookup[bsize];
-  while (cur_tx_depth <= MAX_VARTX_DEPTH) {
-    const int cur_tx_bw = tx_size_wide[cur_tx_size];
-    const int cur_tx_bh = tx_size_high[cur_tx_size];
-    if (cur_tx_bw < 8 || cur_tx_bh < 8) break;
-    const TX_SIZE next_tx_size = sub_tx_size_map[cur_tx_size];
-    const int tx_size_idx = cur_tx_size - TX_8X8;
-    for (int row = 0; row < bh; row += cur_tx_bh) {
-      for (int col = 0; col < bw; col += cur_tx_bw) {
-        if (cur_tx_bw != cur_tx_bh) {
-          // Use dummy nodes for all rectangular transforms within the
-          // TX size search tree.
-          dst_rd_info[cur_rd_info_idx].rd_info_array = NULL;
-        } else {
-          // Get spatial location of this TX block within the superblock
-          // (measured in cur_tx_bsize units).
-          const int row_in_sb = (mi_row_in_sb + row) / cur_tx_bh;
-          const int col_in_sb = (mi_col_in_sb + col) / cur_tx_bw;
-
-          int16_t hash_data[MAX_SB_SQUARE];
-          int16_t *cur_hash_row = hash_data;
-          const int16_t *cur_diff_row = diff + row * diff_stride + col;
-          for (int i = 0; i < cur_tx_bh; i++) {
-            memcpy(cur_hash_row, cur_diff_row, sizeof(*hash_data) * cur_tx_bw);
-            cur_hash_row += cur_tx_bw;
-            cur_diff_row += diff_stride;
-          }
-          const int hash = av1_get_crc32c_value(&x->mb_rd_record.crc_calculator,
-                                                (uint8_t *)hash_data,
-                                                2 * cur_tx_bw * cur_tx_bh);
-          // Find corresponding RD info based on the hash value.
-          const int record_idx =
-              row_in_sb * (MAX_MIB_SIZE >> (tx_size_idx + 1)) + col_in_sb;
-          TXB_RD_RECORD *records = &rd_records_table[tx_size_idx][record_idx];
-          int idx = find_tx_size_rd_info(records, hash);
-          dst_rd_info[cur_rd_info_idx].rd_info_array =
-              &records->tx_rd_info[idx];
-        }
-        ++cur_rd_info_idx;
-      }
-    }
-    cur_tx_size = next_tx_size;
-    ++cur_tx_depth;
-  }
-  return 1;
-}
-
-// origin_threshold * 128 / 100
-static const uint32_t skip_pred_threshold[3][BLOCK_SIZES_ALL] = {
-  {
-      64, 64, 64, 70, 60, 60, 68, 68, 68, 68, 68,
-      68, 68, 68, 68, 68, 64, 64, 70, 70, 68, 68,
-  },
-  {
-      88, 88, 88, 86, 87, 87, 68, 68, 68, 68, 68,
-      68, 68, 68, 68, 68, 88, 88, 86, 86, 68, 68,
-  },
-  {
-      90, 93, 93, 90, 93, 93, 74, 74, 74, 74, 74,
-      74, 74, 74, 74, 74, 90, 90, 90, 90, 74, 74,
-  },
-};
-
-// lookup table for predict_skip_flag
-// int max_tx_size = max_txsize_rect_lookup[bsize];
-// if (tx_size_high[max_tx_size] > 16 || tx_size_wide[max_tx_size] > 16)
-//   max_tx_size = AOMMIN(max_txsize_lookup[bsize], TX_16X16);
-static const TX_SIZE max_predict_sf_tx_size[BLOCK_SIZES_ALL] = {
-  TX_4X4,   TX_4X8,   TX_8X4,   TX_8X8,   TX_8X16,  TX_16X8,
-  TX_16X16, TX_16X16, TX_16X16, TX_16X16, TX_16X16, TX_16X16,
-  TX_16X16, TX_16X16, TX_16X16, TX_16X16, TX_4X16,  TX_16X4,
-  TX_8X8,   TX_8X8,   TX_16X16, TX_16X16,
-};
-
-// Uses simple features on top of DCT coefficients to quickly predict
-// whether optimal RD decision is to skip encoding the residual.
-// The sse value is stored in dist.
-static int predict_skip_flag(MACROBLOCK *x, BLOCK_SIZE bsize, int64_t *dist,
-                             int reduced_tx_set) {
-  const int bw = block_size_wide[bsize];
-  const int bh = block_size_high[bsize];
-  const MACROBLOCKD *xd = &x->e_mbd;
-  const int16_t dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd);
-
-  *dist = pixel_diff_dist(x, 0, 0, 0, bsize, bsize);
-  const int64_t mse = *dist / bw / bh;
-  // Normalized quantizer takes the transform upscaling factor (8 for tx size
-  // smaller than 32) into account.
-  const int16_t normalized_dc_q = dc_q >> 3;
-  const int64_t mse_thresh = (int64_t)normalized_dc_q * normalized_dc_q / 8;
-  // Predict not to skip when mse is larger than threshold.
-  if (mse > mse_thresh) return 0;
-
-  const int max_tx_size = max_predict_sf_tx_size[bsize];
-  const int tx_h = tx_size_high[max_tx_size];
-  const int tx_w = tx_size_wide[max_tx_size];
-  DECLARE_ALIGNED(32, tran_low_t, coefs[32 * 32]);
-  TxfmParam param;
-  param.tx_type = DCT_DCT;
-  param.tx_size = max_tx_size;
-  param.bd = xd->bd;
-  param.is_hbd = get_bitdepth_data_path_index(xd);
-  param.lossless = 0;
-  param.tx_set_type = av1_get_ext_tx_set_type(
-      param.tx_size, is_inter_block(xd->mi[0]), reduced_tx_set);
-  const int bd_idx = (xd->bd == 8) ? 0 : ((xd->bd == 10) ? 1 : 2);
-  const uint32_t max_qcoef_thresh = skip_pred_threshold[bd_idx][bsize];
-  const int16_t *src_diff = x->plane[0].src_diff;
-  const int n_coeff = tx_w * tx_h;
-  const int16_t ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
-  const uint32_t dc_thresh = max_qcoef_thresh * dc_q;
-  const uint32_t ac_thresh = max_qcoef_thresh * ac_q;
-  for (int row = 0; row < bh; row += tx_h) {
-    for (int col = 0; col < bw; col += tx_w) {
-      av1_fwd_txfm(src_diff + col, coefs, bw, &param);
-      // Operating on TX domain, not pixels; we want the QTX quantizers
-      const uint32_t dc_coef = (((uint32_t)abs(coefs[0])) << 7);
-      if (dc_coef >= dc_thresh) return 0;
-      for (int i = 1; i < n_coeff; ++i) {
-        const uint32_t ac_coef = (((uint32_t)abs(coefs[i])) << 7);
-        if (ac_coef >= ac_thresh) return 0;
-      }
-    }
-    src_diff += tx_h * bw;
-  }
-  return 1;
-}
-
-// Used to set proper context for early termination with skip = 1.
-static void set_skip_flag(MACROBLOCK *x, RD_STATS *rd_stats, int bsize,
-                          int64_t dist) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int n4 = bsize_to_num_blk(bsize);
-  const TX_SIZE tx_size = max_txsize_rect_lookup[bsize];
-  memset(mbmi->txk_type, DCT_DCT, sizeof(mbmi->txk_type[0]) * TXK_TYPE_BUF_LEN);
-  memset(mbmi->inter_tx_size, tx_size, sizeof(mbmi->inter_tx_size));
-  mbmi->tx_size = tx_size;
-  for (int i = 0; i < n4; ++i) set_blk_skip(x, 0, i, 1);
-  rd_stats->skip = 1;
-  rd_stats->rate = 0;
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
-    dist = ROUND_POWER_OF_TWO(dist, (xd->bd - 8) * 2);
-  rd_stats->dist = rd_stats->sse = (dist << 4);
-}
-
-static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
-                               RD_STATS *rd_stats, BLOCK_SIZE bsize, int mi_row,
-                               int mi_col, int64_t ref_best_rd) {
-  const AV1_COMMON *cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  int64_t rd = INT64_MAX;
-  int64_t best_rd = INT64_MAX;
-  const int is_inter = is_inter_block(mbmi);
-  const int n4 = bsize_to_num_blk(bsize);
-  // Get the tx_size 1 level down
-  const TX_SIZE min_tx_size = sub_tx_size_map[max_txsize_rect_lookup[bsize]];
-  const TxSetType tx_set_type =
-      av1_get_ext_tx_set_type(min_tx_size, is_inter, cm->reduced_tx_set_used);
-  const int within_border =
-      mi_row >= xd->tile.mi_row_start &&
-      (mi_row + mi_size_high[bsize] < xd->tile.mi_row_end) &&
-      mi_col >= xd->tile.mi_col_start &&
-      (mi_col + mi_size_wide[bsize] < xd->tile.mi_col_end);
-
-  av1_invalid_rd_stats(rd_stats);
-
-  if (cpi->sf.model_based_prune_tx_search_level && ref_best_rd != INT64_MAX) {
-    int model_rate;
-    int64_t model_dist;
-    int model_skip;
-    model_rd_sb_fn[MODELRD_TYPE_TX_SEARCH_PRUNE](
-        cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &model_rate, &model_dist,
-        &model_skip, NULL, NULL, NULL, NULL);
-    const int64_t model_rd = RDCOST(x->rdmult, model_rate, model_dist);
-    // If the modeled rd is a lot worse than the best so far, breakout.
-    // TODO(debargha, urvang): Improve the model and make the check below
-    // tighter.
-    assert(cpi->sf.model_based_prune_tx_search_level >= 0 &&
-           cpi->sf.model_based_prune_tx_search_level <= 2);
-    static const int prune_factor_by8[] = { 2 + MODELRD_TYPE_TX_SEARCH_PRUNE,
-                                            4 + MODELRD_TYPE_TX_SEARCH_PRUNE };
-    if (!model_skip &&
-        ((model_rd *
-          prune_factor_by8[cpi->sf.model_based_prune_tx_search_level - 1]) >>
-         3) > ref_best_rd)
-      return;
-  }
-
-  const uint32_t hash = get_block_residue_hash(x, bsize);
-  MB_RD_RECORD *mb_rd_record = &x->mb_rd_record;
-
-  if (ref_best_rd != INT64_MAX && within_border && cpi->sf.use_mb_rd_hash) {
-    for (int i = 0; i < mb_rd_record->num; ++i) {
-      const int index = (mb_rd_record->index_start + i) % RD_RECORD_BUFFER_LEN;
-      // If there is a match in the tx_rd_record, fetch the RD decision and
-      // terminate early.
-      if (mb_rd_record->tx_rd_info[index].hash_value == hash) {
-        MB_RD_INFO *tx_rd_info = &mb_rd_record->tx_rd_info[index];
-        fetch_tx_rd_info(n4, tx_rd_info, rd_stats, x);
-        return;
-      }
-    }
-  }
-
-  // If we predict that skip is the optimal RD decision - set the respective
-  // context and terminate early.
-  int64_t dist;
-  if (is_inter && cpi->sf.tx_type_search.use_skip_flag_prediction &&
-      predict_skip_flag(x, bsize, &dist, cm->reduced_tx_set_used)) {
-    set_skip_flag(x, rd_stats, bsize, dist);
-    // Save the RD search results into tx_rd_record.
-    if (within_border) save_tx_rd_info(n4, hash, x, rd_stats, mb_rd_record);
-    return;
-  }
-
-  // Precompute residual hashes and find existing or add new RD records to
-  // store and reuse rate and distortion values to speed up TX size search.
-  TXB_RD_INFO_NODE matched_rd_info[4 + 16 + 64];
-  int found_rd_info = 0;
-  if (ref_best_rd != INT64_MAX && within_border && cpi->sf.use_inter_txb_hash) {
-    found_rd_info =
-        find_tx_size_rd_records(x, bsize, mi_row, mi_col, matched_rd_info);
-  }
-
-  prune_tx(cpi, bsize, x, xd, tx_set_type);
-
-  int found = 0;
-
-  RD_STATS this_rd_stats;
-  av1_init_rd_stats(&this_rd_stats);
-
-  rd = select_tx_size_fix_type(cpi, x, &this_rd_stats, bsize, ref_best_rd,
-                               found_rd_info ? matched_rd_info : NULL);
-  assert(IMPLIES(this_rd_stats.skip && !this_rd_stats.invalid_rate,
-                 this_rd_stats.rate == 0));
-
-  ref_best_rd = AOMMIN(rd, ref_best_rd);
-  if (rd < best_rd) {
-    *rd_stats = this_rd_stats;
-    found = 1;
-  }
-
-  // Reset the pruning flags.
-  av1_zero(x->tx_search_prune);
-  x->tx_split_prune_flag = 0;
-
-  // We should always find at least one candidate unless ref_best_rd is less
-  // than INT64_MAX (in which case, all the calls to select_tx_size_fix_type
-  // might have failed to find something better)
-  assert(IMPLIES(!found, ref_best_rd != INT64_MAX));
-  if (!found) return;
-
-  // Save the RD search results into tx_rd_record.
-  if (within_border && cpi->sf.use_mb_rd_hash)
-    save_tx_rd_info(n4, hash, x, rd_stats, mb_rd_record);
-}
-
-static void tx_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
-                          int blk_col, int plane, int block, TX_SIZE tx_size,
-                          BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *above_ctx,
-                          ENTROPY_CONTEXT *left_ctx, RD_STATS *rd_stats,
-                          FAST_TX_SEARCH_MODE ftxs_mode) {
-  assert(plane > 0);
-  assert(tx_size < TX_SIZES_ALL);
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
-  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
-  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
-
-  ENTROPY_CONTEXT *ta = above_ctx + blk_col;
-  ENTROPY_CONTEXT *tl = left_ctx + blk_row;
-  TXB_CTX txb_ctx;
-  get_txb_ctx(plane_bsize, tx_size, plane, ta, tl, &txb_ctx);
-  const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
-  const int zero_blk_rate = x->coeff_costs[txs_ctx][PLANE_TYPE_UV]
-                                .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
-  tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block, plane_bsize,
-                &txb_ctx, rd_stats, ftxs_mode, INT64_MAX, NULL);
-
-  const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-  const int blk_idx = blk_row * mi_width + blk_col;
-
-  av1_set_txb_context(x, plane, block, tx_size, ta, tl);
-  if ((RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
-           RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
-       rd_stats->skip == 1) &&
-      !xd->lossless[mbmi->segment_id]) {
-    rd_stats->rate = zero_blk_rate;
-    rd_stats->dist = rd_stats->sse;
-  }
-
-  // Set chroma blk_skip to 0
-  set_blk_skip(x, plane, blk_idx, 0);
-}
-
-// Return value 0: early termination triggered, no valid rd cost available;
-//              1: rd cost values are valid.
-static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x,
-                            RD_STATS *rd_stats, BLOCK_SIZE bsize,
-                            int64_t non_skip_ref_best_rd,
-                            int64_t skip_ref_best_rd,
-                            FAST_TX_SEARCH_MODE ftxs_mode) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  int plane;
-  int is_cost_valid = 1;
-  int64_t this_rd = 0;
-  int64_t skip_rd = 0;
-
-  if ((non_skip_ref_best_rd < 0) && (skip_ref_best_rd < 0)) is_cost_valid = 0;
-
-  av1_init_rd_stats(rd_stats);
-
-  if (x->skip_chroma_rd) {
-    if (!is_cost_valid) av1_invalid_rd_stats(rd_stats);
-
-    return is_cost_valid;
-  }
-
-  const BLOCK_SIZE bsizec = scale_chroma_bsize(
-      bsize, xd->plane[1].subsampling_x, xd->plane[1].subsampling_y);
-
-  if (is_inter_block(mbmi) && is_cost_valid) {
-    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
-      av1_subtract_plane(x, bsizec, plane);
-  }
-
-  if (is_cost_valid) {
-    for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
-      const struct macroblockd_plane *const pd = &xd->plane[plane];
-      const BLOCK_SIZE plane_bsize =
-          get_plane_block_size(bsizec, pd->subsampling_x, pd->subsampling_y);
-      const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-      const int mi_height =
-          block_size_high[plane_bsize] >> tx_size_high_log2[0];
-      const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, plane);
-      const int bh = tx_size_high_unit[max_tx_size];
-      const int bw = tx_size_wide_unit[max_tx_size];
-      int idx, idy;
-      int block = 0;
-      const int step = bh * bw;
-      ENTROPY_CONTEXT ta[MAX_MIB_SIZE];
-      ENTROPY_CONTEXT tl[MAX_MIB_SIZE];
-      av1_get_entropy_contexts(bsizec, pd, ta, tl);
-
-      for (idy = 0; idy < mi_height; idy += bh) {
-        for (idx = 0; idx < mi_width; idx += bw) {
-          RD_STATS pn_rd_stats;
-          av1_init_rd_stats(&pn_rd_stats);
-          tx_block_uvrd(cpi, x, idy, idx, plane, block, max_tx_size,
-                        plane_bsize, ta, tl, &pn_rd_stats, ftxs_mode);
-          if (pn_rd_stats.rate == INT_MAX) {
-            av1_invalid_rd_stats(rd_stats);
-            return 0;
-          }
-          av1_merge_rd_stats(rd_stats, &pn_rd_stats);
-          this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
-          skip_rd = RDCOST(x->rdmult, 0, rd_stats->sse);
-          if ((this_rd > non_skip_ref_best_rd) &&
-              (skip_rd > skip_ref_best_rd)) {
-            av1_invalid_rd_stats(rd_stats);
-            return 0;
-          }
-          block += step;
-        }
-      }
-    }
-  } else {
-    // reset cost value
-    av1_invalid_rd_stats(rd_stats);
-  }
-
-  return is_cost_valid;
-}
-
-static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                       int dc_mode_cost,
-                                       uint8_t *best_palette_color_map,
-                                       MB_MODE_INFO *const best_mbmi,
-                                       int64_t *best_rd, int *rate,
-                                       int *rate_tokenonly, int64_t *distortion,
-                                       int *skippable) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  assert(!is_inter_block(mbmi));
-  assert(
-      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type));
-  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-  const BLOCK_SIZE bsize = mbmi->sb_type;
-  const SequenceHeader *const seq_params = &cpi->common.seq_params;
-  int this_rate;
-  int64_t this_rd;
-  int colors_u, colors_v, colors;
-  const int src_stride = x->plane[1].src.stride;
-  const uint8_t *const src_u = x->plane[1].src.buf;
-  const uint8_t *const src_v = x->plane[2].src.buf;
-  uint8_t *const color_map = xd->plane[1].color_index_map;
-  RD_STATS tokenonly_rd_stats;
-  int plane_block_width, plane_block_height, rows, cols;
-  av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
-                           &plane_block_height, &rows, &cols);
-
-  mbmi->uv_mode = UV_DC_PRED;
-
-  int count_buf[1 << 12];  // Maximum (1 << 12) color levels.
-  if (seq_params->use_highbitdepth) {
-    colors_u = av1_count_colors_highbd(src_u, src_stride, rows, cols,
-                                       seq_params->bit_depth, count_buf);
-    colors_v = av1_count_colors_highbd(src_v, src_stride, rows, cols,
-                                       seq_params->bit_depth, count_buf);
-  } else {
-    colors_u = av1_count_colors(src_u, src_stride, rows, cols, count_buf);
-    colors_v = av1_count_colors(src_v, src_stride, rows, cols, count_buf);
-  }
-
-  uint16_t color_cache[2 * PALETTE_MAX_SIZE];
-  const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
-
-  colors = colors_u > colors_v ? colors_u : colors_v;
-  if (colors > 1 && colors <= 64) {
-    int r, c, n, i, j;
-    const int max_itr = 50;
-    int lb_u, ub_u, val_u;
-    int lb_v, ub_v, val_v;
-    int *const data = x->palette_buffer->kmeans_data_buf;
-    int centroids[2 * PALETTE_MAX_SIZE];
-
-    uint16_t *src_u16 = CONVERT_TO_SHORTPTR(src_u);
-    uint16_t *src_v16 = CONVERT_TO_SHORTPTR(src_v);
-    if (seq_params->use_highbitdepth) {
-      lb_u = src_u16[0];
-      ub_u = src_u16[0];
-      lb_v = src_v16[0];
-      ub_v = src_v16[0];
-    } else {
-      lb_u = src_u[0];
-      ub_u = src_u[0];
-      lb_v = src_v[0];
-      ub_v = src_v[0];
-    }
-
-    for (r = 0; r < rows; ++r) {
-      for (c = 0; c < cols; ++c) {
-        if (seq_params->use_highbitdepth) {
-          val_u = src_u16[r * src_stride + c];
-          val_v = src_v16[r * src_stride + c];
-          data[(r * cols + c) * 2] = val_u;
-          data[(r * cols + c) * 2 + 1] = val_v;
-        } else {
-          val_u = src_u[r * src_stride + c];
-          val_v = src_v[r * src_stride + c];
-          data[(r * cols + c) * 2] = val_u;
-          data[(r * cols + c) * 2 + 1] = val_v;
-        }
-        if (val_u < lb_u)
-          lb_u = val_u;
-        else if (val_u > ub_u)
-          ub_u = val_u;
-        if (val_v < lb_v)
-          lb_v = val_v;
-        else if (val_v > ub_v)
-          ub_v = val_v;
-      }
-    }
-
-    for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
-         --n) {
-      for (i = 0; i < n; ++i) {
-        centroids[i * 2] = lb_u + (2 * i + 1) * (ub_u - lb_u) / n / 2;
-        centroids[i * 2 + 1] = lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2;
-      }
-      av1_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr);
-      optimize_palette_colors(color_cache, n_cache, n, 2, centroids);
-      // Sort the U channel colors in ascending order.
-      for (i = 0; i < 2 * (n - 1); i += 2) {
-        int min_idx = i;
-        int min_val = centroids[i];
-        for (j = i + 2; j < 2 * n; j += 2)
-          if (centroids[j] < min_val) min_val = centroids[j], min_idx = j;
-        if (min_idx != i) {
-          int temp_u = centroids[i], temp_v = centroids[i + 1];
-          centroids[i] = centroids[min_idx];
-          centroids[i + 1] = centroids[min_idx + 1];
-          centroids[min_idx] = temp_u, centroids[min_idx + 1] = temp_v;
-        }
-      }
-      av1_calc_indices(data, centroids, color_map, rows * cols, n, 2);
-      extend_palette_color_map(color_map, cols, rows, plane_block_width,
-                               plane_block_height);
-      pmi->palette_size[1] = n;
-      for (i = 1; i < 3; ++i) {
-        for (j = 0; j < n; ++j) {
-          if (seq_params->use_highbitdepth)
-            pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = clip_pixel_highbd(
-                (int)centroids[j * 2 + i - 1], seq_params->bit_depth);
-          else
-            pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
-                clip_pixel((int)centroids[j * 2 + i - 1]);
-        }
-      }
-
-      super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
-      if (tokenonly_rd_stats.rate == INT_MAX) continue;
-      this_rate = tokenonly_rd_stats.rate +
-                  intra_mode_info_cost_uv(cpi, x, mbmi, bsize, dc_mode_cost);
-      this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
-      if (this_rd < *best_rd) {
-        *best_rd = this_rd;
-        *best_mbmi = *mbmi;
-        memcpy(best_palette_color_map, color_map,
-               plane_block_width * plane_block_height *
-                   sizeof(best_palette_color_map[0]));
-        *rate = this_rate;
-        *distortion = tokenonly_rd_stats.dist;
-        *rate_tokenonly = tokenonly_rd_stats.rate;
-        *skippable = tokenonly_rd_stats.skip;
-      }
-    }
-  }
-  if (best_mbmi->palette_mode_info.palette_size[1] > 0) {
-    memcpy(color_map, best_palette_color_map,
-           plane_block_width * plane_block_height *
-               sizeof(best_palette_color_map[0]));
-  }
-}
-
-// Run RD calculation with given chroma intra prediction angle., and return
-// the RD cost. Update the best mode info. if the RD cost is the best so far.
-static int64_t pick_intra_angle_routine_sbuv(
-    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
-    int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats,
-    int *best_angle_delta, int64_t *best_rd) {
-  MB_MODE_INFO *mbmi = x->e_mbd.mi[0];
-  assert(!is_inter_block(mbmi));
-  int this_rate;
-  int64_t this_rd;
-  RD_STATS tokenonly_rd_stats;
-
-  if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in))
-    return INT64_MAX;
-  this_rate = tokenonly_rd_stats.rate +
-              intra_mode_info_cost_uv(cpi, x, mbmi, bsize, rate_overhead);
-  this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
-  if (this_rd < *best_rd) {
-    *best_rd = this_rd;
-    *best_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV];
-    *rate = this_rate;
-    rd_stats->rate = tokenonly_rd_stats.rate;
-    rd_stats->dist = tokenonly_rd_stats.dist;
-    rd_stats->skip = tokenonly_rd_stats.skip;
-  }
-  return this_rd;
-}
-
-// With given chroma directional intra prediction mode, pick the best angle
-// delta. Return true if a RD cost that is smaller than the input one is found.
-static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                    BLOCK_SIZE bsize, int rate_overhead,
-                                    int64_t best_rd, int *rate,
-                                    RD_STATS *rd_stats) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  assert(!is_inter_block(mbmi));
-  int i, angle_delta, best_angle_delta = 0;
-  int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
-
-  rd_stats->rate = INT_MAX;
-  rd_stats->skip = 0;
-  rd_stats->dist = INT64_MAX;
-  for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
-
-  for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
-    for (i = 0; i < 2; ++i) {
-      best_rd_in = (best_rd == INT64_MAX)
-                       ? INT64_MAX
-                       : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5)));
-      mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta;
-      this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead,
-                                              best_rd_in, rate, rd_stats,
-                                              &best_angle_delta, &best_rd);
-      rd_cost[2 * angle_delta + i] = this_rd;
-      if (angle_delta == 0) {
-        if (this_rd == INT64_MAX) return 0;
-        rd_cost[1] = this_rd;
-        break;
-      }
-    }
-  }
-
-  assert(best_rd != INT64_MAX);
-  for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
-    int64_t rd_thresh;
-    for (i = 0; i < 2; ++i) {
-      int skip_search = 0;
-      rd_thresh = best_rd + (best_rd >> 5);
-      if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
-          rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
-        skip_search = 1;
-      if (!skip_search) {
-        mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta;
-        pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, best_rd,
-                                      rate, rd_stats, &best_angle_delta,
-                                      &best_rd);
-      }
-    }
-  }
-
-  mbmi->angle_delta[PLANE_TYPE_UV] = best_angle_delta;
-  return rd_stats->rate != INT_MAX;
-}
-
-#define PLANE_SIGN_TO_JOINT_SIGN(plane, a, b) \
-  (plane == CFL_PRED_U ? a * CFL_SIGNS + b - 1 : b * CFL_SIGNS + a - 1)
-static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi,
-                             TX_SIZE tx_size, int64_t best_rd) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-
-  const BLOCK_SIZE bsize = mbmi->sb_type;
-#if CONFIG_DEBUG
-  assert(is_cfl_allowed(xd));
-  const int ssx = xd->plane[AOM_PLANE_U].subsampling_x;
-  const int ssy = xd->plane[AOM_PLANE_U].subsampling_y;
-  const BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, ssx, ssy);
-  (void)plane_bsize;
-  assert(plane_bsize < BLOCK_SIZES_ALL);
-  if (!xd->lossless[mbmi->segment_id]) {
-    assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
-    assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
-  }
-#endif  // CONFIG_DEBUG
-
-  xd->cfl.use_dc_pred_cache = 1;
-  const int64_t mode_rd =
-      RDCOST(x->rdmult,
-             x->intra_uv_mode_cost[CFL_ALLOWED][mbmi->mode][UV_CFL_PRED], 0);
-  int64_t best_rd_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES];
-  int best_c[CFL_JOINT_SIGNS][CFL_PRED_PLANES];
-#if CONFIG_DEBUG
-  int best_rate_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES];
-#endif  // CONFIG_DEBUG
-
-  for (int plane = 0; plane < CFL_PRED_PLANES; plane++) {
-    RD_STATS rd_stats;
-    av1_init_rd_stats(&rd_stats);
-    for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
-      best_rd_uv[joint_sign][plane] = INT64_MAX;
-      best_c[joint_sign][plane] = 0;
-    }
-    // Collect RD stats for an alpha value of zero in this plane.
-    // Skip i == CFL_SIGN_ZERO as (0, 0) is invalid.
-    for (int i = CFL_SIGN_NEG; i < CFL_SIGNS; i++) {
-      const int joint_sign = PLANE_SIGN_TO_JOINT_SIGN(plane, CFL_SIGN_ZERO, i);
-      if (i == CFL_SIGN_NEG) {
-        mbmi->cfl_alpha_idx = 0;
-        mbmi->cfl_alpha_signs = joint_sign;
-        txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, plane + 1, bsize, tx_size,
-                         cpi->sf.use_fast_coef_costing, FTXS_NONE);
-        if (rd_stats.rate == INT_MAX) break;
-      }
-      const int alpha_rate = x->cfl_cost[joint_sign][plane][0];
-      best_rd_uv[joint_sign][plane] =
-          RDCOST(x->rdmult, rd_stats.rate + alpha_rate, rd_stats.dist);
-#if CONFIG_DEBUG
-      best_rate_uv[joint_sign][plane] = rd_stats.rate;
-#endif  // CONFIG_DEBUG
-    }
-  }
-
-  int best_joint_sign = -1;
-
-  for (int plane = 0; plane < CFL_PRED_PLANES; plane++) {
-    for (int pn_sign = CFL_SIGN_NEG; pn_sign < CFL_SIGNS; pn_sign++) {
-      int progress = 0;
-      for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
-        int flag = 0;
-        RD_STATS rd_stats;
-        if (c > 2 && progress < c) break;
-        av1_init_rd_stats(&rd_stats);
-        for (int i = 0; i < CFL_SIGNS; i++) {
-          const int joint_sign = PLANE_SIGN_TO_JOINT_SIGN(plane, pn_sign, i);
-          if (i == 0) {
-            mbmi->cfl_alpha_idx = (c << CFL_ALPHABET_SIZE_LOG2) + c;
-            mbmi->cfl_alpha_signs = joint_sign;
-            txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, plane + 1, bsize,
-                             tx_size, cpi->sf.use_fast_coef_costing, FTXS_NONE);
-            if (rd_stats.rate == INT_MAX) break;
-          }
-          const int alpha_rate = x->cfl_cost[joint_sign][plane][c];
-          int64_t this_rd =
-              RDCOST(x->rdmult, rd_stats.rate + alpha_rate, rd_stats.dist);
-          if (this_rd >= best_rd_uv[joint_sign][plane]) continue;
-          best_rd_uv[joint_sign][plane] = this_rd;
-          best_c[joint_sign][plane] = c;
-#if CONFIG_DEBUG
-          best_rate_uv[joint_sign][plane] = rd_stats.rate;
-#endif  // CONFIG_DEBUG
-          flag = 2;
-          if (best_rd_uv[joint_sign][!plane] == INT64_MAX) continue;
-          this_rd += mode_rd + best_rd_uv[joint_sign][!plane];
-          if (this_rd >= best_rd) continue;
-          best_rd = this_rd;
-          best_joint_sign = joint_sign;
-        }
-        progress += flag;
-      }
-    }
-  }
-
-  int best_rate_overhead = INT_MAX;
-  int ind = 0;
-  if (best_joint_sign >= 0) {
-    const int u = best_c[best_joint_sign][CFL_PRED_U];
-    const int v = best_c[best_joint_sign][CFL_PRED_V];
-    ind = (u << CFL_ALPHABET_SIZE_LOG2) + v;
-    best_rate_overhead = x->cfl_cost[best_joint_sign][CFL_PRED_U][u] +
-                         x->cfl_cost[best_joint_sign][CFL_PRED_V][v];
-#if CONFIG_DEBUG
-    xd->cfl.rate = x->intra_uv_mode_cost[CFL_ALLOWED][mbmi->mode][UV_CFL_PRED] +
-                   best_rate_overhead +
-                   best_rate_uv[best_joint_sign][CFL_PRED_U] +
-                   best_rate_uv[best_joint_sign][CFL_PRED_V];
-#endif  // CONFIG_DEBUG
-  } else {
-    best_joint_sign = 0;
-  }
-
-  mbmi->cfl_alpha_idx = ind;
-  mbmi->cfl_alpha_signs = best_joint_sign;
-  xd->cfl.use_dc_pred_cache = 0;
-  xd->cfl.dc_pred_is_cached[0] = 0;
-  xd->cfl.dc_pred_is_cached[1] = 0;
-  return best_rate_overhead;
-}
-
-static void init_sbuv_mode(MB_MODE_INFO *const mbmi) {
-  mbmi->uv_mode = UV_DC_PRED;
-  mbmi->palette_mode_info.palette_size[1] = 0;
-}
-
-static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                       int *rate, int *rate_tokenonly,
-                                       int64_t *distortion, int *skippable,
-                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
-  MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  assert(!is_inter_block(mbmi));
-  MB_MODE_INFO best_mbmi = *mbmi;
-  int64_t best_rd = INT64_MAX, this_rd;
-
-  for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) {
-    int this_rate;
-    RD_STATS tokenonly_rd_stats;
-    UV_PREDICTION_MODE mode = uv_rd_search_mode_order[mode_idx];
-    const int is_directional_mode = av1_is_directional_mode(get_uv_mode(mode));
-    if (!(cpi->sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] &
-          (1 << mode)))
-      continue;
-
-    mbmi->uv_mode = mode;
-    int cfl_alpha_rate = 0;
-    if (mode == UV_CFL_PRED) {
-      if (!is_cfl_allowed(xd)) continue;
-      assert(!is_directional_mode);
-      const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
-      cfl_alpha_rate = cfl_rd_pick_alpha(x, cpi, uv_tx_size, best_rd);
-      if (cfl_alpha_rate == INT_MAX) continue;
-    }
-    mbmi->angle_delta[PLANE_TYPE_UV] = 0;
-    if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) {
-      const int rate_overhead =
-          x->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mode];
-      if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
-                                    &this_rate, &tokenonly_rd_stats))
-        continue;
-    } else {
-      if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) {
-        continue;
-      }
-    }
-    const int mode_cost =
-        x->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mode] +
-        cfl_alpha_rate;
-    this_rate = tokenonly_rd_stats.rate +
-                intra_mode_info_cost_uv(cpi, x, mbmi, bsize, mode_cost);
-    if (mode == UV_CFL_PRED) {
-      assert(is_cfl_allowed(xd));
-#if CONFIG_DEBUG
-      if (!xd->lossless[mbmi->segment_id])
-        assert(xd->cfl.rate == tokenonly_rd_stats.rate + mode_cost);
-#endif  // CONFIG_DEBUG
-    }
-    this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
-
-    if (this_rd < best_rd) {
-      best_mbmi = *mbmi;
-      best_rd = this_rd;
-      *rate = this_rate;
-      *rate_tokenonly = tokenonly_rd_stats.rate;
-      *distortion = tokenonly_rd_stats.dist;
-      *skippable = tokenonly_rd_stats.skip;
-    }
-  }
-
-  const int try_palette =
-      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
-  if (try_palette) {
-    uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map;
-    rd_pick_palette_intra_sbuv(
-        cpi, x,
-        x->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][UV_DC_PRED],
-        best_palette_color_map, &best_mbmi, &best_rd, rate, rate_tokenonly,
-        distortion, skippable);
-  }
-
-  *mbmi = best_mbmi;
-  // Make sure we actually chose a mode
-  assert(best_rd < INT64_MAX);
-  return best_rd;
-}
-
-static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
-                                 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
-                                 int *rate_uv, int *rate_uv_tokenonly,
-                                 int64_t *dist_uv, int *skip_uv,
-                                 UV_PREDICTION_MODE *mode_uv) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
-  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
-  // Use an estimated rd for uv_intra based on DC_PRED if the
-  // appropriate speed flag is set.
-  init_sbuv_mode(mbmi);
-  if (x->skip_chroma_rd) {
-    *rate_uv = 0;
-    *rate_uv_tokenonly = 0;
-    *dist_uv = 0;
-    *skip_uv = 1;
-    *mode_uv = UV_DC_PRED;
-    return;
-  }
-  xd->cfl.is_chroma_reference =
-      is_chroma_reference(mi_row, mi_col, bsize, cm->seq_params.subsampling_x,
-                          cm->seq_params.subsampling_y);
-  bsize = scale_chroma_bsize(bsize, xd->plane[AOM_PLANE_U].subsampling_x,
-                             xd->plane[AOM_PLANE_U].subsampling_y);
-  // Only store reconstructed luma when there's chroma RDO. When there's no
-  // chroma RDO, the reconstructed luma will be stored in encode_superblock().
-  xd->cfl.store_y = store_cfl_required_rdo(cm, x);
-  if (xd->cfl.store_y) {
-    // Restore reconstructed luma values.
-    av1_encode_intra_block_plane(cpi, x, mbmi->sb_type, AOM_PLANE_Y,
-                                 cpi->optimize_seg_arr[mbmi->segment_id],
-                                 mi_row, mi_col);
-    xd->cfl.store_y = 0;
-  }
-  rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
-                          bsize, max_tx_size);
-  *mode_uv = mbmi->uv_mode;
-}
-
-static int cost_mv_ref(const MACROBLOCK *const x, PREDICTION_MODE mode,
-                       int16_t mode_context) {
-  if (is_inter_compound_mode(mode)) {
-    return x
-        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
-  }
-
-  int mode_cost = 0;
-  int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
-
-  assert(is_inter_mode(mode));
-
-  if (mode == NEWMV) {
-    mode_cost = x->newmv_mode_cost[mode_ctx][0];
-    return mode_cost;
-  } else {
-    mode_cost = x->newmv_mode_cost[mode_ctx][1];
-    mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
-
-    if (mode == GLOBALMV) {
-      mode_cost += x->zeromv_mode_cost[mode_ctx][0];
-      return mode_cost;
-    } else {
-      mode_cost += x->zeromv_mode_cost[mode_ctx][1];
-      mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
-      mode_cost += x->refmv_mode_cost[mode_ctx][mode != NEARESTMV];
-      return mode_cost;
-    }
-  }
-}
-
-static int get_interinter_compound_mask_rate(const MACROBLOCK *const x,
-                                             const MB_MODE_INFO *const mbmi) {
-  switch (mbmi->interinter_comp.type) {
-    case COMPOUND_AVERAGE: return 0;
-    case COMPOUND_WEDGE:
-      return get_interinter_wedge_bits(mbmi->sb_type) > 0
-                 ? av1_cost_literal(1) +
-                       x->wedge_idx_cost[mbmi->sb_type]
-                                        [mbmi->interinter_comp.wedge_index]
-                 : 0;
-    case COMPOUND_DIFFWTD: return av1_cost_literal(1);
-    default: assert(0); return 0;
-  }
-}
-
-typedef struct {
-  int eobs;
-  int brate;
-  int byrate;
-  int64_t bdist;
-  int64_t bsse;
-  int64_t brdcost;
-  int_mv mvs[2];
-  int_mv pred_mv[2];
-  int_mv ref_mv[2];
-
-  ENTROPY_CONTEXT ta[2];
-  ENTROPY_CONTEXT tl[2];
-} SEG_RDSTAT;
-
-typedef struct {
-  int_mv *ref_mv[2];
-  int_mv mvp;
-
-  int64_t segment_rd;
-  int r;
-  int64_t d;
-  int64_t sse;
-  int segment_yrate;
-  PREDICTION_MODE modes[4];
-  SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES];
-  int mvthresh;
-} BEST_SEG_INFO;
-
-static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {
-  return (mv->row >> 3) < mv_limits->row_min ||
-         (mv->row >> 3) > mv_limits->row_max ||
-         (mv->col >> 3) < mv_limits->col_min ||
-         (mv->col >> 3) > mv_limits->col_max;
-}
-
-static INLINE PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
-                                              int ref_idx, int is_comp_pred) {
-  PREDICTION_MODE single_mode;
-  if (is_comp_pred) {
-    single_mode =
-        ref_idx ? compound_ref1_mode(this_mode) : compound_ref0_mode(this_mode);
-  } else {
-    single_mode = this_mode;
-  }
-  return single_mode;
-}
-
-static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
-                                BLOCK_SIZE bsize, int_mv *cur_mv, int mi_row,
-                                int mi_col, int_mv *ref_mv_sub8x8[2],
-                                const uint8_t *mask, int mask_stride,
-                                int *rate_mv, const int block) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  const int pw = block_size_wide[bsize];
-  const int ph = block_size_high[bsize];
-  const int plane = 0;
-  MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  // This function should only ever be called for compound modes
-  assert(has_second_ref(mbmi));
-  const int_mv init_mv[2] = { cur_mv[0], cur_mv[1] };
-  const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
-  int_mv ref_mv[2];
-  int ite, ref;
-  // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
-  const int ic = block & 1;
-  const int ir = (block - ic) >> 1;
-  struct macroblockd_plane *const pd = &xd->plane[0];
-  const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
-  const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
-
-  ConvolveParams conv_params = get_conv_params(0, plane, xd->bd);
-  conv_params.use_jnt_comp_avg = 0;
-  WarpTypesAllowed warp_types[2];
-  for (ref = 0; ref < 2; ++ref) {
-    const WarpedMotionParams *const wm =
-        &xd->global_motion[xd->mi[0]->ref_frame[ref]];
-    const int is_global = is_global_mv_block(xd->mi[0], wm->wmtype);
-    warp_types[ref].global_warp_allowed = is_global;
-    warp_types[ref].local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
-  }
-
-  // Do joint motion search in compound mode to get more accurate mv.
-  struct buf_2d backup_yv12[2][MAX_MB_PLANE];
-  int last_besterr[2] = { INT_MAX, INT_MAX };
-  const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
-    av1_get_scaled_ref_frame(cpi, refs[0]),
-    av1_get_scaled_ref_frame(cpi, refs[1])
-  };
-
-  // Prediction buffer from second frame.
-  DECLARE_ALIGNED(16, uint8_t, second_pred16[MAX_SB_SQUARE * sizeof(uint16_t)]);
-  uint8_t *second_pred = get_buf_by_bd(xd, second_pred16);
-  (void)ref_mv_sub8x8;
-
-  const int have_newmv = have_nearmv_in_inter_mode(mbmi->mode);
-  const int ref_mv_idx = mbmi->ref_mv_idx + (have_newmv ? 1 : 0);
-  MV *const best_mv = &x->best_mv.as_mv;
-  const int search_range = SEARCH_RANGE_8P;
-  const int sadpb = x->sadperbit16;
-  // Allow joint search multiple times iteratively for each reference frame
-  // and break out of the search loop if it couldn't find a better mv.
-  for (ite = 0; ite < 4; ite++) {
-    struct buf_2d ref_yv12[2];
-    int bestsme = INT_MAX;
-    MvLimits tmp_mv_limits = x->mv_limits;
-    int id = ite % 2;  // Even iterations search in the first reference frame,
-                       // odd iterations search in the second. The predictor
-                       // found for the 'other' reference frame is factored in.
-    if (ite >= 2 && cur_mv[!id].as_int == init_mv[!id].as_int) {
-      if (cur_mv[id].as_int == init_mv[id].as_int) {
-        break;
-      } else {
-        int_mv cur_int_mv, init_int_mv;
-        cur_int_mv.as_mv.col = cur_mv[id].as_mv.col >> 3;
-        cur_int_mv.as_mv.row = cur_mv[id].as_mv.col >> 3;
-        init_int_mv.as_mv.row = init_mv[id].as_mv.row >> 3;
-        init_int_mv.as_mv.col = init_mv[id].as_mv.col >> 3;
-        if (cur_int_mv.as_int == init_int_mv.as_int) {
-          break;
-        }
-      }
-    }
-    for (ref = 0; ref < 2; ++ref) {
-      ref_mv[ref] = av1_get_ref_mv(x, ref);
-      // Swap out the reference frame for a version that's been scaled to
-      // match the resolution of the current frame, allowing the existing
-      // motion search code to be used without additional modifications.
-      if (scaled_ref_frame[ref]) {
-        int i;
-        for (i = 0; i < num_planes; i++)
-          backup_yv12[ref][i] = xd->plane[i].pre[ref];
-        av1_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
-                             NULL, num_planes);
-      }
-    }
-
-    assert(IMPLIES(scaled_ref_frame[0] != NULL,
-                   cm->width == scaled_ref_frame[0]->y_crop_width &&
-                       cm->height == scaled_ref_frame[0]->y_crop_height));
-    assert(IMPLIES(scaled_ref_frame[1] != NULL,
-                   cm->width == scaled_ref_frame[1]->y_crop_width &&
-                       cm->height == scaled_ref_frame[1]->y_crop_height));
-
-    // Initialize based on (possibly scaled) prediction buffers.
-    ref_yv12[0] = xd->plane[plane].pre[0];
-    ref_yv12[1] = xd->plane[plane].pre[1];
-
-    // Get the prediction block from the 'other' reference frame.
-    const InterpFilters interp_filters = EIGHTTAP_REGULAR;
-
-    // Since we have scaled the reference frames to match the size of the
-    // current frame we must use a unit scaling factor during mode selection.
-    av1_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride,
-                              second_pred, pw, &cur_mv[!id].as_mv,
-                              &cm->sf_identity, pw, ph, &conv_params,
-                              interp_filters, &warp_types[!id], p_col, p_row,
-                              plane, !id, MV_PRECISION_Q3, mi_col * MI_SIZE,
-                              mi_row * MI_SIZE, xd, cm->allow_warped_motion);
-
-    const int order_idx = id != 0;
-    av1_jnt_comp_weight_assign(cm, mbmi, order_idx, &xd->jcp_param.fwd_offset,
-                               &xd->jcp_param.bck_offset,
-                               &xd->jcp_param.use_jnt_comp_avg, 1);
-
-    // Do full-pixel compound motion search on the current reference frame.
-    if (id) xd->plane[plane].pre[0] = ref_yv12[id];
-    av1_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv);
-
-    // Use the mv result from the single mode as mv predictor.
-    *best_mv = cur_mv[id].as_mv;
-
-    best_mv->col >>= 3;
-    best_mv->row >>= 3;
-
-    av1_set_mvcost(x, id, ref_mv_idx);
-
-    // Small-range full-pixel motion search.
-    bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
-                                       &cpi->fn_ptr[bsize], mask, mask_stride,
-                                       id, &ref_mv[id].as_mv, second_pred);
-    if (bestsme < INT_MAX) {
-      if (mask)
-        bestsme = av1_get_mvpred_mask_var(x, best_mv, &ref_mv[id].as_mv,
-                                          second_pred, mask, mask_stride, id,
-                                          &cpi->fn_ptr[bsize], 1);
-      else
-        bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv,
-                                        second_pred, &cpi->fn_ptr[bsize], 1);
-    }
-
-    x->mv_limits = tmp_mv_limits;
-
-    // Restore the pointer to the first (possibly scaled) prediction buffer.
-    if (id) xd->plane[plane].pre[0] = ref_yv12[0];
-
-    for (ref = 0; ref < 2; ++ref) {
-      if (scaled_ref_frame[ref]) {
-        // Swap back the original buffers for subpel motion search.
-        for (int i = 0; i < num_planes; i++) {
-          xd->plane[i].pre[ref] = backup_yv12[ref][i];
-        }
-        // Re-initialize based on unscaled prediction buffers.
-        ref_yv12[ref] = xd->plane[plane].pre[ref];
-      }
-    }
-
-    // Do sub-pixel compound motion search on the current reference frame.
-    if (id) xd->plane[plane].pre[0] = ref_yv12[id];
-
-    if (cpi->common.cur_frame_force_integer_mv) {
-      x->best_mv.as_mv.row *= 8;
-      x->best_mv.as_mv.col *= 8;
-    }
-    if (bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0) {
-      int dis; /* TODO: use dis in distortion calculation later. */
-      unsigned int sse;
-      bestsme = cpi->find_fractional_mv_step(
-          x, cm, mi_row, mi_col, &ref_mv[id].as_mv,
-          cpi->common.allow_high_precision_mv, x->errorperbit,
-          &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
-          x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
-          mask_stride, id, pw, ph, cpi->sf.use_accurate_subpel_search);
-    }
-
-    // Restore the pointer to the first prediction buffer.
-    if (id) xd->plane[plane].pre[0] = ref_yv12[0];
-    if (bestsme < last_besterr[id]) {
-      cur_mv[id].as_mv = *best_mv;
-      last_besterr[id] = bestsme;
-    } else {
-      break;
-    }
-  }
-
-  *rate_mv = 0;
-
-  for (ref = 0; ref < 2; ++ref) {
-    av1_set_mvcost(x, ref, ref_mv_idx);
-    const int_mv curr_ref_mv = av1_get_ref_mv(x, ref);
-    *rate_mv += av1_mv_bit_cost(&cur_mv[ref].as_mv, &curr_ref_mv.as_mv,
-                                x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
-  }
-}
-
-static void estimate_ref_frame_costs(
-    const AV1_COMMON *cm, const MACROBLOCKD *xd, const MACROBLOCK *x,
-    int segment_id, unsigned int *ref_costs_single,
-    unsigned int (*ref_costs_comp)[REF_FRAMES]) {
-  int seg_ref_active =
-      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
-  if (seg_ref_active) {
-    memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
-    int ref_frame;
-    for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
-      memset(ref_costs_comp[ref_frame], 0,
-             REF_FRAMES * sizeof((*ref_costs_comp)[0]));
-  } else {
-    int intra_inter_ctx = av1_get_intra_inter_context(xd);
-    ref_costs_single[INTRA_FRAME] = x->intra_inter_cost[intra_inter_ctx][0];
-    unsigned int base_cost = x->intra_inter_cost[intra_inter_ctx][1];
-
-    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
-      ref_costs_single[i] = base_cost;
-
-    const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
-    const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
-    const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
-    const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
-    const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
-    const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);
-
-    // Determine cost of a single ref frame, where frame types are represented
-    // by a tree:
-    // Level 0: add cost whether this ref is a forward or backward ref
-    ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p1][0][0];
-    ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p1][0][0];
-    ref_costs_single[LAST3_FRAME] += x->single_ref_cost[ctx_p1][0][0];
-    ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p1][0][0];
-    ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p1][0][1];
-    ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p1][0][1];
-    ref_costs_single[ALTREF_FRAME] += x->single_ref_cost[ctx_p1][0][1];
-
-    // Level 1: if this ref is forward ref,
-    // add cost whether it is last/last2 or last3/golden
-    ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p3][2][0];
-    ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p3][2][0];
-    ref_costs_single[LAST3_FRAME] += x->single_ref_cost[ctx_p3][2][1];
-    ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p3][2][1];
-
-    // Level 1: if this ref is backward ref
-    // then add cost whether this ref is altref or backward ref
-    ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p2][1][0];
-    ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p2][1][0];
-    ref_costs_single[ALTREF_FRAME] += x->single_ref_cost[ctx_p2][1][1];
-
-    // Level 2: further add cost whether this ref is last or last2
-    ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p4][3][0];
-    ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p4][3][1];
-
-    // Level 2: last3 or golden
-    ref_costs_single[LAST3_FRAME] += x->single_ref_cost[ctx_p5][4][0];
-    ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p5][4][1];
-
-    // Level 2: bwdref or altref2
-    ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p6][5][0];
-    ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p6][5][1];
-
-    if (cm->reference_mode != SINGLE_REFERENCE) {
-      // Similar to single ref, determine cost of compound ref frames.
-      // cost_compound_refs = cost_first_ref + cost_second_ref
-      const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
-      const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
-      const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
-      const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
-      const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);
-
-      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
-      unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };
-
-      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
-          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
-              base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][1];
-      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
-      ref_bicomp_costs[ALTREF_FRAME] = 0;
-
-      // cost of first ref frame
-      ref_bicomp_costs[LAST_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][0];
-      ref_bicomp_costs[LAST2_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][0];
-      ref_bicomp_costs[LAST3_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][1];
-      ref_bicomp_costs[GOLDEN_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][1];
-
-      ref_bicomp_costs[LAST_FRAME] += x->comp_ref_cost[ref_comp_ctx_p1][1][0];
-      ref_bicomp_costs[LAST2_FRAME] += x->comp_ref_cost[ref_comp_ctx_p1][1][1];
-
-      ref_bicomp_costs[LAST3_FRAME] += x->comp_ref_cost[ref_comp_ctx_p2][2][0];
-      ref_bicomp_costs[GOLDEN_FRAME] += x->comp_ref_cost[ref_comp_ctx_p2][2][1];
-
-      // cost of second ref frame
-      ref_bicomp_costs[BWDREF_FRAME] +=
-          x->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
-      ref_bicomp_costs[ALTREF2_FRAME] +=
-          x->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
-      ref_bicomp_costs[ALTREF_FRAME] +=
-          x->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];
-
-      ref_bicomp_costs[BWDREF_FRAME] +=
-          x->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
-      ref_bicomp_costs[ALTREF2_FRAME] +=
-          x->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];
-
-      // cost: if one ref frame is forward ref, the other ref is backward ref
-      int ref0, ref1;
-      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
-        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
-          ref_costs_comp[ref0][ref1] =
-              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
-        }
-      }
-
-      // cost: if both ref frames are the same side.
-      const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
-      const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
-      const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
-      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
-          base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
-          x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
-          x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
-      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
-          base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
-          x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
-          x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
-          x->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
-      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
-          base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
-          x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
-          x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
-          x->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
-      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
-          base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
-          x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
-    } else {
-      int ref0, ref1;
-      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
-        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
-          ref_costs_comp[ref0][ref1] = 512;
-      }
-      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
-      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
-      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
-      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
-    }
-  }
-}
-
-static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
-                                 int mode_index,
-                                 int64_t comp_pred_diff[REFERENCE_MODES],
-                                 int skippable) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-
-  // Take a snapshot of the coding context so it can be
-  // restored if we decide to encode this way
-  ctx->skip = x->skip;
-  ctx->skippable = skippable;
-  ctx->best_mode_index = mode_index;
-  ctx->mic = *xd->mi[0];
-  ctx->mbmi_ext = *x->mbmi_ext;
-  ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
-  ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
-  ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
-}
-
-static void setup_buffer_ref_mvs_inter(
-    const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
-    BLOCK_SIZE block_size, int mi_row, int mi_col,
-    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
-  const AV1_COMMON *cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
-  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
-
-  assert(yv12 != NULL);
-
-  // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
-  // use the UV scaling factors.
-  av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf,
-                       num_planes);
-
-  // Gets an initial list of candidate vectors from neighbours and orders them
-  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
-                   mbmi_ext->ref_mv_stack, NULL, mbmi_ext->global_mvs, mi_row,
-                   mi_col, mbmi_ext->mode_context);
-
-  // Further refinement that is encode side only to test the top few candidates
-  // in full and choose the best as the centre point for subsequent searches.
-  // The current implementation doesn't support scaling.
-  (void)block_size;
-  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
-              block_size);
-}
-
-static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
-                                 int ref_idx, int *rate_mv) {
-  MACROBLOCKD *xd = &x->e_mbd;
-  const AV1_COMMON *cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
-  int bestsme = INT_MAX;
-  int step_param;
-  int sadpb = x->sadperbit16;
-  MV mvp_full;
-  int ref = mbmi->ref_frame[ref_idx];
-  MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
-
-  MvLimits tmp_mv_limits = x->mv_limits;
-  int cost_list[5];
-
-  const YV12_BUFFER_CONFIG *scaled_ref_frame =
-      av1_get_scaled_ref_frame(cpi, ref);
-
-  if (scaled_ref_frame) {
-    // Swap out the reference frame for a version that's been scaled to
-    // match the resolution of the current frame, allowing the existing
-    // full-pixel motion search code to be used without additional
-    // modifications.
-    for (int i = 0; i < num_planes; i++) {
-      backup_yv12[i] = xd->plane[i].pre[ref_idx];
-    }
-    av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL,
-                         num_planes);
-  }
-
-  av1_set_mvcost(
-      x, ref_idx,
-      mbmi->ref_mv_idx + (have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0));
-
-  // Work out the size of the first step in the mv step search.
-  // 0 here is maximum length first step. 1 is AOMMAX >> 1 etc.
-  if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
-    // Take the weighted average of the step_params based on the last frame's
-    // max mv magnitude and that based on the best ref mvs of the current
-    // block for the given reference.
-    step_param =
-        (av1_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
-        2;
-  } else {
-    step_param = cpi->mv_step_param;
-  }
-
-  if (cpi->sf.adaptive_motion_search && bsize < cm->seq_params.sb_size) {
-    int boffset =
-        2 * (mi_size_wide_log2[cm->seq_params.sb_size] -
-             AOMMIN(mi_size_high_log2[bsize], mi_size_wide_log2[bsize]));
-    step_param = AOMMAX(step_param, boffset);
-  }
-
-  if (cpi->sf.adaptive_motion_search) {
-    int bwl = mi_size_wide_log2[bsize];
-    int bhl = mi_size_high_log2[bsize];
-    int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
-
-    if (tlevel < 5) {
-      step_param += 2;
-      step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 1);
-    }
-
-    // prev_mv_sad is not setup for dynamically scaled frames.
-    if (cpi->oxcf.resize_mode != RESIZE_RANDOM) {
-      int i;
-      for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
-        if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
-          x->pred_mv[ref].row = 0;
-          x->pred_mv[ref].col = 0;
-          x->best_mv.as_int = INVALID_MV;
-
-          if (scaled_ref_frame) {
-            // Swap back the original buffers before returning.
-            for (int j = 0; j < num_planes; ++j)
-              xd->plane[j].pre[ref_idx] = backup_yv12[j];
-          }
-          return;
-        }
-      }
-    }
-  }
-
-  // Note: MV limits are modified here. Always restore the original values
-  // after full-pixel motion search.
-  av1_set_mv_search_range(&x->mv_limits, &ref_mv);
-
-  if (mbmi->motion_mode != SIMPLE_TRANSLATION)
-    mvp_full = mbmi->mv[0].as_mv;
-  else
-    mvp_full = ref_mv;
-
-  mvp_full.col >>= 3;
-  mvp_full.row >>= 3;
-
-  x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV;
-
-  switch (mbmi->motion_mode) {
-    case SIMPLE_TRANSLATION:
-      bestsme = av1_full_pixel_search(
-          cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, 0,
-          sadpb, cond_cost_list(cpi, cost_list), &ref_mv, INT_MAX, 1,
-          (MI_SIZE * mi_col), (MI_SIZE * mi_row), 0);
-      break;
-    case OBMC_CAUSAL:
-      bestsme = av1_obmc_full_pixel_search(cpi, x, &mvp_full, step_param, sadpb,
-                                           MAX_MVSEARCH_STEPS - 1 - step_param,
-                                           1, &cpi->fn_ptr[bsize], &ref_mv,
-                                           &(x->best_mv.as_mv), 0);
-      break;
-    default: assert(0 && "Invalid motion mode!\n");
-  }
-
-  if (scaled_ref_frame) {
-    // Swap back the original buffers for subpel motion search.
-    for (int i = 0; i < num_planes; i++) {
-      xd->plane[i].pre[ref_idx] = backup_yv12[i];
-    }
-  }
-
-  x->mv_limits = tmp_mv_limits;
-
-  if (cpi->common.cur_frame_force_integer_mv) {
-    x->best_mv.as_mv.row *= 8;
-    x->best_mv.as_mv.col *= 8;
-  }
-  const int use_fractional_mv =
-      bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0;
-  if (use_fractional_mv) {
-    int dis; /* TODO: use dis in distortion calculation later. */
-    switch (mbmi->motion_mode) {
-      case SIMPLE_TRANSLATION:
-        if (cpi->sf.use_accurate_subpel_search) {
-          int best_mv_var;
-          const int try_second = x->second_best_mv.as_int != INVALID_MV &&
-                                 x->second_best_mv.as_int != x->best_mv.as_int;
-          const int pw = block_size_wide[bsize];
-          const int ph = block_size_high[bsize];
-
-          best_mv_var = cpi->find_fractional_mv_step(
-              x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv,
-              x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
-              cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
-              x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL,
-              0, 0, pw, ph, 1);
-
-          if (try_second) {
-            const int minc =
-                AOMMAX(x->mv_limits.col_min * 8, ref_mv.col - MV_MAX);
-            const int maxc =
-                AOMMIN(x->mv_limits.col_max * 8, ref_mv.col + MV_MAX);
-            const int minr =
-                AOMMAX(x->mv_limits.row_min * 8, ref_mv.row - MV_MAX);
-            const int maxr =
-                AOMMIN(x->mv_limits.row_max * 8, ref_mv.row + MV_MAX);
-            int this_var;
-            MV best_mv = x->best_mv.as_mv;
-
-            x->best_mv = x->second_best_mv;
-            if (x->best_mv.as_mv.row * 8 <= maxr &&
-                x->best_mv.as_mv.row * 8 >= minr &&
-                x->best_mv.as_mv.col * 8 <= maxc &&
-                x->best_mv.as_mv.col * 8 >= minc) {
-              this_var = cpi->find_fractional_mv_step(
-                  x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv,
-                  x->errorperbit, &cpi->fn_ptr[bsize],
-                  cpi->sf.mv.subpel_force_stop,
-                  cpi->sf.mv.subpel_iters_per_step,
-                  cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
-                  &dis, &x->pred_sse[ref], NULL, NULL, 0, 0, pw, ph, 1);
-              if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
-              x->best_mv.as_mv = best_mv;
-            }
-          }
-        } else {
-          cpi->find_fractional_mv_step(
-              x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv,
-              x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
-              cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
-              x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL,
-              0, 0, 0, 0, 0);
-        }
-        break;
-      case OBMC_CAUSAL:
-        av1_find_best_obmc_sub_pixel_tree_up(
-            x, cm, mi_row, mi_col, &x->best_mv.as_mv, &ref_mv,
-            cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize],
-            cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step,
-            x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], 0,
-            cpi->sf.use_accurate_subpel_search);
-        break;
-      default: assert(0 && "Invalid motion mode!\n");
-    }
-  }
-  *rate_mv = av1_mv_bit_cost(&x->best_mv.as_mv, &ref_mv, x->nmvjointcost,
-                             x->mvcost, MV_COST_WEIGHT);
-
-  if (cpi->sf.adaptive_motion_search && mbmi->motion_mode == SIMPLE_TRANSLATION)
-    x->pred_mv[ref] = x->best_mv.as_mv;
-}
-
-static INLINE void restore_dst_buf(MACROBLOCKD *xd, BUFFER_SET dst,
-                                   const int num_planes) {
-  int i;
-  for (i = 0; i < num_planes; i++) {
-    xd->plane[i].dst.buf = dst.plane[i];
-    xd->plane[i].dst.stride = dst.stride[i];
-  }
-}
-
-static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
-                                    BLOCK_SIZE bsize, const MV *other_mv,
-                                    int mi_row, int mi_col, const int block,
-                                    int ref_idx, uint8_t *second_pred) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int pw = block_size_wide[bsize];
-  const int ph = block_size_high[bsize];
-  MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  const int other_ref = mbmi->ref_frame[!ref_idx];
-  struct macroblockd_plane *const pd = &xd->plane[0];
-  // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
-  const int ic = block & 1;
-  const int ir = (block - ic) >> 1;
-  const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
-  const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
-  const WarpedMotionParams *const wm = &xd->global_motion[other_ref];
-  int is_global = is_global_mv_block(xd->mi[0], wm->wmtype);
-
-  // This function should only ever be called for compound modes
-  assert(has_second_ref(mbmi));
-
-  const int plane = 0;
-  struct buf_2d ref_yv12 = xd->plane[plane].pre[!ref_idx];
-
-  struct scale_factors sf;
-  av1_setup_scale_factors_for_frame(&sf, ref_yv12.width, ref_yv12.height,
-                                    cm->width, cm->height);
-
-  ConvolveParams conv_params = get_conv_params(0, plane, xd->bd);
-  WarpTypesAllowed warp_types;
-  warp_types.global_warp_allowed = is_global;
-  warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
-
-  // Get the prediction block from the 'other' reference frame.
-  av1_build_inter_predictor(ref_yv12.buf, ref_yv12.stride, second_pred, pw,
-                            other_mv, &sf, pw, ph, &conv_params,
-                            mbmi->interp_filters, &warp_types, p_col, p_row,
-                            plane, !ref_idx, MV_PRECISION_Q3, mi_col * MI_SIZE,
-                            mi_row * MI_SIZE, xd, cm->allow_warped_motion);
-
-  av1_jnt_comp_weight_assign(cm, mbmi, 0, &xd->jcp_param.fwd_offset,
-                             &xd->jcp_param.bck_offset,
-                             &xd->jcp_param.use_jnt_comp_avg, 1);
-}
-
-// Search for the best mv for one component of a compound,
-// given that the other component is fixed.
-static void compound_single_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
-                                          BLOCK_SIZE bsize, MV *this_mv,
-                                          int mi_row, int mi_col,
-                                          const uint8_t *second_pred,
-                                          const uint8_t *mask, int mask_stride,
-                                          int *rate_mv, int ref_idx) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  const int pw = block_size_wide[bsize];
-  const int ph = block_size_high[bsize];
-  MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  const int ref = mbmi->ref_frame[ref_idx];
-  const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
-  struct macroblockd_plane *const pd = &xd->plane[0];
-
-  struct buf_2d backup_yv12[MAX_MB_PLANE];
-  const YV12_BUFFER_CONFIG *const scaled_ref_frame =
-      av1_get_scaled_ref_frame(cpi, ref);
-
-  // Check that this is either an interinter or an interintra block
-  assert(has_second_ref(mbmi) || (ref_idx == 0 && is_interintra_mode(mbmi)));
-
-  // Store the first prediction buffer.
-  struct buf_2d orig_yv12;
-  if (ref_idx) {
-    orig_yv12 = pd->pre[0];
-    pd->pre[0] = pd->pre[ref_idx];
-  }
-
-  if (scaled_ref_frame) {
-    int i;
-    // Swap out the reference frame for a version that's been scaled to
-    // match the resolution of the current frame, allowing the existing
-    // full-pixel motion search code to be used without additional
-    // modifications.
-    for (i = 0; i < num_planes; i++) backup_yv12[i] = xd->plane[i].pre[ref_idx];
-    av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL,
-                         num_planes);
-  }
-
-  int bestsme = INT_MAX;
-  int sadpb = x->sadperbit16;
-  MV *const best_mv = &x->best_mv.as_mv;
-  int search_range = SEARCH_RANGE_8P;
-
-  MvLimits tmp_mv_limits = x->mv_limits;
-
-  // Do compound motion search on the current reference frame.
-  av1_set_mv_search_range(&x->mv_limits, &ref_mv.as_mv);
-
-  // Use the mv result from the single mode as mv predictor.
-  *best_mv = *this_mv;
-
-  best_mv->col >>= 3;
-  best_mv->row >>= 3;
-
-  av1_set_mvcost(
-      x, ref_idx,
-      mbmi->ref_mv_idx + (have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0));
-
-  // Small-range full-pixel motion search.
-  bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
-                                     &cpi->fn_ptr[bsize], mask, mask_stride,
-                                     ref_idx, &ref_mv.as_mv, second_pred);
-  if (bestsme < INT_MAX) {
-    if (mask)
-      bestsme =
-          av1_get_mvpred_mask_var(x, best_mv, &ref_mv.as_mv, second_pred, mask,
-                                  mask_stride, ref_idx, &cpi->fn_ptr[bsize], 1);
-    else
-      bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv.as_mv, second_pred,
-                                      &cpi->fn_ptr[bsize], 1);
-  }
-
-  x->mv_limits = tmp_mv_limits;
-
-  if (scaled_ref_frame) {
-    // Swap back the original buffers for subpel motion search.
-    for (int i = 0; i < num_planes; i++) {
-      xd->plane[i].pre[ref_idx] = backup_yv12[i];
-    }
-  }
-
-  if (cpi->common.cur_frame_force_integer_mv) {
-    x->best_mv.as_mv.row *= 8;
-    x->best_mv.as_mv.col *= 8;
-  }
-  const int use_fractional_mv =
-      bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0;
-  if (use_fractional_mv) {
-    int dis; /* TODO: use dis in distortion calculation later. */
-    unsigned int sse;
-    bestsme = cpi->find_fractional_mv_step(
-        x, cm, mi_row, mi_col, &ref_mv.as_mv,
-        cpi->common.allow_high_precision_mv, x->errorperbit,
-        &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
-        x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, mask_stride,
-        ref_idx, pw, ph, cpi->sf.use_accurate_subpel_search);
-  }
-
-  // Restore the pointer to the first unscaled prediction buffer.
-  if (ref_idx) pd->pre[0] = orig_yv12;
-
-  if (bestsme < INT_MAX) *this_mv = *best_mv;
-
-  *rate_mv = 0;
-
-  av1_set_mvcost(
-      x, ref_idx,
-      mbmi->ref_mv_idx + (have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0));
-  *rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost,
-                              x->mvcost, MV_COST_WEIGHT);
-}
-
-// Wrapper for compound_single_motion_search, for the common case
-// where the second prediction is also an inter mode.
-static void compound_single_motion_search_interinter(
-    const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *cur_mv,
-    int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv,
-    const int block, int ref_idx) {
-  MACROBLOCKD *xd = &x->e_mbd;
-  // This function should only ever be called for compound modes
-  assert(has_second_ref(xd->mi[0]));
-
-  // Prediction buffer from second frame.
-  DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
-  uint8_t *second_pred;
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
-    second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
-  else
-    second_pred = (uint8_t *)second_pred_alloc_16;
-
-  MV *this_mv = &cur_mv[ref_idx].as_mv;
-  const MV *other_mv = &cur_mv[!ref_idx].as_mv;
-
-  build_second_inter_pred(cpi, x, bsize, other_mv, mi_row, mi_col, block,
-                          ref_idx, second_pred);
-
-  compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col,
-                                second_pred, mask, mask_stride, rate_mv,
-                                ref_idx);
-}
-
-static void do_masked_motion_search_indexed(
-    const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
-    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE bsize,
-    int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv, int which) {
-  // NOTE: which values: 0 - 0 only, 1 - 1 only, 2 - both
-  MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  BLOCK_SIZE sb_type = mbmi->sb_type;
-  const uint8_t *mask;
-  const int mask_stride = block_size_wide[bsize];
-
-  mask = av1_get_compound_type_mask(comp_data, sb_type);
-
-  tmp_mv[0].as_int = cur_mv[0].as_int;
-  tmp_mv[1].as_int = cur_mv[1].as_int;
-  if (which == 0 || which == 1) {
-    compound_single_motion_search_interinter(cpi, x, bsize, tmp_mv, mi_row,
-                                             mi_col, mask, mask_stride, rate_mv,
-                                             0, which);
-  } else if (which == 2) {
-    joint_motion_search(cpi, x, bsize, tmp_mv, mi_row, mi_col, NULL, mask,
-                        mask_stride, rate_mv, 0);
-  }
-}
-
-#define USE_DISCOUNT_NEWMV_TEST 0
-#if USE_DISCOUNT_NEWMV_TEST
-// In some situations we want to discount the apparent cost of a new motion
-// vector. Where there is a subtle motion field and especially where there is
-// low spatial complexity then it can be hard to cover the cost of a new motion
-// vector in a single block, even if that motion vector reduces distortion.
-// However, once established that vector may be usable through the nearest and
-// near mv modes to reduce distortion in subsequent blocks and also improve
-// visual quality.
-#define NEW_MV_DISCOUNT_FACTOR 8
-static INLINE void get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
-                               int ref_idx, int ref_mv_idx,
-                               const MV_REFERENCE_FRAME *ref_frame,
-                               const MB_MODE_INFO_EXT *mbmi_ext);
-static int discount_newmv_test(const AV1_COMP *const cpi, const MACROBLOCK *x,
-                               PREDICTION_MODE this_mode, int_mv this_mv) {
-  if (this_mode == NEWMV && this_mv.as_int != 0 &&
-      !cpi->rc.is_src_frame_alt_ref) {
-    // Only discount new_mv when nearst_mv and all near_mv are zero, and the
-    // new_mv is not equal to global_mv
-    const AV1_COMMON *const cm = &cpi->common;
-    const MACROBLOCKD *const xd = &x->e_mbd;
-    const MB_MODE_INFO *const mbmi = xd->mi[0];
-    const MV_REFERENCE_FRAME tmp_ref_frames[2] = { mbmi->ref_frame[0],
-                                                   NONE_FRAME };
-    const uint8_t ref_frame_type = av1_ref_frame_type(tmp_ref_frames);
-    int_mv nearest_mv;
-    get_this_mv(&nearest_mv, NEARESTMV, 0, 0, tmp_ref_frames, x->mbmi_ext);
-    int ret = nearest_mv.as_int == 0;
-    for (int ref_mv_idx = 0;
-         ref_mv_idx < x->mbmi_ext->ref_mv_count[ref_frame_type]; ++ref_mv_idx) {
-      int_mv near_mv;
-      get_this_mv(&near_mv, NEARMV, 0, ref_mv_idx, tmp_ref_frames, x->mbmi_ext);
-      ret &= near_mv.as_int == 0;
-    }
-    if (cm->global_motion[tmp_ref_frames[0]].wmtype <= TRANSLATION) {
-      int_mv global_mv;
-      get_this_mv(&global_mv, GLOBALMV, 0, 0, tmp_ref_frames, x->mbmi_ext);
-      ret &= global_mv.as_int != this_mv.as_int;
-    }
-    return ret;
-  }
-  return 0;
-}
-#endif
-
-#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
-#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
-
-// TODO(jingning): this mv clamping function should be block size dependent.
-static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
-  clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
-           xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
-           xd->mb_to_top_edge - LEFT_TOP_MARGIN,
-           xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
-}
-
-static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x,
-                               const BLOCK_SIZE bsize, const uint8_t *pred0,
-                               int stride0, const uint8_t *pred1, int stride1) {
-  static const BLOCK_SIZE split_qtr[BLOCK_SIZES_ALL] = {
-    //                            4X4
-    BLOCK_INVALID,
-    // 4X8,        8X4,           8X8
-    BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4,
-    // 8X16,       16X8,          16X16
-    BLOCK_4X8, BLOCK_8X4, BLOCK_8X8,
-    // 16X32,      32X16,         32X32
-    BLOCK_8X16, BLOCK_16X8, BLOCK_16X16,
-    // 32X64,      64X32,         64X64
-    BLOCK_16X32, BLOCK_32X16, BLOCK_32X32,
-    // 64x128,     128x64,        128x128
-    BLOCK_32X64, BLOCK_64X32, BLOCK_64X64,
-    // 4X16,       16X4,          8X32
-    BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X16,
-    // 32X8,       16X64,         64X16
-    BLOCK_16X4, BLOCK_8X32, BLOCK_32X8
-  };
-  const struct macroblock_plane *const p = &x->plane[0];
-  const uint8_t *src = p->src.buf;
-  int src_stride = p->src.stride;
-  const int bw = block_size_wide[bsize];
-  const int bh = block_size_high[bsize];
-  uint32_t esq[2][4];
-  int64_t tl, br;
-
-  const BLOCK_SIZE f_index = split_qtr[bsize];
-  assert(f_index != BLOCK_INVALID);
-
-  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    pred0 = CONVERT_TO_BYTEPTR(pred0);
-    pred1 = CONVERT_TO_BYTEPTR(pred1);
-  }
-
-  cpi->fn_ptr[f_index].vf(src, src_stride, pred0, stride0, &esq[0][0]);
-  cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred0 + bw / 2, stride0,
-                          &esq[0][1]);
-  cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
-                          pred0 + bh / 2 * stride0, stride0, &esq[0][2]);
-  cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
-                          pred0 + bh / 2 * stride0 + bw / 2, stride0,
-                          &esq[0][3]);
-  cpi->fn_ptr[f_index].vf(src, src_stride, pred1, stride1, &esq[1][0]);
-  cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred1 + bw / 2, stride1,
-                          &esq[1][1]);
-  cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
-                          pred1 + bh / 2 * stride1, stride0, &esq[1][2]);
-  cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
-                          pred1 + bh / 2 * stride1 + bw / 2, stride0,
-                          &esq[1][3]);
-
-  tl = ((int64_t)esq[0][0] + esq[0][1] + esq[0][2]) -
-       ((int64_t)esq[1][0] + esq[1][1] + esq[1][2]);
-  br = ((int64_t)esq[1][3] + esq[1][1] + esq[1][2]) -
-       ((int64_t)esq[0][3] + esq[0][1] + esq[0][2]);
-  return (tl + br > 0);
-}
-
-// Choose the best wedge index and sign
-static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x,
-                          const BLOCK_SIZE bsize, const uint8_t *const p0,
-                          const int16_t *const residual1,
-                          const int16_t *const diff10,
-                          int *const best_wedge_sign,
-                          int *const best_wedge_index) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const struct buf_2d *const src = &x->plane[0].src;
-  const int bw = block_size_wide[bsize];
-  const int bh = block_size_high[bsize];
-  const int N = bw * bh;
-  assert(N >= 64);
-  int rate;
-  int64_t dist;
-  int64_t rd, best_rd = INT64_MAX;
-  int wedge_index;
-  int wedge_sign;
-  int wedge_types = (1 << get_wedge_bits_lookup(bsize));
-  const uint8_t *mask;
-  uint64_t sse;
-  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
-  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
-
-  DECLARE_ALIGNED(32, int16_t, residual0[MAX_SB_SQUARE]);  // src - pred0
-  if (hbd) {
-    aom_highbd_subtract_block(bh, bw, residual0, bw, src->buf, src->stride,
-                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
-  } else {
-    aom_subtract_block(bh, bw, residual0, bw, src->buf, src->stride, p0, bw);
-  }
-
-  int64_t sign_limit = ((int64_t)aom_sum_squares_i16(residual0, N) -
-                        (int64_t)aom_sum_squares_i16(residual1, N)) *
-                       (1 << WEDGE_WEIGHT_BITS) / 2;
-  int16_t *ds = residual0;
-
-  av1_wedge_compute_delta_squares(ds, residual0, residual1, N);
-
-  for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
-    mask = av1_get_contiguous_soft_mask(wedge_index, 0, bsize);
-
-    wedge_sign = av1_wedge_sign_from_residuals(ds, mask, N, sign_limit);
-
-    mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
-    sse = av1_wedge_sse_from_residuals(residual1, diff10, mask, N);
-    sse = ROUND_POWER_OF_TWO(sse, bd_round);
-
-    model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, sse, N,
-                                                  &rate, &dist);
-    // int rate2;
-    // int64_t dist2;
-    // model_rd_with_curvfit(cpi, x, bsize, 0, sse, N, &rate2, &dist2);
-    // printf("sse %"PRId64": leagacy: %d %"PRId64", curvfit %d %"PRId64"\n",
-    // sse, rate, dist, rate2, dist2); dist = dist2;
-    // rate = rate2;
-
-    rate += x->wedge_idx_cost[bsize][wedge_index];
-    rd = RDCOST(x->rdmult, rate, dist);
-
-    if (rd < best_rd) {
-      *best_wedge_index = wedge_index;
-      *best_wedge_sign = wedge_sign;
-      best_rd = rd;
-    }
-  }
-
-  return best_rd -
-         RDCOST(x->rdmult, x->wedge_idx_cost[bsize][*best_wedge_index], 0);
-}
-
-// Choose the best wedge index the specified sign
-static int64_t pick_wedge_fixed_sign(const AV1_COMP *const cpi,
-                                     const MACROBLOCK *const x,
-                                     const BLOCK_SIZE bsize,
-                                     const int16_t *const residual1,
-                                     const int16_t *const diff10,
-                                     const int wedge_sign,
-                                     int *const best_wedge_index) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-
-  const int bw = block_size_wide[bsize];
-  const int bh = block_size_high[bsize];
-  const int N = bw * bh;
-  assert(N >= 64);
-  int rate;
-  int64_t dist;
-  int64_t rd, best_rd = INT64_MAX;
-  int wedge_index;
-  int wedge_types = (1 << get_wedge_bits_lookup(bsize));
-  const uint8_t *mask;
-  uint64_t sse;
-  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
-  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
-  for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
-    mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
-    sse = av1_wedge_sse_from_residuals(residual1, diff10, mask, N);
-    sse = ROUND_POWER_OF_TWO(sse, bd_round);
-
-    model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, sse, N,
-                                                  &rate, &dist);
-    rate += x->wedge_idx_cost[bsize][wedge_index];
-    rd = RDCOST(x->rdmult, rate, dist);
-
-    if (rd < best_rd) {
-      *best_wedge_index = wedge_index;
-      best_rd = rd;
-    }
-  }
-  return best_rd -
-         RDCOST(x->rdmult, x->wedge_idx_cost[bsize][*best_wedge_index], 0);
-}
-
-static int64_t pick_interinter_wedge(
-    const AV1_COMP *const cpi, MACROBLOCK *const x, const BLOCK_SIZE bsize,
-    const uint8_t *const p0, const uint8_t *const p1,
-    const int16_t *const residual1, const int16_t *const diff10) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int bw = block_size_wide[bsize];
-
-  int64_t rd;
-  int wedge_index = -1;
-  int wedge_sign = 0;
-
-  assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
-  assert(cpi->common.seq_params.enable_masked_compound);
-
-  if (cpi->sf.fast_wedge_sign_estimate) {
-    wedge_sign = estimate_wedge_sign(cpi, x, bsize, p0, bw, p1, bw);
-    rd = pick_wedge_fixed_sign(cpi, x, bsize, residual1, diff10, wedge_sign,
-                               &wedge_index);
-  } else {
-    rd = pick_wedge(cpi, x, bsize, p0, residual1, diff10, &wedge_sign,
-                    &wedge_index);
-  }
-
-  mbmi->interinter_comp.wedge_sign = wedge_sign;
-  mbmi->interinter_comp.wedge_index = wedge_index;
-  return rd;
-}
-
-static int64_t pick_interinter_seg(const AV1_COMP *const cpi,
-                                   MACROBLOCK *const x, const BLOCK_SIZE bsize,
-                                   const uint8_t *const p0,
-                                   const uint8_t *const p1,
-                                   const int16_t *const residual1,
-                                   const int16_t *const diff10) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int bw = block_size_wide[bsize];
-  const int bh = block_size_high[bsize];
-  const int N = 1 << num_pels_log2_lookup[bsize];
-  int rate;
-  int64_t dist;
-  DIFFWTD_MASK_TYPE cur_mask_type;
-  int64_t best_rd = INT64_MAX;
-  DIFFWTD_MASK_TYPE best_mask_type = 0;
-  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
-  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
-  DECLARE_ALIGNED(16, uint8_t, seg_mask[2 * MAX_SB_SQUARE]);
-  uint8_t *tmp_mask[2] = { xd->seg_mask, seg_mask };
-  // try each mask type and its inverse
-  for (cur_mask_type = 0; cur_mask_type < DIFFWTD_MASK_TYPES; cur_mask_type++) {
-    // build mask and inverse
-    if (hbd)
-      av1_build_compound_diffwtd_mask_highbd(
-          tmp_mask[cur_mask_type], cur_mask_type, CONVERT_TO_BYTEPTR(p0), bw,
-          CONVERT_TO_BYTEPTR(p1), bw, bh, bw, xd->bd);
-    else
-      av1_build_compound_diffwtd_mask(tmp_mask[cur_mask_type], cur_mask_type,
-                                      p0, bw, p1, bw, bh, bw);
-
-    // compute rd for mask
-    uint64_t sse = av1_wedge_sse_from_residuals(residual1, diff10,
-                                                tmp_mask[cur_mask_type], N);
-    sse = ROUND_POWER_OF_TWO(sse, bd_round);
-
-    model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, sse, N,
-                                                  &rate, &dist);
-    const int64_t rd0 = RDCOST(x->rdmult, rate, dist);
-
-    if (rd0 < best_rd) {
-      best_mask_type = cur_mask_type;
-      best_rd = rd0;
-    }
-  }
-  mbmi->interinter_comp.mask_type = best_mask_type;
-  if (best_mask_type == DIFFWTD_38_INV) {
-    memcpy(xd->seg_mask, seg_mask, N * 2);
-  }
-  return best_rd;
-}
-
-static int64_t pick_interintra_wedge(const AV1_COMP *const cpi,
-                                     const MACROBLOCK *const x,
-                                     const BLOCK_SIZE bsize,
-                                     const uint8_t *const p0,
-                                     const uint8_t *const p1) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  assert(is_interintra_wedge_used(bsize));
-  assert(cpi->common.seq_params.enable_interintra_compound);
-
-  const struct buf_2d *const src = &x->plane[0].src;
-  const int bw = block_size_wide[bsize];
-  const int bh = block_size_high[bsize];
-  DECLARE_ALIGNED(32, int16_t, residual1[MAX_SB_SQUARE]);  // src - pred1
-  DECLARE_ALIGNED(32, int16_t, diff10[MAX_SB_SQUARE]);     // pred1 - pred0
-  if (get_bitdepth_data_path_index(xd)) {
-    aom_highbd_subtract_block(bh, bw, residual1, bw, src->buf, src->stride,
-                              CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
-    aom_highbd_subtract_block(bh, bw, diff10, bw, CONVERT_TO_BYTEPTR(p1), bw,
-                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
-  } else {
-    aom_subtract_block(bh, bw, residual1, bw, src->buf, src->stride, p1, bw);
-    aom_subtract_block(bh, bw, diff10, bw, p1, bw, p0, bw);
-  }
-  int wedge_index = -1;
-  int64_t rd =
-      pick_wedge_fixed_sign(cpi, x, bsize, residual1, diff10, 0, &wedge_index);
-
-  mbmi->interintra_wedge_sign = 0;
-  mbmi->interintra_wedge_index = wedge_index;
-  return rd;
-}
-
-static int64_t pick_interinter_mask(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                    const BLOCK_SIZE bsize,
-                                    const uint8_t *const p0,
-                                    const uint8_t *const p1,
-                                    const int16_t *const residual1,
-                                    const int16_t *const diff10) {
-  const COMPOUND_TYPE compound_type = x->e_mbd.mi[0]->interinter_comp.type;
-  switch (compound_type) {
-    case COMPOUND_WEDGE:
-      return pick_interinter_wedge(cpi, x, bsize, p0, p1, residual1, diff10);
-    case COMPOUND_DIFFWTD:
-      return pick_interinter_seg(cpi, x, bsize, p0, p1, residual1, diff10);
-    default: assert(0); return 0;
-  }
-}
-
-static int interinter_compound_motion_search(const AV1_COMP *const cpi,
-                                             MACROBLOCK *x,
-                                             const int_mv *const cur_mv,
-                                             const BLOCK_SIZE bsize,
-                                             const PREDICTION_MODE this_mode,
-                                             int mi_row, int mi_col) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  int_mv tmp_mv[2];
-  int tmp_rate_mv = 0;
-  mbmi->interinter_comp.seg_mask = xd->seg_mask;
-  const INTERINTER_COMPOUND_DATA *compound_data = &mbmi->interinter_comp;
-
-  if (this_mode == NEW_NEWMV) {
-    do_masked_motion_search_indexed(cpi, x, cur_mv, compound_data, bsize,
-                                    mi_row, mi_col, tmp_mv, &tmp_rate_mv, 2);
-    mbmi->mv[0].as_int = tmp_mv[0].as_int;
-    mbmi->mv[1].as_int = tmp_mv[1].as_int;
-  } else if (this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV) {
-    do_masked_motion_search_indexed(cpi, x, cur_mv, compound_data, bsize,
-                                    mi_row, mi_col, tmp_mv, &tmp_rate_mv, 0);
-    mbmi->mv[0].as_int = tmp_mv[0].as_int;
-  } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
-    do_masked_motion_search_indexed(cpi, x, cur_mv, compound_data, bsize,
-                                    mi_row, mi_col, tmp_mv, &tmp_rate_mv, 1);
-    mbmi->mv[1].as_int = tmp_mv[1].as_int;
-  }
-  return tmp_rate_mv;
-}
-
-static void get_inter_predictors_masked_compound(
-    const AV1_COMP *const cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
-    int mi_row, int mi_col, uint8_t **preds0, uint8_t **preds1,
-    int16_t *residual1, int16_t *diff10, int *strides) {
-  const AV1_COMMON *cm = &cpi->common;
-  MACROBLOCKD *xd = &x->e_mbd;
-  const int bw = block_size_wide[bsize];
-  const int bh = block_size_high[bsize];
-  int can_use_previous = cm->allow_warped_motion;
-  // get inter predictors to use for masked compound modes
-  av1_build_inter_predictors_for_planes_single_buf(
-      xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides, can_use_previous);
-  av1_build_inter_predictors_for_planes_single_buf(
-      xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides, can_use_previous);
-  const struct buf_2d *const src = &x->plane[0].src;
-  if (get_bitdepth_data_path_index(xd)) {
-    aom_highbd_subtract_block(bh, bw, residual1, bw, src->buf, src->stride,
-                              CONVERT_TO_BYTEPTR(*preds1), bw, xd->bd);
-    aom_highbd_subtract_block(bh, bw, diff10, bw, CONVERT_TO_BYTEPTR(*preds1),
-                              bw, CONVERT_TO_BYTEPTR(*preds0), bw, xd->bd);
-  } else {
-    aom_subtract_block(bh, bw, residual1, bw, src->buf, src->stride, *preds1,
-                       bw);
-    aom_subtract_block(bh, bw, diff10, bw, *preds1, bw, *preds0, bw);
-  }
-}
-
-static int64_t build_and_cost_compound_type(
-    const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
-    const BLOCK_SIZE bsize, const PREDICTION_MODE this_mode, int *rs2,
-    int rate_mv, BUFFER_SET *ctx, int *out_rate_mv, uint8_t **preds0,
-    uint8_t **preds1, int16_t *residual1, int16_t *diff10, int *strides,
-    int mi_row, int mi_col, int mode_rate, int64_t ref_best_rd,
-    int *calc_pred_masked_compound) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  int rate_sum;
-  int64_t dist_sum;
-  int64_t best_rd_cur = INT64_MAX;
-  int64_t rd = INT64_MAX;
-  int tmp_skip_txfm_sb;
-  int64_t tmp_skip_sse_sb;
-  const COMPOUND_TYPE compound_type = mbmi->interinter_comp.type;
-
-  if (*calc_pred_masked_compound) {
-    get_inter_predictors_masked_compound(cpi, x, bsize, mi_row, mi_col, preds0,
-                                         preds1, residual1, diff10, strides);
-    *calc_pred_masked_compound = 0;
-  }
-
-  best_rd_cur =
-      pick_interinter_mask(cpi, x, bsize, *preds0, *preds1, residual1, diff10);
-  *rs2 += get_interinter_compound_mask_rate(x, mbmi);
-  best_rd_cur += RDCOST(x->rdmult, *rs2 + rate_mv, 0);
-
-  // Although the true rate_mv might be different after motion search, but it
-  // is unlikely to be the best mode considering the transform rd cost and other
-  // mode overhead cost
-  int64_t mode_rd = RDCOST(x->rdmult, *rs2 + mode_rate, 0);
-  if (mode_rd > ref_best_rd) return INT64_MAX;
-
-  if (have_newmv_in_inter_mode(this_mode) && compound_type == COMPOUND_WEDGE) {
-    *out_rate_mv = interinter_compound_motion_search(cpi, x, cur_mv, bsize,
-                                                     this_mode, mi_row, mi_col);
-    av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
-    model_rd_sb_fn[MODELRD_TYPE_MASKED_COMPOUND](
-        cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &rate_sum, &dist_sum,
-        &tmp_skip_txfm_sb, &tmp_skip_sse_sb, NULL, NULL, NULL);
-    rd = RDCOST(x->rdmult, *rs2 + *out_rate_mv + rate_sum, dist_sum);
-    if (rd >= best_rd_cur) {
-      mbmi->mv[0].as_int = cur_mv[0].as_int;
-      mbmi->mv[1].as_int = cur_mv[1].as_int;
-      *out_rate_mv = rate_mv;
-      av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0, preds0, strides,
-                                               preds1, strides);
-    }
-    rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
-                             &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
-    if (rd != INT64_MAX)
-      rd = RDCOST(x->rdmult, *rs2 + *out_rate_mv + rate_sum, dist_sum);
-    best_rd_cur = rd;
-
-  } else {
-    av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0, preds0, strides,
-                                             preds1, strides);
-    rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
-                             &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
-    if (rd != INT64_MAX)
-      rd = RDCOST(x->rdmult, *rs2 + rate_mv + rate_sum, dist_sum);
-    best_rd_cur = rd;
-  }
-  return best_rd_cur;
-}
-
-typedef struct {
-  // OBMC secondary prediction buffers and respective strides
-  uint8_t *above_pred_buf[MAX_MB_PLANE];
-  int above_pred_stride[MAX_MB_PLANE];
-  uint8_t *left_pred_buf[MAX_MB_PLANE];
-  int left_pred_stride[MAX_MB_PLANE];
-  int_mv (*single_newmv)[REF_FRAMES];
-  // Pointer to array of motion vectors to use for each ref and their rates
-  // Should point to first of 2 arrays in 2D array
-  int (*single_newmv_rate)[REF_FRAMES];
-  int (*single_newmv_valid)[REF_FRAMES];
-  // Pointer to array of predicted rate-distortion
-  // Should point to first of 2 arrays in 2D array
-  int64_t (*modelled_rd)[MAX_REF_MV_SERCH][REF_FRAMES];
-  InterpFilter single_filter[MB_MODE_COUNT][REF_FRAMES];
-  int ref_frame_cost;
-  int single_comp_cost;
-  int64_t (*simple_rd)[MAX_REF_MV_SERCH][REF_FRAMES];
-  int skip_motion_mode;
-  INTERINTRA_MODE *inter_intra_mode;
-} HandleInterModeArgs;
-
-/* If the current mode shares the same mv with other modes with higher cost,
- * skip this mode. */
-static int skip_repeated_mv(const AV1_COMMON *const cm,
-                            const MACROBLOCK *const x,
-                            PREDICTION_MODE this_mode,
-                            const MV_REFERENCE_FRAME ref_frames[2],
-                            InterModeSearchState *search_state) {
-  const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
-  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
-  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
-  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
-  PREDICTION_MODE compare_mode = MB_MODE_COUNT;
-  if (!is_comp_pred) {
-    if (this_mode == NEARMV) {
-      if (ref_mv_count == 0) {
-        // NEARMV has the same motion vector as NEARESTMV
-        compare_mode = NEARESTMV;
-      }
-      if (ref_mv_count == 1 &&
-          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
-        // NEARMV has the same motion vector as GLOBALMV
-        compare_mode = GLOBALMV;
-      }
-    }
-    if (this_mode == GLOBALMV) {
-      if (ref_mv_count == 0 &&
-          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
-        // GLOBALMV has the same motion vector as NEARESTMV
-        compare_mode = NEARESTMV;
-      }
-      if (ref_mv_count == 1) {
-        // GLOBALMV has the same motion vector as NEARMV
-        compare_mode = NEARMV;
-      }
-    }
-
-    if (compare_mode != MB_MODE_COUNT) {
-      // Use modelled_rd to check whether compare mode was searched
-      if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
-          INT64_MAX) {
-        const int16_t mode_ctx =
-            av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
-        const int compare_cost = cost_mv_ref(x, compare_mode, mode_ctx);
-        const int this_cost = cost_mv_ref(x, this_mode, mode_ctx);
-
-        // Only skip if the mode cost is larger than compare mode cost
-        if (this_cost > compare_cost) {
-          search_state->modelled_rd[this_mode][0][ref_frames[0]] =
-              search_state->modelled_rd[compare_mode][0][ref_frames[0]];
-          return 1;
-        }
-      }
-    }
-  }
-  return 0;
-}
-
-static INLINE int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
-                                     const AV1_COMMON *cm,
-                                     const MACROBLOCK *x) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  *out_mv = in_mv;
-  lower_mv_precision(&out_mv->as_mv, cm->allow_high_precision_mv,
-                     cm->cur_frame_force_integer_mv);
-  clamp_mv2(&out_mv->as_mv, xd);
-  return !mv_check_bounds(&x->mv_limits, &out_mv->as_mv);
-}
-
-static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
-                            const BLOCK_SIZE bsize, int_mv *cur_mv,
-                            const int mi_row, const int mi_col,
-                            int *const rate_mv,
-                            HandleInterModeArgs *const args) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int is_comp_pred = has_second_ref(mbmi);
-  const PREDICTION_MODE this_mode = mbmi->mode;
-  const int refs[2] = { mbmi->ref_frame[0],
-                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
-  const int ref_mv_idx = mbmi->ref_mv_idx;
-  int i;
-
-  (void)args;
-
-  if (is_comp_pred) {
-    if (this_mode == NEW_NEWMV) {
-      cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
-      cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
-
-      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
-        joint_motion_search(cpi, x, bsize, cur_mv, mi_row, mi_col, NULL, NULL,
-                            0, rate_mv, 0);
-      } else {
-        *rate_mv = 0;
-        for (i = 0; i < 2; ++i) {
-          const int_mv ref_mv = av1_get_ref_mv(x, i);
-          av1_set_mvcost(x, i, mbmi->ref_mv_idx);
-          *rate_mv +=
-              av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv, x->nmvjointcost,
-                              x->mvcost, MV_COST_WEIGHT);
-        }
-      }
-    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
-      cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
-      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
-        compound_single_motion_search_interinter(
-            cpi, x, bsize, cur_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 1);
-      } else {
-        av1_set_mvcost(x, 1,
-                       mbmi->ref_mv_idx + (this_mode == NEAR_NEWMV ? 1 : 0));
-        const int_mv ref_mv = av1_get_ref_mv(x, 1);
-        *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
-                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
-      }
-    } else {
-      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
-      cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
-      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
-        compound_single_motion_search_interinter(
-            cpi, x, bsize, cur_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 0);
-      } else {
-        const int_mv ref_mv = av1_get_ref_mv(x, 0);
-        av1_set_mvcost(x, 0,
-                       mbmi->ref_mv_idx + (this_mode == NEW_NEARMV ? 1 : 0));
-        *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
-                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
-      }
-    }
-  } else {
-    single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, rate_mv);
-    if (x->best_mv.as_int == INVALID_MV) return INT64_MAX;
-
-    args->single_newmv[ref_mv_idx][refs[0]] = x->best_mv;
-    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
-    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
-
-    cur_mv[0].as_int = x->best_mv.as_int;
-
-#if USE_DISCOUNT_NEWMV_TEST
-    // Estimate the rate implications of a new mv but discount this
-    // under certain circumstances where we want to help initiate a weak
-    // motion field, where the distortion gain for a single block may not
-    // be enough to overcome the cost of a new mv.
-    if (discount_newmv_test(cpi, x, this_mode, x->best_mv)) {
-      *rate_mv = AOMMAX(*rate_mv / NEW_MV_DISCOUNT_FACTOR, 1);
-    }
-#endif
-  }
-
-  return 0;
-}
-
-static INLINE void swap_dst_buf(MACROBLOCKD *xd, const BUFFER_SET *dst_bufs[2],
-                                int num_planes) {
-  const BUFFER_SET *buf0 = dst_bufs[0];
-  dst_bufs[0] = dst_bufs[1];
-  dst_bufs[1] = buf0;
-  restore_dst_buf(xd, *dst_bufs[0], num_planes);
-}
-
-static INLINE int get_switchable_rate(MACROBLOCK *const x,
-                                      const InterpFilters filters,
-                                      const int ctx[2]) {
-  int inter_filter_cost;
-  const InterpFilter filter0 = av1_extract_interp_filter(filters, 0);
-  const InterpFilter filter1 = av1_extract_interp_filter(filters, 1);
-  inter_filter_cost = x->switchable_interp_costs[ctx[0]][filter0];
-  inter_filter_cost += x->switchable_interp_costs[ctx[1]][filter1];
-  return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
-}
-
-// calculate the rdcost of given interpolation_filter
-static INLINE int64_t interpolation_filter_rd(
-    MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
-    int mi_row, int mi_col, BUFFER_SET *const orig_dst, int64_t *const rd,
-    int *const switchable_rate, int *const skip_txfm_sb,
-    int64_t *const skip_sse_sb, const BUFFER_SET *dst_bufs[2], int filter_idx,
-    const int switchable_ctx[2], const int skip_pred, int *rate,
-    int64_t *dist) {
-  const AV1_COMMON *cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  int tmp_rate[2], tmp_skip_sb[2] = { 1, 1 };
-  int64_t tmp_dist[2], tmp_skip_sse[2] = { 0, 0 };
-
-  const InterpFilters last_best = mbmi->interp_filters;
-  mbmi->interp_filters = filter_sets[filter_idx];
-  const int tmp_rs =
-      get_switchable_rate(x, mbmi->interp_filters, switchable_ctx);
-
-  assert(skip_pred != 2);
-  assert((skip_pred >= 0) && (skip_pred <= cpi->default_interp_skip_flags));
-  assert(rate[0] >= 0);
-  assert(dist[0] >= 0);
-  assert((skip_txfm_sb[0] == 0) || (skip_txfm_sb[0] == 1));
-  assert(skip_sse_sb[0] >= 0);
-  assert(rate[1] >= 0);
-  assert(dist[1] >= 0);
-  assert((skip_txfm_sb[1] == 0) || (skip_txfm_sb[1] == 1));
-  assert(skip_sse_sb[1] >= 0);
-
-  if (skip_pred != cpi->default_interp_skip_flags) {
-    if (skip_pred != DEFAULT_LUMA_INTERP_SKIP_FLAG) {
-      av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst, bsize);
-#if CONFIG_COLLECT_RD_STATS == 3
-      RD_STATS rd_stats_y;
-      select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, mi_row, mi_col, INT64_MAX);
-      PrintPredictionUnitStats(cpi, x, &rd_stats_y, bsize);
-#endif  // CONFIG_COLLECT_RD_STATS == 3
-      model_rd_sb_fn[MODELRD_TYPE_INTERP_FILTER](
-          cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &tmp_rate[0], &tmp_dist[0],
-          &tmp_skip_sb[0], &tmp_skip_sse[0], NULL, NULL, NULL);
-      tmp_rate[1] = tmp_rate[0];
-      tmp_dist[1] = tmp_dist[0];
-    } else {
-      // only luma MC is skipped
-      tmp_rate[1] = rate[0];
-      tmp_dist[1] = dist[0];
-    }
-    if (num_planes > 1) {
-      for (int plane = 1; plane < num_planes; ++plane) {
-        int tmp_rate_uv, tmp_skip_sb_uv;
-        int64_t tmp_dist_uv, tmp_skip_sse_uv;
-        int64_t tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate[1], tmp_dist[1]);
-        if (tmp_rd >= *rd) {
-          mbmi->interp_filters = last_best;
-          return 0;
-        }
-        av1_build_inter_predictors_sbp(cm, xd, mi_row, mi_col, orig_dst, bsize,
-                                       plane);
-        model_rd_sb_fn[MODELRD_TYPE_INTERP_FILTER](
-            cpi, bsize, x, xd, plane, plane, mi_row, mi_col, &tmp_rate_uv,
-            &tmp_dist_uv, &tmp_skip_sb_uv, &tmp_skip_sse_uv, NULL, NULL, NULL);
-        tmp_rate[1] =
-            (int)AOMMIN(((int64_t)tmp_rate[1] + (int64_t)tmp_rate_uv), INT_MAX);
-        tmp_dist[1] += tmp_dist_uv;
-        tmp_skip_sb[1] &= tmp_skip_sb_uv;
-        tmp_skip_sse[1] += tmp_skip_sse_uv;
-      }
-    }
-  } else {
-    // both luma and chroma MC is skipped
-    tmp_rate[1] = rate[1];
-    tmp_dist[1] = dist[1];
-  }
-  int64_t tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate[1], tmp_dist[1]);
-
-  if (tmp_rd < *rd) {
-    *rd = tmp_rd;
-    *switchable_rate = tmp_rs;
-    if (skip_pred != cpi->default_interp_skip_flags) {
-      if (skip_pred == 0) {
-        // Overwrite the data as current filter is the best one
-        tmp_skip_sb[1] = tmp_skip_sb[0] & tmp_skip_sb[1];
-        tmp_skip_sse[1] = tmp_skip_sse[0] + tmp_skip_sse[1];
-        memcpy(rate, tmp_rate, sizeof(*rate) * 2);
-        memcpy(dist, tmp_dist, sizeof(*dist) * 2);
-        memcpy(skip_txfm_sb, tmp_skip_sb, sizeof(*skip_txfm_sb) * 2);
-        memcpy(skip_sse_sb, tmp_skip_sse, sizeof(*skip_sse_sb) * 2);
-        // As luma MC data is computed, no need to recompute after the search
-        x->recalc_luma_mc_data = 0;
-      } else if (skip_pred == DEFAULT_LUMA_INTERP_SKIP_FLAG) {
-        // As luma MC data is not computed, update of luma data can be skipped
-        rate[1] = tmp_rate[1];
-        dist[1] = tmp_dist[1];
-        skip_txfm_sb[1] = skip_txfm_sb[0] & tmp_skip_sb[1];
-        skip_sse_sb[1] = skip_sse_sb[0] + tmp_skip_sse[1];
-        // As luma MC data is not recomputed and current filter is the best,
-        // indicate the possibility of recomputing MC data
-        // If current buffer contains valid MC data, toggle to indicate that
-        // luma MC data needs to be recomputed
-        x->recalc_luma_mc_data ^= 1;
-      }
-      swap_dst_buf(xd, dst_bufs, num_planes);
-    }
-    return 1;
-  }
-  mbmi->interp_filters = last_best;
-  return 0;
-}
-
-// Find the best rd filter in horizontal direction
-static INLINE int find_best_horiz_interp_filter_rd(
-    MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
-    int mi_row, int mi_col, BUFFER_SET *const orig_dst, int64_t *const rd,
-    int *const switchable_rate, int *const skip_txfm_sb,
-    int64_t *const skip_sse_sb, const BUFFER_SET *dst_bufs[2],
-    const int switchable_ctx[2], const int skip_hor, int *rate, int64_t *dist,
-    int best_dual_mode) {
-  int i;
-  const int bw = block_size_wide[bsize];
-  assert(best_dual_mode == 0);
-  if ((bw <= 4) && (skip_hor != cpi->default_interp_skip_flags)) {
-    int skip_pred = cpi->default_interp_skip_flags;
-    // Process the filters in reverse order to enable reusing rate and
-    // distortion (calcuated during EIGHTTAP_REGULAR) for MULTITAP_SHARP
-    for (i = (SWITCHABLE_FILTERS - 1); i >= 1; --i) {
-      if (interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd,
-                                  switchable_rate, skip_txfm_sb, skip_sse_sb,
-                                  dst_bufs, i, switchable_ctx, skip_pred, rate,
-                                  dist)) {
-        best_dual_mode = i;
-      }
-      skip_pred = skip_hor;
-    }
-  } else {
-    for (i = 1; i < SWITCHABLE_FILTERS; ++i) {
-      if (interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd,
-                                  switchable_rate, skip_txfm_sb, skip_sse_sb,
-                                  dst_bufs, i, switchable_ctx, skip_hor, rate,
-                                  dist)) {
-        best_dual_mode = i;
-      }
-    }
-  }
-  return best_dual_mode;
-}
-
-// Runs the RD check on the vertical-direction filter candidates. Candidates
-// are visited in strides of SWITCHABLE_FILTERS, beginning one stride past
-// best_dual_mode; results are reported through the rd/rate/dist/skip outputs
-// by interpolation_filter_rd().
-static INLINE void find_best_vert_interp_filter_rd(
-    MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
-    int mi_row, int mi_col, BUFFER_SET *const orig_dst, int64_t *const rd,
-    int *const switchable_rate, int *const skip_txfm_sb,
-    int64_t *const skip_sse_sb, const BUFFER_SET *dst_bufs[2],
-    const int switchable_ctx[2], const int skip_ver, int *rate, int64_t *dist,
-    int best_dual_mode, int filter_set_size) {
-  const int block_height = block_size_high[bsize];
-  const int first_candidate = best_dual_mode + SWITCHABLE_FILTERS;
-  const int walk_backwards =
-      (block_height <= 4) && (skip_ver != cpi->default_interp_skip_flags);
-
-  if (!walk_backwards) {
-    // Forward order: every candidate uses the caller's skip flags.
-    int idx = first_candidate;
-    while (idx < filter_set_size) {
-      interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd,
-                              switchable_rate, skip_txfm_sb, skip_sse_sb,
-                              dst_bufs, idx, switchable_ctx, skip_ver, rate,
-                              dist);
-      idx += SWITCHABLE_FILTERS;
-    }
-    return;
-  }
-
-  // Process the filters in reverse order to enable reusing rate and
-  // distortion (calculated during EIGHTTAP_REGULAR) for MULTITAP_SHARP.
-  // Only the first candidate visited uses the default skip flags; the
-  // remaining ones use skip_ver.
-  assert(filter_set_size == DUAL_FILTER_SET_SIZE);
-  int skip_flags = cpi->default_interp_skip_flags;
-  int idx = filter_set_size - SWITCHABLE_FILTERS + best_dual_mode;
-  while (idx >= first_candidate) {
-    interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd,
-                            switchable_rate, skip_txfm_sb, skip_sse_sb,
-                            dst_bufs, idx, switchable_ctx, skip_flags, rate,
-                            dist);
-    skip_flags = skip_ver;
-    idx -= SWITCHABLE_FILTERS;
-  }
-}
-
-// Returns 1 when the saved search result `st` was produced for the same
-// reference frames and motion vectors as `mi` (and, for blocks with a second
-// reference, the same inter-inter compound type); returns 0 otherwise.
-static INLINE int is_interp_filter_match(const INTERPOLATION_FILTER_STATS *st,
-                                         MB_MODE_INFO *const mi) {
-  int ref = 0;
-  while (ref < 2) {
-    if (st->ref_frames[ref] != mi->ref_frame[ref]) return 0;
-    if (st->mv[ref].as_int != mi->mv[ref].as_int) return 0;
-    ++ref;
-  }
-  // The compound type only matters when a second reference is in use.
-  if (!has_second_ref(mi)) return 1;
-  return (st->comp_type != mi->interinter_comp.type) ? 0 : 1;
-}
-
-// Looks through the results saved for this block's compound_idx. On the first
-// match, copies the saved filters into mbmi->interp_filters and returns the
-// index of that entry; returns -1 when nothing matches.
-static INLINE int find_interp_filter_in_stats(MACROBLOCK *x,
-                                              MB_MODE_INFO *const mbmi) {
-  const int group = mbmi->compound_idx;
-  const int num_saved = x->interp_filter_stats_idx[group];
-  int hit = -1;  // no match result found yet
-  for (int k = 0; hit < 0 && k < num_saved; ++k) {
-    const INTERPOLATION_FILTER_STATS *entry = &x->interp_filter_stats[group][k];
-    if (!is_interp_filter_match(entry, mbmi)) continue;
-    mbmi->interp_filters = entry->filters;
-    hit = k;
-  }
-  return hit;
-}
-
-// Appends the current block's filters, motion vectors, reference frames and
-// compound type to the saved results for its compound_idx. Does nothing once
-// MAX_INTERP_FILTER_STATS entries have been stored.
-static INLINE void save_interp_filter_search_stat(MACROBLOCK *x,
-                                                  MB_MODE_INFO *const mbmi) {
-  const int group = mbmi->compound_idx;
-  const int next_slot = x->interp_filter_stats_idx[group];
-  if (next_slot >= MAX_INTERP_FILTER_STATS) return;
-
-  const INTERPOLATION_FILTER_STATS entry = {
-    mbmi->interp_filters,
-    { mbmi->mv[0], mbmi->mv[1] },
-    { mbmi->ref_frame[0], mbmi->ref_frame[1] },
-    mbmi->interinter_comp.type
-  };
-  x->interp_filter_stats[group][next_slot] = entry;
-  x->interp_filter_stats_idx[group] = next_slot + 1;
-}
-
-static int64_t interpolation_filter_search(  // Interp-filter RD search; returns 0, or INT64_MAX when pruned below.
-    MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
-    int mi_row, int mi_col, const BUFFER_SET *const tmp_dst,
-    BUFFER_SET *const orig_dst, InterpFilter (*const single_filter)[REF_FRAMES],
-    int64_t *const rd, int *const switchable_rate, int *const skip_txfm_sb,
-    int64_t *const skip_sse_sb, const int skip_build_pred,
-    HandleInterModeArgs *args, int64_t ref_best_rd) {
-  const AV1_COMMON *cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int need_search =
-      av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd);
-  int i;
-  // Index 0 corresponds to luma rd data and index 1 corresponds to cumulative
-  // data of all planes
-  int tmp_rate[2] = { 0, 0 };
-  int64_t tmp_dist[2] = { 0, 0 };
-  int best_skip_txfm_sb[2] = { 1, 1 };
-  int64_t best_skip_sse_sb[2] = { 0, 0 };
-  const int ref_frame = xd->mi[0]->ref_frame[0];
-
-  (void)single_filter;  // unused in this function
-  int match_found = -1;  // index of a reusable saved result, -1 if none
-  const InterpFilter assign_filter = cm->interp_filter;
-  if (cpi->sf.skip_repeat_interpolation_filter_search && need_search) {
-    match_found = find_interp_filter_in_stats(x, mbmi);
-  }
-  if (!need_search || match_found == -1) {  // no saved filters were reused
-    set_default_interp_filters(mbmi, assign_filter);
-  }
-  int switchable_ctx[2];
-  switchable_ctx[0] = av1_get_pred_context_switchable_interp(xd, 0);
-  switchable_ctx[1] = av1_get_pred_context_switchable_interp(xd, 1);
-  *switchable_rate =
-      get_switchable_rate(x, mbmi->interp_filters, switchable_ctx);
-  if (!skip_build_pred)
-    av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
-
-#if CONFIG_COLLECT_RD_STATS == 3
-  RD_STATS rd_stats_y;
-  select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, mi_row, mi_col, INT64_MAX);
-  PrintPredictionUnitStats(cpi, x, &rd_stats_y, bsize);
-#endif  // CONFIG_COLLECT_RD_STATS == 3
-  model_rd_sb_fn[MODELRD_TYPE_INTERP_FILTER](  // model plane 0 only -> index 0
-      cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &tmp_rate[0], &tmp_dist[0],
-      &best_skip_txfm_sb[0], &best_skip_sse_sb[0], NULL, NULL, NULL);
-  if (num_planes > 1)
-    model_rd_sb_fn[MODELRD_TYPE_INTERP_FILTER](  // planes 1..num_planes-1 -> index 1
-        cpi, bsize, x, xd, 1, num_planes - 1, mi_row, mi_col, &tmp_rate[1],
-        &tmp_dist[1], &best_skip_txfm_sb[1], &best_skip_sse_sb[1], NULL, NULL,
-        NULL);
-  tmp_rate[1] =  // fold plane 0 into index 1; sum is done in 64 bits and capped at INT_MAX
-      (int)AOMMIN((int64_t)tmp_rate[0] + (int64_t)tmp_rate[1], INT_MAX);
-  assert(tmp_rate[1] >= 0);
-  tmp_dist[1] = tmp_dist[0] + tmp_dist[1];
-  best_skip_txfm_sb[1] = best_skip_txfm_sb[0] & best_skip_txfm_sb[1];
-  best_skip_sse_sb[1] = best_skip_sse_sb[0] + best_skip_sse_sb[1];
-  *rd = RDCOST(x->rdmult, (*switchable_rate + tmp_rate[1]), tmp_dist[1]);
-  *skip_txfm_sb = best_skip_txfm_sb[1];
-  *skip_sse_sb = best_skip_sse_sb[1];
-  x->pred_sse[ref_frame] = (unsigned int)(best_skip_sse_sb[0] >> 4);
-
-  if (assign_filter != SWITCHABLE || match_found != -1) {  // frame-level filter is fixed, or a saved result was reused
-    return 0;
-  }
-  if (!need_search) {
-    assert(mbmi->interp_filters ==
-           av1_broadcast_interp_filter(EIGHTTAP_REGULAR));
-    return 0;
-  }
-  if (args->modelled_rd != NULL) {
-    if (has_second_ref(mbmi)) {
-      const int ref_mv_idx = mbmi->ref_mv_idx;
-      int refs[2] = { mbmi->ref_frame[0],
-                      (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
-      const int mode0 = compound_ref0_mode(mbmi->mode);
-      const int mode1 = compound_ref1_mode(mbmi->mode);
-      const int64_t mrd = AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
-                                 args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
-      if ((*rd >> 1) > mrd && ref_best_rd < INT64_MAX) {  // half of this rd still exceeds the smaller stored modelled rd
-        return INT64_MAX;
-      }
-    }
-  }
-
-  x->recalc_luma_mc_data = 0;
-  // skip_flag is a 2-bit mask (xx in binary form): bit 0 set means luma MC is
-  // skipped, bit 1 set means chroma MC is skipped.
-  //   skip_flag=0: don't skip luma or chroma MC
-  //   skip_flag=1: skip luma MC only
-  //   skip_flag=2: not a valid case
-  //   skip_flag=3: skip both luma and chroma MC
-  int skip_hor = cpi->default_interp_skip_flags;
-  int skip_ver = cpi->default_interp_skip_flags;
-  const int is_compound = has_second_ref(mbmi);
-  assert(is_intrabc_block(mbmi) == 0);
-  for (int j = 0; j < 1 + is_compound; ++j) {  // one pass per reference in use
-    const RefBuffer *ref_buf = &cm->frame_refs[mbmi->ref_frame[j] - LAST_FRAME];
-    const struct scale_factors *const sf = &ref_buf->sf;
-    // TODO(any): Refine skip flag calculation considering scaling
-    if (av1_is_scaled(sf)) {
-      skip_hor = 0;
-      skip_ver = 0;
-      break;
-    }
-    const MV mv = mbmi->mv[j].as_mv;
-    int skip_hor_plane = 0;
-    int skip_ver_plane = 0;
-    for (int k = 0; k < AOMMAX(1, (num_planes - 1)); ++k) {
-      struct macroblockd_plane *const pd = &xd->plane[k];
-      const int bw = pd->width;
-      const int bh = pd->height;
-      const MV mv_q4 = clamp_mv_to_umv_border_sb(
-          xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
-      const int sub_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
-      const int sub_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
-      skip_hor_plane |= ((sub_x == 0) << k);  // bit k set when plane k has no horizontal sub-pel part
-      skip_ver_plane |= ((sub_y == 0) << k);  // bit k set when plane k has no vertical sub-pel part
-    }
-    skip_hor = skip_hor & skip_hor_plane;
-    skip_ver = skip_ver & skip_ver_plane;
-    // It is not valid that "luma MV is sub-pel, whereas chroma MV is not"
-    assert(skip_hor != 2);
-    assert(skip_ver != 2);
-  }
-  // When compound prediction type is compound segment wedge, luma MC and chroma
-  // MC need to go hand in hand as mask generated during luma MC is required for
-  // chroma MC. If skip_hor = 0 and skip_ver = 1, mask used for chroma MC during
-  // vertical filter decision may be incorrect as temporary MC evaluation
-  // overwrites the mask. Make skip_ver as 0 for this case so that mask is
-  // populated during luma MC
-  if (is_compound && mbmi->compound_idx == 1 &&
-      mbmi->interinter_comp.type == COMPOUND_DIFFWTD) {
-    assert(mbmi->comp_group_idx == 1);
-    if (skip_hor == 0 && skip_ver == 1) skip_ver = 0;
-  }
-  // do interp_filter search
-  const int filter_set_size = DUAL_FILTER_SET_SIZE;
-  restore_dst_buf(xd, *tmp_dst, num_planes);
-  const BUFFER_SET *dst_bufs[2] = { tmp_dst, orig_dst };
-  if (cpi->sf.use_fast_interpolation_filter_search &&
-      cm->seq_params.enable_dual_filter) {
-    // default to (R,R): EIGHTTAP_REGULARxEIGHTTAP_REGULAR
-    int best_dual_mode = 0;
-    // Find best of {R}x{R,Sm,Sh}
-    // EIGHTTAP_REGULAR mode is calculated beforehand
-    best_dual_mode = find_best_horiz_interp_filter_rd(
-        x, cpi, bsize, mi_row, mi_col, orig_dst, rd, switchable_rate,
-        best_skip_txfm_sb, best_skip_sse_sb, dst_bufs, switchable_ctx, skip_hor,
-        tmp_rate, tmp_dist, best_dual_mode);
-
-    // From best of horizontal EIGHTTAP_REGULAR modes, check vertical modes
-    find_best_vert_interp_filter_rd(
-        x, cpi, bsize, mi_row, mi_col, orig_dst, rd, switchable_rate,
-        best_skip_txfm_sb, best_skip_sse_sb, dst_bufs, switchable_ctx, skip_ver,
-        tmp_rate, tmp_dist, best_dual_mode, filter_set_size);
-  } else {
-    // EIGHTTAP_REGULAR mode is calculated beforehand
-    for (i = 1; i < filter_set_size; ++i) {
-      if (cm->seq_params.enable_dual_filter == 0) {  // dual filter off: only try entries whose two 16-bit halves are equal
-        const int16_t filter_y = filter_sets[i] & 0xffff;
-        const int16_t filter_x = filter_sets[i] >> 16;
-        if (filter_x != filter_y) continue;
-      }
-      interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd,
-                              switchable_rate, best_skip_txfm_sb,
-                              best_skip_sse_sb, dst_bufs, i, switchable_ctx, 0,
-                              tmp_rate, tmp_dist);
-      assert(x->recalc_luma_mc_data == 0);  // skip flags were passed as 0 above
-    }
-  }
-  swap_dst_buf(xd, dst_bufs, num_planes);
-  // Recompute final MC data if required
-  if (x->recalc_luma_mc_data == 1) {
-    // Recomputing final luma MC data is required only if the same was skipped
-    // in either of the directions  Condition below is necessary, but not
-    // sufficient
-    assert((skip_hor == 1) || (skip_ver == 1));
-    av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst, bsize);
-  }
-  *skip_txfm_sb = best_skip_txfm_sb[1];
-  *skip_sse_sb = best_skip_sse_sb[1];
-  x->pred_sse[ref_frame] = (unsigned int)(best_skip_sse_sb[0] >> 4);
-
-  // save search results
-  if (cpi->sf.skip_repeat_interpolation_filter_search) {
-    assert(match_found == -1);  // a reused result would have returned earlier
-    save_interp_filter_search_stat(x, mbmi);
-  }
-  return 0;
-}
-
-static int txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
-                       int mi_row, int mi_col, RD_STATS *rd_stats,
-                       RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
-                       int mode_rate, int64_t ref_best_rd) {
-  /*
-   * This function combines y and uv planes' transform search processes
-   * together, when the prediction is generated. It first does subtraction to
-   * obtain the prediction error. Then it calls
-   * select_tx_type_yrd/super_block_yrd and inter_block_uvrd sequentially and
-   * handles the early terminations that happen in those functions. At the end,
-   * it computes the rd_stats/_y/_uv accordingly.
-   *
-   * mode_rate is the rate already spent on the mode; it seeds rd_stats->rate.
-   * ref_best_rd is the rd cost to beat.
-   * Returns 1 when the search ran to completion and 0 on any early
-   * termination. On some of the 0 paths rd_stats and/or rd_stats_y are marked
-   * invalid via av1_invalid_rd_stats().
-   */
-  const AV1_COMMON *cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  int skip_txfm_sb = 0;
-  const int num_planes = av1_num_planes(cm);
-  const int ref_frame_1 = mbmi->ref_frame[1];  // written back on the early-exit paths below
-  const int64_t mode_rd = RDCOST(x->rdmult, mode_rate, 0);
-  const int64_t rd_thresh =
-      ref_best_rd == INT64_MAX ? INT64_MAX : ref_best_rd - mode_rd;
-  const int skip_ctx = av1_get_skip_context(xd);
-  const int64_t min_header_rate =
-      mode_rate + AOMMIN(x->skip_cost[skip_ctx][0], x->skip_cost[skip_ctx][1]);
-  // Account for minimum skip and non_skip rd.
-  // Eventually either one of them will be added to mode_rate
-  const int64_t min_header_rd_possible = RDCOST(x->rdmult, min_header_rate, 0);
-
-  if (min_header_rd_possible > ref_best_rd) {  // header cost alone already exceeds the budget
-    av1_invalid_rd_stats(rd_stats_y);
-    av1_invalid_rd_stats(rd_stats);
-    return 0;
-  }
-
-  av1_init_rd_stats(rd_stats);
-  av1_init_rd_stats(rd_stats_y);
-  av1_init_rd_stats(rd_stats_uv);
-  rd_stats->rate = mode_rate;
-
-  if (!cpi->common.all_lossless)
-    check_block_skip(cpi, bsize, x, xd, 0, num_planes - 1, &skip_txfm_sb);
-  if (!skip_txfm_sb) {
-    int64_t non_skip_rdcosty = INT64_MAX;
-    int64_t skip_rdcosty = INT64_MAX;
-    int64_t min_rdcosty = INT64_MAX;
-    int is_cost_valid_uv = 0;
-
-    // cost and distortion
-    av1_subtract_plane(x, bsize, 0);
-    if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
-      // Motion mode
-      select_tx_type_yrd(cpi, x, rd_stats_y, bsize, mi_row, mi_col, rd_thresh);
-#if CONFIG_COLLECT_RD_STATS == 2
-      PrintPredictionUnitStats(cpi, x, rd_stats_y, bsize);
-#endif  // CONFIG_COLLECT_RD_STATS == 2
-    } else {
-      super_block_yrd(cpi, x, rd_stats_y, bsize, rd_thresh);
-      memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));  // every entry gets the single chosen tx_size
-      for (int i = 0; i < xd->n4_h * xd->n4_w; ++i)
-        set_blk_skip(x, 0, i, rd_stats_y->skip);
-    }
-
-    if (rd_stats_y->rate == INT_MAX) {  // luma search did not produce a valid result
-      av1_invalid_rd_stats(rd_stats);
-      // TODO(angiebird): check if we need this
-      // restore_dst_buf(xd, *orig_dst, num_planes);
-      mbmi->ref_frame[1] = ref_frame_1;
-      return 0;
-    }
-
-    av1_merge_rd_stats(rd_stats, rd_stats_y);
-
-    non_skip_rdcosty = RDCOST(
-        x->rdmult, rd_stats->rate + x->skip_cost[skip_ctx][0], rd_stats->dist);
-    skip_rdcosty =
-        RDCOST(x->rdmult, mode_rate + x->skip_cost[skip_ctx][1], rd_stats->sse);
-    min_rdcosty = AOMMIN(non_skip_rdcosty, skip_rdcosty);
-
-    if (min_rdcosty > ref_best_rd) {  // luma alone already loses, coded or skipped
-      int64_t tokenonly_rdy =
-          AOMMIN(RDCOST(x->rdmult, rd_stats_y->rate, rd_stats_y->dist),
-                 RDCOST(x->rdmult, 0, rd_stats_y->sse));
-      // Invalidate rd_stats_y to skip the rest of the motion modes search
-      if (tokenonly_rdy - (tokenonly_rdy >> cpi->sf.adaptive_txb_search_level) >
-          rd_thresh)
-        av1_invalid_rd_stats(rd_stats_y);
-      mbmi->ref_frame[1] = ref_frame_1;
-      return 0;
-    }
-
-    if (num_planes > 1) {
-      /* clang-format off */
-      is_cost_valid_uv =
-          inter_block_uvrd(cpi, x, rd_stats_uv, bsize,
-                           ref_best_rd - non_skip_rdcosty,
-                           ref_best_rd - skip_rdcosty, FTXS_NONE);
-      if (!is_cost_valid_uv) {
-        mbmi->ref_frame[1] = ref_frame_1;
-        return 0;
-      }
-      /* clang-format on */
-      av1_merge_rd_stats(rd_stats, rd_stats_uv);
-    } else {
-      av1_init_rd_stats(rd_stats_uv);
-    }
-    if (rd_stats->skip) {  // merged stats report skip: drop coefficient rates, charge the skip flag
-      rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
-      rd_stats_y->rate = 0;
-      rd_stats_uv->rate = 0;
-      rd_stats->rate += x->skip_cost[skip_ctx][1];
-      mbmi->skip = 0;
-      // here mbmi->skip temporarily plays a role as what this_skip2 does
-
-      int64_t tmprd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
-      if (tmprd > ref_best_rd) {
-        mbmi->ref_frame[1] = ref_frame_1;
-        return 0;
-      }
-    } else if (!xd->lossless[mbmi->segment_id] &&
-               (RDCOST(x->rdmult,
-                       rd_stats_y->rate + rd_stats_uv->rate +
-                           x->skip_cost[skip_ctx][0],
-                       rd_stats->dist) >=
-                RDCOST(x->rdmult, x->skip_cost[skip_ctx][1], rd_stats->sse))) {  // skipping costs no more than coding the residual
-      rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
-      rd_stats->rate += x->skip_cost[skip_ctx][1];
-      rd_stats->dist = rd_stats->sse;  // with no residual coded, distortion is the sse
-      rd_stats_y->rate = 0;
-      rd_stats_uv->rate = 0;
-      mbmi->skip = 1;
-    } else {
-      rd_stats->rate += x->skip_cost[skip_ctx][0];
-      mbmi->skip = 0;
-    }
-  } else {  // check_block_skip() flagged the block as skippable
-    x->skip = 1;
-    mbmi->tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode);
-    // The cost of skip bit needs to be added.
-    mbmi->skip = 0;
-    rd_stats->rate += x->skip_cost[skip_ctx][1];
-
-    rd_stats->dist = 0;
-    rd_stats->sse = 0;
-    rd_stats_y->rate = 0;
-    rd_stats_uv->rate = 0;
-    rd_stats->skip = 1;
-    int64_t tmprd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
-    if (tmprd > ref_best_rd) {
-      mbmi->ref_frame[1] = ref_frame_1;
-      return 0;
-    }
-  }
-  return 1;
-}
-
-static int handle_inter_intra_mode(const AV1_COMP *const cpi,  // Inter-intra mode/wedge search; returns 0, or -1 when pruned.
-                                   MACROBLOCK *const x, BLOCK_SIZE bsize,
-                                   int mi_row, int mi_col, MB_MODE_INFO *mbmi,
-                                   HandleInterModeArgs *args,
-                                   int64_t ref_best_rd, int *rate_mv,
-                                   int *tmp_rate2, BUFFER_SET *orig_dst) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCKD *xd = &x->e_mbd;
-
-  INTERINTRA_MODE best_interintra_mode = II_DC_PRED;
-  int64_t rd, best_interintra_rd = INT64_MAX;
-  int rmode, rate_sum;
-  int64_t dist_sum;
-  int tmp_rate_mv = 0;
-  int tmp_skip_txfm_sb;
-  int bw = block_size_wide[bsize];
-  int64_t tmp_skip_sse_sb;
-  DECLARE_ALIGNED(16, uint8_t, tmp_buf_[2 * MAX_INTERINTRA_SB_SQUARE]);
-  DECLARE_ALIGNED(16, uint8_t, intrapred_[2 * MAX_INTERINTRA_SB_SQUARE]);
-  uint8_t *tmp_buf = get_buf_by_bd(xd, tmp_buf_);
-  uint8_t *intrapred = get_buf_by_bd(xd, intrapred_);
-  const int *const interintra_mode_cost =
-      x->interintra_mode_cost[size_group_lookup[bsize]];
-  const int_mv mv0 = mbmi->mv[0];  // entry MV, restored if wedge refinement is rejected
-  const int is_wedge_used = is_interintra_wedge_used(bsize);
-  int rwedge = is_wedge_used ? x->wedge_interintra_cost[bsize][0] : 0;
-  mbmi->ref_frame[1] = NONE_FRAME;  // build the luma inter predictor with no second reference...
-  xd->plane[0].dst.buf = tmp_buf;  // ...into tmp_buf with stride bw
-  xd->plane[0].dst.stride = bw;
-  av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, bsize);
-
-  restore_dst_buf(xd, *orig_dst, num_planes);
-  mbmi->ref_frame[1] = INTRA_FRAME;
-  mbmi->use_wedge_interintra = 0;
-  best_interintra_mode = args->inter_intra_mode[mbmi->ref_frame[0]];  // per-reference cached choice
-  int j = 0;  // stays 0 only when the mode loop below is not run
-  if (cpi->sf.reuse_inter_intra_mode == 0 ||
-      best_interintra_mode == INTERINTRA_MODES) {  // reuse disabled, or cached value is INTERINTRA_MODES (presumably "not set" - confirm)
-    for (j = 0; j < INTERINTRA_MODES; ++j) {
-      mbmi->interintra_mode = (INTERINTRA_MODE)j;
-      rmode = interintra_mode_cost[mbmi->interintra_mode];
-      av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst,
-                                                intrapred, bw);
-      av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
-      model_rd_sb_fn[MODELRD_TYPE_INTERINTRA](
-          cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &rate_sum, &dist_sum,
-          &tmp_skip_txfm_sb, &tmp_skip_sse_sb, NULL, NULL, NULL);
-      rd = RDCOST(x->rdmult, tmp_rate_mv + rate_sum + rmode, dist_sum);
-      if (rd < best_interintra_rd) {
-        best_interintra_rd = rd;
-        best_interintra_mode = mbmi->interintra_mode;
-      }
-    }
-    args->inter_intra_mode[mbmi->ref_frame[0]] = best_interintra_mode;
-  }
-  if (j == 0 || best_interintra_mode != II_SMOOTH_PRED) {  // NOTE(review): presumably II_SMOOTH_PRED is the last mode the loop builds, so no rebuild is needed then - confirm
-    mbmi->interintra_mode = best_interintra_mode;
-    rmode = interintra_mode_cost[mbmi->interintra_mode];
-    av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst,
-                                              intrapred, bw);
-    av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
-  }
-  rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
-                           &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
-  if (rd != INT64_MAX)
-    rd = RDCOST(x->rdmult, *rate_mv + rmode + rate_sum + rwedge, dist_sum);
-  best_interintra_rd = rd;
-  if (ref_best_rd < INT64_MAX && (best_interintra_rd >> 1) > ref_best_rd) {  // half of this rd still exceeds the best so far
-    return -1;
-  }
-  if (is_wedge_used) {
-    int64_t best_interintra_rd_nowedge = rd;
-    int64_t best_interintra_rd_wedge = INT64_MAX;
-    int_mv tmp_mv;  // assigned on every path below before it is read
-    // Disable wedge search if source variance is small
-    if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh) {
-      mbmi->use_wedge_interintra = 1;
-
-      rwedge = av1_cost_literal(get_interintra_wedge_bits(bsize)) +
-               x->wedge_interintra_cost[bsize][1];
-
-      best_interintra_rd_wedge =
-          pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_);
-
-      best_interintra_rd_wedge +=
-          RDCOST(x->rdmult, rmode + *rate_mv + rwedge, 0);
-      rd = INT64_MAX;  // stays INT64_MAX unless MV refinement below changes the MV
-      // Refine motion vector.
-      if (have_newmv_in_inter_mode(mbmi->mode)) {
-        // get negative of mask
-        const uint8_t *mask = av1_get_contiguous_soft_mask(
-            mbmi->interintra_wedge_index, 1, bsize);
-        tmp_mv = mbmi->mv[0];
-        compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row,
-                                      mi_col, intrapred, mask, bw, &tmp_rate_mv,
-                                      0);
-        if (mbmi->mv[0].as_int != tmp_mv.as_int) {
-          mbmi->mv[0].as_int = tmp_mv.as_int;
-          av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst,
-                                         bsize);
-          model_rd_sb_fn[MODELRD_TYPE_MASKED_COMPOUND](
-              cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &rate_sum, &dist_sum,
-              &tmp_skip_txfm_sb, &tmp_skip_sse_sb, NULL, NULL, NULL);
-          rd = RDCOST(x->rdmult, tmp_rate_mv + rmode + rate_sum + rwedge,
-                      dist_sum);
-        }
-      }
-      if (rd >= best_interintra_rd_wedge) {  // refinement absent or no better: fall back to the entry MV and its rate
-        tmp_mv.as_int = mv0.as_int;
-        tmp_rate_mv = *rate_mv;
-        av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
-      }
-      // Evaluate closer to true rd
-      rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
-                               &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
-      if (rd != INT64_MAX)
-        rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum,
-                    dist_sum);
-      best_interintra_rd_wedge = rd;
-      if (best_interintra_rd_wedge < best_interintra_rd_nowedge) {
-        mbmi->use_wedge_interintra = 1;
-        mbmi->mv[0].as_int = tmp_mv.as_int;
-        *tmp_rate2 += tmp_rate_mv - *rate_mv;  // swap the old MV rate for the new one in the caller's total
-        *rate_mv = tmp_rate_mv;
-      } else {
-        mbmi->use_wedge_interintra = 0;
-        mbmi->mv[0].as_int = mv0.as_int;
-        av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst, bsize);
-      }
-    } else {
-      mbmi->use_wedge_interintra = 0;
-    }
-  }  // if (is_interintra_wedge_used(bsize))
-  if (num_planes > 1) {
-    av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, orig_dst, bsize);
-  }
-  return 0;
-}
-
-// TODO(afergs): Refactor the MBMI references in here - there's four
-// TODO(afergs): Refactor optional args - add them to a struct or remove
-static int64_t motion_mode_rd(const AV1_COMP *const cpi, MACROBLOCK *const x,
-                              BLOCK_SIZE bsize, RD_STATS *rd_stats,
-                              RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
-                              int *disable_skip, int mi_row, int mi_col,
-                              HandleInterModeArgs *const args,
-                              int64_t ref_best_rd, const int *refs,
-                              int *rate_mv, BUFFER_SET *orig_dst
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-                              ,
-                              TileDataEnc *tile_data, int64_t *best_est_rd,
-                              int do_tx_search, InterModesInfo *inter_modes_info
-#endif
-) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  const int is_comp_pred = has_second_ref(mbmi);
-  const PREDICTION_MODE this_mode = mbmi->mode;
-  const int rate2_nocoeff = rd_stats->rate;
-  int best_xskip, best_disable_skip = 0;
-  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
-  MB_MODE_INFO base_mbmi, best_mbmi;
-  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
-  const int rate_mv0 = *rate_mv;
-
-  int interintra_allowed = cm->seq_params.enable_interintra_compound &&
-                           is_interintra_allowed(mbmi) && mbmi->compound_idx;
-  int pts0[SAMPLES_ARRAY_SIZE], pts_inref0[SAMPLES_ARRAY_SIZE];
-
-  assert(mbmi->ref_frame[1] != INTRA_FRAME);
-  const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
-  av1_invalid_rd_stats(&best_rd_stats);
-  aom_clear_system_state();
-  mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
-  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
-  if (cm->switchable_motion_mode) {
-    last_motion_mode_allowed = motion_mode_allowed(xd->global_motion, xd, mbmi,
-                                                   cm->allow_warped_motion);
-  }
-  if (last_motion_mode_allowed == WARPED_CAUSAL) {
-    mbmi->num_proj_ref = findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0);
-  }
-  int total_samples = mbmi->num_proj_ref;
-  if (total_samples == 0) {
-    last_motion_mode_allowed = OBMC_CAUSAL;
-  }
-  base_mbmi = *mbmi;
-
-  const int switchable_rate =
-      av1_is_interp_needed(xd) ? av1_get_switchable_rate(cm, x, xd) : 0;
-  int64_t best_rd = INT64_MAX;
-  int best_rate_mv = rate_mv0;
-  for (int mode_index = (int)SIMPLE_TRANSLATION;
-       mode_index <= (int)last_motion_mode_allowed + interintra_allowed;
-       mode_index++) {
-    if (args->skip_motion_mode && mode_index) continue;
-    int64_t tmp_rd = INT64_MAX;
-    int tmp_rate2 = rate2_nocoeff;
-    int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
-    int skip_txfm_sb = 0;
-    int tmp_rate_mv = rate_mv0;
-
-    *mbmi = base_mbmi;
-    if (is_interintra_mode) {
-      mbmi->motion_mode = SIMPLE_TRANSLATION;
-    } else {
-      mbmi->motion_mode = (MOTION_MODE)mode_index;
-      assert(mbmi->ref_frame[1] != INTRA_FRAME);
-    }
-
-    if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
-      // SIMPLE_TRANSLATION mode: no need to recalculate.
-      // The prediction is calculated before motion_mode_rd() is called in
-      // handle_inter_mode()
-    } else if (mbmi->motion_mode == OBMC_CAUSAL) {
-      uint32_t cur_mv = mbmi->mv[0].as_int;
-      assert(!is_comp_pred);
-      if (have_newmv_in_inter_mode(this_mode)) {
-        single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, &tmp_rate_mv);
-        mbmi->mv[0].as_int = x->best_mv.as_int;
-#if USE_DISCOUNT_NEWMV_TEST
-        if (discount_newmv_test(cpi, x, this_mode, mbmi->mv[0])) {
-          tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
-        }
-#endif
-        tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
-      }
-      if (mbmi->mv[0].as_int != cur_mv) {
-        av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
-      }
-      av1_build_obmc_inter_prediction(
-          cm, xd, mi_row, mi_col, args->above_pred_buf, args->above_pred_stride,
-          args->left_pred_buf, args->left_pred_stride);
-    } else if (mbmi->motion_mode == WARPED_CAUSAL) {
-      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
-      mbmi->motion_mode = WARPED_CAUSAL;
-      mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
-      mbmi->interp_filters = av1_broadcast_interp_filter(
-          av1_unswitchable_filter(cm->interp_filter));
-
-      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
-      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
-      // Select the samples according to motion vector difference
-      if (mbmi->num_proj_ref > 1) {
-        mbmi->num_proj_ref = selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
-                                           mbmi->num_proj_ref, bsize);
-      }
-
-      if (!find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
-                           mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
-                           &mbmi->wm_params, mi_row, mi_col)) {
-        // Refine MV for NEWMV mode
-        assert(!is_comp_pred);
-        if (have_newmv_in_inter_mode(this_mode)) {
-          const int_mv mv0 = mbmi->mv[0];
-          const WarpedMotionParams wm_params0 = mbmi->wm_params;
-          int num_proj_ref0 = mbmi->num_proj_ref;
-
-          // Refine MV in a small range.
-          av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts0, pts_inref0,
-                               total_samples);
-
-          // Keep the refined MV and WM parameters.
-          if (mv0.as_int != mbmi->mv[0].as_int) {
-            const int ref = refs[0];
-            const int_mv ref_mv = av1_get_ref_mv(x, 0);
-            tmp_rate_mv =
-                av1_mv_bit_cost(&mbmi->mv[0].as_mv, &ref_mv.as_mv,
-                                x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
-
-            if (cpi->sf.adaptive_motion_search)
-              x->pred_mv[ref] = mbmi->mv[0].as_mv;
-
-#if USE_DISCOUNT_NEWMV_TEST
-            if (discount_newmv_test(cpi, x, this_mode, mbmi->mv[0])) {
-              tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
-            }
-#endif
-            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
-          } else {
-            // Restore the old MV and WM parameters.
-            mbmi->mv[0] = mv0;
-            mbmi->wm_params = wm_params0;
-            mbmi->num_proj_ref = num_proj_ref0;
-          }
-        }
-
-        av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
-      } else {
-        continue;
-      }
-    } else if (is_interintra_mode) {
-      const int ret = handle_inter_intra_mode(
-          cpi, x, bsize, mi_row, mi_col, mbmi, args, ref_best_rd, &tmp_rate_mv,
-          &tmp_rate2, orig_dst);
-      if (ret < 0) continue;
-    }
-
-    if (!cpi->common.all_lossless)
-      check_block_skip(cpi, bsize, x, xd, 0, num_planes - 1, &skip_txfm_sb);
-
-    x->skip = 0;
-
-    rd_stats->dist = 0;
-    rd_stats->sse = 0;
-    rd_stats->skip = 1;
-    rd_stats->rate = tmp_rate2;
-    if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
-    if (interintra_allowed) {
-      rd_stats->rate += x->interintra_cost[size_group_lookup[bsize]]
-                                          [mbmi->ref_frame[1] == INTRA_FRAME];
-      if (mbmi->ref_frame[1] == INTRA_FRAME) {
-        rd_stats->rate += x->interintra_mode_cost[size_group_lookup[bsize]]
-                                                 [mbmi->interintra_mode];
-        if (is_interintra_wedge_used(bsize)) {
-          rd_stats->rate +=
-              x->wedge_interintra_cost[bsize][mbmi->use_wedge_interintra];
-          if (mbmi->use_wedge_interintra) {
-            rd_stats->rate +=
-                av1_cost_literal(get_interintra_wedge_bits(bsize));
-          }
-        }
-      }
-    }
-    if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
-        (mbmi->ref_frame[1] != INTRA_FRAME)) {
-      if (last_motion_mode_allowed == WARPED_CAUSAL) {
-        rd_stats->rate += x->motion_mode_cost[bsize][mbmi->motion_mode];
-      } else {
-        rd_stats->rate += x->motion_mode_cost1[bsize][mbmi->motion_mode];
-      }
-    }
-
-    if (!skip_txfm_sb) {
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-      int64_t est_rd = 0;
-      int est_skip = 0;
-      if (cpi->sf.inter_mode_rd_model_estimation && cm->tile_cols == 1 &&
-          cm->tile_rows == 1) {
-        InterModeRdModel *md = &tile_data->inter_mode_rd_models[mbmi->sb_type];
-        if (md->ready) {
-          const int64_t curr_sse = get_sse(cpi, x);
-          est_rd = get_est_rd(tile_data, mbmi->sb_type, x->rdmult, curr_sse,
-                              rd_stats->rate);
-          est_skip = est_rd * 0.8 > *best_est_rd;
-          if (est_skip) {
-            mbmi->ref_frame[1] = ref_frame_1;
-            continue;
-          } else {
-            if (est_rd < *best_est_rd) {
-              *best_est_rd = est_rd;
-            }
-          }
-        }
-      }
-#endif  // CONFIG_COLLECT_INTER_MODE_RD_STATS
-    }
-
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-    if (!do_tx_search) {
-      const int64_t curr_sse = get_sse(cpi, x);
-      int est_residue_cost = 0;
-      int64_t est_dist = 0;
-      const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
-                                               &est_residue_cost, &est_dist);
-      (void)has_est_rd;
-      assert(has_est_rd);
-      const int mode_rate = rd_stats->rate;
-      rd_stats->rate += est_residue_cost;
-      rd_stats->dist = est_dist;
-      rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
-      if (cm->reference_mode == SINGLE_REFERENCE) {
-        if (!is_comp_pred) {
-          inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
-                                rd_stats->rdcost, mbmi);
-        }
-      } else {
-        inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
-                              rd_stats->rdcost, mbmi);
-      }
-    } else {
-#endif
-      int mode_rate = rd_stats->rate;
-      if (!txfm_search(cpi, x, bsize, mi_row, mi_col, rd_stats, rd_stats_y,
-                       rd_stats_uv, mode_rate, ref_best_rd)) {
-        if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
-          return INT64_MAX;
-        }
-        continue;
-      }
-      if (!skip_txfm_sb) {
-        const int64_t curr_rd =
-            RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
-        if (curr_rd < ref_best_rd) {
-          ref_best_rd = curr_rd;
-        }
-        *disable_skip = 0;
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-        if (cpi->sf.inter_mode_rd_model_estimation) {
-          const int skip_ctx = av1_get_skip_context(xd);
-          inter_mode_data_push(tile_data, mbmi->sb_type, rd_stats->sse,
-                               rd_stats->dist,
-                               rd_stats_y->rate + rd_stats_uv->rate +
-                                   x->skip_cost[skip_ctx][mbmi->skip]);
-        }
-#endif  // CONFIG_COLLECT_INTER_MODE_RD_STATS
-      } else {
-        *disable_skip = 1;
-      }
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-    }
-#endif
-
-    if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
-      if (is_nontrans_global_motion(xd, xd->mi[0])) {
-        mbmi->interp_filters = av1_broadcast_interp_filter(
-            av1_unswitchable_filter(cm->interp_filter));
-      }
-    }
-
-    tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
-    if (mode_index == 0)
-      args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
-    if ((mode_index == 0) || (tmp_rd < best_rd)) {
-      best_mbmi = *mbmi;
-      best_rd = tmp_rd;
-      best_rd_stats = *rd_stats;
-      best_rd_stats_y = *rd_stats_y;
-      best_rate_mv = tmp_rate_mv;
-      if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
-      memcpy(best_blk_skip, x->blk_skip,
-             sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w);
-      best_xskip = x->skip;
-      best_disable_skip = *disable_skip;
-      if (best_xskip) break;
-    }
-  }
-  mbmi->ref_frame[1] = ref_frame_1;
-  *rate_mv = best_rate_mv;
-  if (best_rd == INT64_MAX) {
-    av1_invalid_rd_stats(rd_stats);
-    restore_dst_buf(xd, *orig_dst, num_planes);
-    return INT64_MAX;
-  }
-  *mbmi = best_mbmi;
-  *rd_stats = best_rd_stats;
-  *rd_stats_y = best_rd_stats_y;
-  if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
-  memcpy(x->blk_skip, best_blk_skip,
-         sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w);
-  x->skip = best_xskip;
-  *disable_skip = best_disable_skip;
-
-  restore_dst_buf(xd, *orig_dst, num_planes);
-  return 0;
-}
-
-static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
-                            MACROBLOCK *const x, BLOCK_SIZE bsize, int mi_row,
-                            int mi_col, BUFFER_SET *const orig_dst) {
-  const AV1_COMMON *cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCKD *const xd = &x->e_mbd;
-  av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
-
-  int64_t total_sse = 0;
-  for (int plane = 0; plane < num_planes; ++plane) {
-    const struct macroblock_plane *const p = &x->plane[plane];
-    const struct macroblockd_plane *const pd = &xd->plane[plane];
-    const BLOCK_SIZE plane_bsize =
-        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
-    const int bw = block_size_wide[plane_bsize];
-    const int bh = block_size_high[plane_bsize];
-
-    av1_subtract_plane(x, bsize, plane);
-    int64_t sse = aom_sum_squares_2d_i16(p->src_diff, bw, bw, bh);
-    sse = sse << 4;
-    total_sse += sse;
-  }
-  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
-  rd_stats->dist = rd_stats->sse = total_sse;
-  rd_stats->rate = x->skip_mode_cost[skip_mode_ctx][1];
-  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
-
-  restore_dst_buf(xd, *orig_dst, num_planes);
-  return 0;
-}
-
-static INLINE int get_ref_mv_offset(PREDICTION_MODE single_mode,
-                                    uint8_t ref_mv_idx) {
-  assert(is_inter_singleref_mode(single_mode));
-  int ref_mv_offset;
-  if (single_mode == NEARESTMV) {
-    ref_mv_offset = 0;
-  } else if (single_mode == NEARMV) {
-    ref_mv_offset = ref_mv_idx + 1;
-  } else {
-    ref_mv_offset = -1;
-  }
-  return ref_mv_offset;
-}
-
-static INLINE void get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
-                               int ref_idx, int ref_mv_idx,
-                               const MV_REFERENCE_FRAME *ref_frame,
-                               const MB_MODE_INFO_EXT *mbmi_ext) {
-  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
-  const int is_comp_pred = ref_frame[1] > INTRA_FRAME;
-  const PREDICTION_MODE single_mode =
-      get_single_mode(this_mode, ref_idx, is_comp_pred);
-  assert(is_inter_singleref_mode(single_mode));
-  if (single_mode == NEWMV) {
-    this_mv->as_int = INVALID_MV;
-  } else if (single_mode == GLOBALMV) {
-    *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
-  } else {
-    assert(single_mode == NEARMV || single_mode == NEARESTMV);
-    const int ref_mv_offset = get_ref_mv_offset(single_mode, ref_mv_idx);
-    if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
-      assert(ref_mv_offset >= 0);
-      if (ref_idx == 0) {
-        *this_mv =
-            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
-      } else {
-        *this_mv =
-            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
-      }
-    } else {
-      *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
-    }
-  }
-}
-
-// This function update the non-new mv for the current prediction mode
-static INLINE int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
-                               const AV1_COMMON *cm, const MACROBLOCK *x) {
-  const MACROBLOCKD *xd = &x->e_mbd;
-  const MB_MODE_INFO *mbmi = xd->mi[0];
-  const int is_comp_pred = has_second_ref(mbmi);
-  int ret = 1;
-  for (int i = 0; i < is_comp_pred + 1; ++i) {
-    int_mv this_mv;
-    get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx, mbmi->ref_frame,
-                x->mbmi_ext);
-    const PREDICTION_MODE single_mode =
-        get_single_mode(this_mode, i, is_comp_pred);
-    if (single_mode == NEWMV) {
-      cur_mv[i] = this_mv;
-    } else {
-      ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
-    }
-  }
-  return ret;
-}
-
-static INLINE int get_drl_cost(const MB_MODE_INFO *mbmi,
-                               const MB_MODE_INFO_EXT *mbmi_ext,
-                               int (*drl_mode_cost0)[2],
-                               int8_t ref_frame_type) {
-  int cost = 0;
-  if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
-    for (int idx = 0; idx < 2; ++idx) {
-      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
-        uint8_t drl_ctx =
-            av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
-        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
-        if (mbmi->ref_mv_idx == idx) return cost;
-      }
-    }
-    return cost;
-  }
-
-  if (have_nearmv_in_inter_mode(mbmi->mode)) {
-    for (int idx = 1; idx < 3; ++idx) {
-      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
-        uint8_t drl_ctx =
-            av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
-        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
-        if (mbmi->ref_mv_idx == (idx - 1)) return cost;
-      }
-    }
-    return cost;
-  }
-  return cost;
-}
-
-// Struct for buffers used by compound_type_rd() function.
-// For sizes and alignment of these arrays, refer to
-// alloc_compound_type_rd_buffers() function.
-typedef struct {
-  uint8_t *pred0;
-  uint8_t *pred1;
-  int16_t *residual1;          // src - pred1
-  int16_t *diff10;             // pred1 - pred0
-  uint8_t *tmp_best_mask_buf;  // backup of the best segmentation mask
-} CompoundTypeRdBuffers;
-
-static int compound_type_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
-                            BLOCK_SIZE bsize, int mi_col, int mi_row,
-                            int_mv *cur_mv, int masked_compound_used,
-                            BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst,
-                            CompoundTypeRdBuffers *buffers, int *rate_mv,
-                            int64_t *rd, RD_STATS *rd_stats,
-                            int64_t ref_best_rd) {
-  const AV1_COMMON *cm = &cpi->common;
-  MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  const PREDICTION_MODE this_mode = mbmi->mode;
-  const int bw = block_size_wide[bsize];
-  int rate_sum, rs2;
-  int64_t dist_sum;
-
-  int_mv best_mv[2];
-  int best_tmp_rate_mv = *rate_mv;
-  int tmp_skip_txfm_sb;
-  int64_t tmp_skip_sse_sb;
-  INTERINTER_COMPOUND_DATA best_compound_data;
-  best_compound_data.type = COMPOUND_AVERAGE;
-  uint8_t *preds0[1] = { buffers->pred0 };
-  uint8_t *preds1[1] = { buffers->pred1 };
-  int strides[1] = { bw };
-  int tmp_rate_mv;
-  const int num_pix = 1 << num_pels_log2_lookup[bsize];
-  const int mask_len = 2 * num_pix * sizeof(uint8_t);
-  COMPOUND_TYPE cur_type;
-  int best_compmode_interinter_cost = 0;
-  int calc_pred_masked_compound = 1;
-
-  best_mv[0].as_int = cur_mv[0].as_int;
-  best_mv[1].as_int = cur_mv[1].as_int;
-  *rd = INT64_MAX;
-  for (cur_type = COMPOUND_AVERAGE; cur_type < COMPOUND_TYPES; cur_type++) {
-    if (cur_type != COMPOUND_AVERAGE && !masked_compound_used) break;
-    if (!is_interinter_compound_used(cur_type, bsize)) continue;
-    tmp_rate_mv = *rate_mv;
-    int64_t best_rd_cur = INT64_MAX;
-    mbmi->interinter_comp.type = cur_type;
-    int masked_type_cost = 0;
-
-    const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
-    const int comp_index_ctx = get_comp_index_context(cm, xd);
-    mbmi->compound_idx = 1;
-    if (cur_type == COMPOUND_AVERAGE) {
-      mbmi->comp_group_idx = 0;
-      if (masked_compound_used) {
-        masked_type_cost += x->comp_group_idx_cost[comp_group_idx_ctx][0];
-      }
-      masked_type_cost += x->comp_idx_cost[comp_index_ctx][1];
-      rs2 = masked_type_cost;
-      const int64_t mode_rd = RDCOST(x->rdmult, rs2 + rd_stats->rate, 0);
-      if (mode_rd < ref_best_rd) {
-        av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst, bsize);
-        int64_t est_rd =
-            estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
-                                &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
-        if (est_rd != INT64_MAX)
-          best_rd_cur = RDCOST(x->rdmult, rs2 + *rate_mv + rate_sum, dist_sum);
-      }
-      // use spare buffer for following compound type try
-      restore_dst_buf(xd, *tmp_dst, 1);
-    } else {
-      mbmi->comp_group_idx = 1;
-      masked_type_cost += x->comp_group_idx_cost[comp_group_idx_ctx][1];
-      masked_type_cost += x->compound_type_cost[bsize][cur_type - 1];
-      rs2 = masked_type_cost;
-      if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
-          *rd / 3 < ref_best_rd) {
-        best_rd_cur = build_and_cost_compound_type(
-            cpi, x, cur_mv, bsize, this_mode, &rs2, *rate_mv, orig_dst,
-            &tmp_rate_mv, preds0, preds1, buffers->residual1, buffers->diff10,
-            strides, mi_row, mi_col, rd_stats->rate, ref_best_rd,
-            &calc_pred_masked_compound);
-      }
-    }
-    if (best_rd_cur < *rd) {
-      *rd = best_rd_cur;
-      best_compound_data = mbmi->interinter_comp;
-      if (masked_compound_used && cur_type != COMPOUND_TYPES - 1) {
-        memcpy(buffers->tmp_best_mask_buf, xd->seg_mask, mask_len);
-      }
-      best_compmode_interinter_cost = rs2;
-      if (have_newmv_in_inter_mode(this_mode)) {
-        if (cur_type == COMPOUND_WEDGE) {
-          best_tmp_rate_mv = tmp_rate_mv;
-          best_mv[0].as_int = mbmi->mv[0].as_int;
-          best_mv[1].as_int = mbmi->mv[1].as_int;
-        } else {
-          best_mv[0].as_int = cur_mv[0].as_int;
-          best_mv[1].as_int = cur_mv[1].as_int;
-        }
-      }
-    }
-    // reset to original mvs for next iteration
-    mbmi->mv[0].as_int = cur_mv[0].as_int;
-    mbmi->mv[1].as_int = cur_mv[1].as_int;
-  }
-  if (mbmi->interinter_comp.type != best_compound_data.type) {
-    mbmi->comp_group_idx =
-        (best_compound_data.type == COMPOUND_AVERAGE) ? 0 : 1;
-    mbmi->interinter_comp = best_compound_data;
-    memcpy(xd->seg_mask, buffers->tmp_best_mask_buf, mask_len);
-  }
-  if (have_newmv_in_inter_mode(this_mode)) {
-    mbmi->mv[0].as_int = best_mv[0].as_int;
-    mbmi->mv[1].as_int = best_mv[1].as_int;
-    if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
-      rd_stats->rate += best_tmp_rate_mv - *rate_mv;
-      *rate_mv = best_tmp_rate_mv;
-    }
-  }
-  restore_dst_buf(xd, *orig_dst, 1);
-  return best_compmode_interinter_cost;
-}
-
-static INLINE int is_single_newmv_valid(HandleInterModeArgs *args,
-                                        MB_MODE_INFO *mbmi,
-                                        PREDICTION_MODE this_mode) {
-  for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
-    const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx, 1);
-    const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
-    if (single_mode == NEWMV &&
-        args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
-      return 0;
-    }
-  }
-  return 1;
-}
-
-static int get_drl_refmv_count(const MACROBLOCK *const x,
-                               const MV_REFERENCE_FRAME *ref_frame,
-                               PREDICTION_MODE mode) {
-  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
-  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
-  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
-  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
-  const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV);
-  const int has_drl =
-      (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1);
-  const int ref_set =
-      has_drl ? AOMMIN(MAX_REF_MV_SERCH, ref_mv_count - has_nearmv) : 1;
-
-  return ref_set;
-}
-
-typedef struct {
-  int64_t rd;
-  int drl_cost;
-  int rate_mv;
-  int_mv mv;
-} inter_mode_info;
-
-static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                 BLOCK_SIZE bsize, RD_STATS *rd_stats,
-                                 RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
-                                 int *disable_skip, int mi_row, int mi_col,
-                                 HandleInterModeArgs *args, int64_t ref_best_rd,
-                                 uint8_t *const tmp_buf,
-                                 CompoundTypeRdBuffers *rd_buffers
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-                                 ,
-                                 TileDataEnc *tile_data, int64_t *best_est_rd,
-                                 const int do_tx_search,
-                                 InterModesInfo *inter_modes_info
-#endif
-) {
-  const AV1_COMMON *cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
-  const int is_comp_pred = has_second_ref(mbmi);
-  const PREDICTION_MODE this_mode = mbmi->mode;
-  int i;
-  int refs[2] = { mbmi->ref_frame[0],
-                  (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
-  int rate_mv = 0;
-  int64_t rd = INT64_MAX;
-
-  // do first prediction into the destination buffer. Do the next
-  // prediction into a temporary buffer. Then keep track of which one
-  // of these currently holds the best predictor, and use the other
-  // one for future predictions. In the end, copy from tmp_buf to
-  // dst if necessary.
-  struct macroblockd_plane *p = xd->plane;
-  BUFFER_SET orig_dst = {
-    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
-    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
-  };
-  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
-                                 tmp_buf + 2 * MAX_SB_SQUARE },
-                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
-
-  int skip_txfm_sb = 0;
-  int64_t skip_sse_sb = INT64_MAX;
-  int16_t mode_ctx;
-  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
-                                   cm->seq_params.enable_masked_compound;
-  int64_t ret_val = INT64_MAX;
-  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
-  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
-  int64_t best_rd = INT64_MAX;
-  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
-  MB_MODE_INFO best_mbmi = *mbmi;
-  int best_disable_skip;
-  int best_xskip;
-  int64_t newmv_ret_val = INT64_MAX;
-  int_mv backup_mv[2] = { { 0 } };
-  int backup_rate_mv = 0;
-  inter_mode_info mode_info[MAX_REF_MV_SERCH];
-
-  int comp_idx;
-  const int search_jnt_comp = is_comp_pred & cm->seq_params.enable_jnt_comp &
-                              (mbmi->mode != GLOBAL_GLOBALMV);
-
-  // TODO(jingning): This should be deprecated shortly.
-  const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
-  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
-
-  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
-    mode_info[ref_mv_idx].mv.as_int = INVALID_MV;
-    mode_info[ref_mv_idx].rd = INT64_MAX;
-
-    if (cpi->sf.reduce_inter_modes && ref_mv_idx > 0) {
-      if (mbmi->ref_frame[0] == LAST2_FRAME ||
-          mbmi->ref_frame[0] == LAST3_FRAME ||
-          mbmi->ref_frame[1] == LAST2_FRAME ||
-          mbmi->ref_frame[1] == LAST3_FRAME) {
-        if (mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx + has_nearmv]
-                .weight < REF_CAT_LEVEL) {
-          continue;
-        }
-      }
-    }
-
-    av1_init_rd_stats(rd_stats);
-
-    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
-    mbmi->comp_group_idx = 0;
-    mbmi->compound_idx = 1;
-    if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
-
-    mode_ctx =
-        av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
-
-    mbmi->num_proj_ref = 0;
-    mbmi->motion_mode = SIMPLE_TRANSLATION;
-    mbmi->ref_mv_idx = ref_mv_idx;
-
-    if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, this_mode))) {
-      continue;
-    }
-
-    rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
-    const int drl_cost =
-        get_drl_cost(mbmi, mbmi_ext, x->drl_mode_cost0, ref_frame_type);
-    rd_stats->rate += drl_cost;
-    mode_info[ref_mv_idx].drl_cost = drl_cost;
-
-    if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
-        mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
-      continue;
-    }
-
-    int64_t best_rd2 = INT64_MAX;
-
-    const RD_STATS backup_rd_stats = *rd_stats;
-    // If !search_jnt_comp, we need to force mbmi->compound_idx = 1.
-    for (comp_idx = 1; comp_idx >= !search_jnt_comp; --comp_idx) {
-      int rs = 0;
-      int compmode_interinter_cost = 0;
-      mbmi->compound_idx = comp_idx;
-      if (is_comp_pred && comp_idx == 0) {
-        *rd_stats = backup_rd_stats;
-        mbmi->interinter_comp.type = COMPOUND_AVERAGE;
-        if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
-        mbmi->num_proj_ref = 0;
-        mbmi->motion_mode = SIMPLE_TRANSLATION;
-        mbmi->comp_group_idx = 0;
-
-        const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
-        const int comp_index_ctx = get_comp_index_context(cm, xd);
-        if (masked_compound_used) {
-          compmode_interinter_cost +=
-              x->comp_group_idx_cost[comp_group_idx_ctx][0];
-        }
-        compmode_interinter_cost += x->comp_idx_cost[comp_index_ctx][0];
-      }
-
-      int_mv cur_mv[2];
-      if (!build_cur_mv(cur_mv, this_mode, cm, x)) {
-        continue;
-      }
-      if (have_newmv_in_inter_mode(this_mode)) {
-        if (comp_idx == 0) {
-          cur_mv[0] = backup_mv[0];
-          cur_mv[1] = backup_mv[1];
-          rate_mv = backup_rate_mv;
-        }
-
-        // when jnt_comp_skip_mv_search flag is on, new mv will be searched once
-        if (!(search_jnt_comp && cpi->sf.jnt_comp_skip_mv_search &&
-              comp_idx == 0)) {
-          newmv_ret_val = handle_newmv(cpi, x, bsize, cur_mv, mi_row, mi_col,
-                                       &rate_mv, args);
-
-          // Store cur_mv and rate_mv so that they can be restored in the next
-          // iteration of the loop
-          backup_mv[0] = cur_mv[0];
-          backup_mv[1] = cur_mv[1];
-          backup_rate_mv = rate_mv;
-        }
-
-        if (newmv_ret_val != 0) {
-          continue;
-        } else {
-          rd_stats->rate += rate_mv;
-        }
-
-        if (cpi->sf.skip_repeated_newmv) {
-          if (!is_comp_pred && this_mode == NEWMV && ref_mv_idx > 0) {
-            int skip = 0;
-            int this_rate_mv = 0;
-            for (i = 0; i < ref_mv_idx; ++i) {
-              // Check if the motion search result same as previous results
-              if (cur_mv[0].as_int == args->single_newmv[i][refs[0]].as_int) {
-                // If the compared mode has no valid rd, it is unlikely this
-                // mode will be the best mode
-                if (mode_info[i].rd == INT64_MAX) {
-                  skip = 1;
-                  break;
-                }
-                // Compare the cost difference including drl cost and mv cost
-                if (mode_info[i].mv.as_int != INVALID_MV) {
-                  const int compare_cost =
-                      mode_info[i].rate_mv + mode_info[i].drl_cost;
-                  const int_mv ref_mv = av1_get_ref_mv(x, 0);
-                  this_rate_mv = av1_mv_bit_cost(&mode_info[i].mv.as_mv,
-                                                 &ref_mv.as_mv, x->nmvjointcost,
-                                                 x->mvcost, MV_COST_WEIGHT);
-                  const int this_cost = this_rate_mv + drl_cost;
-
-                  if (compare_cost < this_cost) {
-                    skip = 1;
-                    break;
-                  } else {
-                    // If the cost is less than current best result, make this
-                    // the best and update corresponding variables
-                    if (best_mbmi.ref_mv_idx == i) {
-                      assert(best_rd != INT64_MAX);
-                      best_mbmi.ref_mv_idx = ref_mv_idx;
-                      best_rd_stats.rate += this_cost - compare_cost;
-                      best_rd = RDCOST(x->rdmult, best_rd_stats.rate,
-                                       best_rd_stats.dist);
-                      if (best_rd < ref_best_rd) ref_best_rd = best_rd;
-
-                      skip = 1;
-                      break;
-                    }
-                  }
-                }
-              }
-            }
-            if (skip) {
-              args->modelled_rd[this_mode][ref_mv_idx][refs[0]] =
-                  args->modelled_rd[this_mode][i][refs[0]];
-              args->simple_rd[this_mode][ref_mv_idx][refs[0]] =
-                  args->simple_rd[this_mode][i][refs[0]];
-              mode_info[ref_mv_idx].rd = mode_info[i].rd;
-              mode_info[ref_mv_idx].rate_mv = this_rate_mv;
-              mode_info[ref_mv_idx].mv.as_int = mode_info[i].mv.as_int;
-
-              restore_dst_buf(xd, orig_dst, num_planes);
-              continue;
-            }
-          }
-        }
-      }
-      for (i = 0; i < is_comp_pred + 1; ++i) {
-        mbmi->mv[i].as_int = cur_mv[i].as_int;
-      }
-      const int ref_mv_cost = cost_mv_ref(x, this_mode, mode_ctx);
-#if USE_DISCOUNT_NEWMV_TEST
-      // We don't include the cost of the second reference here, because there
-      // are only three options: Last/Golden, ARF/Last or Golden/ARF, or in
-      // other words if you present them in that order, the second one is always
-      // known if the first is known.
-      //
-      // Under some circumstances we discount the cost of new mv mode to
-      // encourage initiation of a motion field.
-      if (discount_newmv_test(cpi, x, this_mode, mbmi->mv[0])) {
-        // discount_newmv_test only applies discount on NEWMV mode.
-        assert(this_mode == NEWMV);
-        rd_stats->rate += AOMMIN(cost_mv_ref(x, this_mode, mode_ctx),
-                                 cost_mv_ref(x, NEARESTMV, mode_ctx));
-      } else {
-        rd_stats->rate += ref_mv_cost;
-      }
-#else
-      rd_stats->rate += ref_mv_cost;
-#endif
-
-      if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
-          mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
-        continue;
-      }
-
-      int skip_build_pred = 0;
-      if (is_comp_pred && comp_idx) {
-        // Find matching interp filter or set to default interp filter
-        const int need_search =
-            av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd);
-        int match_found = -1;
-        const InterpFilter assign_filter = cm->interp_filter;
-        if (cpi->sf.skip_repeat_interpolation_filter_search && need_search) {
-          match_found = find_interp_filter_in_stats(x, mbmi);
-        }
-        if (!need_search || match_found == -1) {
-          set_default_interp_filters(mbmi, assign_filter);
-        }
-
-        int64_t best_rd_compound;
-        compmode_interinter_cost = compound_type_rd(
-            cpi, x, bsize, mi_col, mi_row, cur_mv, masked_compound_used,
-            &orig_dst, &tmp_dst, rd_buffers, &rate_mv, &best_rd_compound,
-            rd_stats, ref_best_rd);
-        if (ref_best_rd < INT64_MAX && best_rd_compound / 3 > ref_best_rd) {
-          restore_dst_buf(xd, orig_dst, num_planes);
-          continue;
-        }
-        // No need to call av1_build_inter_predictors_sby if
-        // COMPOUND_AVERAGE is selected because it is the first
-        // candidate in compound_type_rd, and the following
-        // compound types searching uses tmp_dst buffer
-        if (mbmi->interinter_comp.type == COMPOUND_AVERAGE) {
-          if (num_planes > 1)
-            av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, &orig_dst,
-                                            bsize);
-          skip_build_pred = 1;
-        }
-      }
-
-      ret_val = interpolation_filter_search(
-          x, cpi, bsize, mi_row, mi_col, &tmp_dst, &orig_dst,
-          args->single_filter, &rd, &rs, &skip_txfm_sb, &skip_sse_sb,
-          skip_build_pred, args, ref_best_rd);
-      if (args->modelled_rd != NULL && !is_comp_pred) {
-        args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
-      }
-      if (ret_val != 0) {
-        restore_dst_buf(xd, orig_dst, num_planes);
-        continue;
-      } else if (cpi->sf.model_based_post_interp_filter_breakout &&
-                 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
-        restore_dst_buf(xd, orig_dst, num_planes);
-        if ((rd >> 3) * 2 > ref_best_rd) break;
-        continue;
-      }
-
-      if (search_jnt_comp) {
-        // if 1/2 model rd is larger than best_rd in jnt_comp mode,
-        // use jnt_comp mode, save additional search
-        if ((rd >> 3) * 4 > best_rd) {
-          restore_dst_buf(xd, orig_dst, num_planes);
-          continue;
-        }
-      }
-
-      if (!is_comp_pred)
-        args->single_filter[this_mode][refs[0]] =
-            av1_extract_interp_filter(mbmi->interp_filters, 0);
-
-      if (args->modelled_rd != NULL) {
-        if (is_comp_pred) {
-          const int mode0 = compound_ref0_mode(this_mode);
-          const int mode1 = compound_ref1_mode(this_mode);
-          const int64_t mrd =
-              AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
-                     args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
-          if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
-            restore_dst_buf(xd, orig_dst, num_planes);
-            continue;
-          }
-        }
-      }
-      rd_stats->rate += compmode_interinter_cost;
-
-      if (search_jnt_comp && cpi->sf.jnt_comp_fast_tx_search && comp_idx == 0) {
-        // TODO(chengchen): this speed feature introduces big loss.
-        // Need better estimation of rate distortion.
-        int dummy_rate;
-        int64_t dummy_dist;
-        int plane_rate[MAX_MB_PLANE] = { 0 };
-        int64_t plane_sse[MAX_MB_PLANE] = { 0 };
-        int64_t plane_dist[MAX_MB_PLANE] = { 0 };
-
-        model_rd_sb_fn[MODELRD_TYPE_JNT_COMPOUND](
-            cpi, bsize, x, xd, 0, num_planes - 1, mi_row, mi_col, &dummy_rate,
-            &dummy_dist, &skip_txfm_sb, &skip_sse_sb, plane_rate, plane_sse,
-            plane_dist);
-
-        rd_stats->rate += rs;
-        rd_stats->rate += plane_rate[0] + plane_rate[1] + plane_rate[2];
-        rd_stats_y->rate = plane_rate[0];
-        rd_stats_uv->rate = plane_rate[1] + plane_rate[2];
-        rd_stats->sse = plane_sse[0] + plane_sse[1] + plane_sse[2];
-        rd_stats_y->sse = plane_sse[0];
-        rd_stats_uv->sse = plane_sse[1] + plane_sse[2];
-        rd_stats->dist = plane_dist[0] + plane_dist[1] + plane_dist[2];
-        rd_stats_y->dist = plane_dist[0];
-        rd_stats_uv->dist = plane_dist[1] + plane_dist[2];
-      } else {
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-        ret_val = motion_mode_rd(
-            cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv, disable_skip,
-            mi_row, mi_col, args, ref_best_rd, refs, &rate_mv, &orig_dst,
-            tile_data, best_est_rd, do_tx_search, inter_modes_info);
-#else
-        ret_val = motion_mode_rd(cpi, x, bsize, rd_stats, rd_stats_y,
-                                 rd_stats_uv, disable_skip, mi_row, mi_col,
-                                 args, ref_best_rd, refs, &rate_mv, &orig_dst);
-#endif
-      }
-      mode_info[ref_mv_idx].mv.as_int = mbmi->mv[0].as_int;
-      mode_info[ref_mv_idx].rate_mv = rate_mv;
-      if (ret_val != INT64_MAX) {
-        int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
-        mode_info[ref_mv_idx].rd = tmp_rd;
-        if (tmp_rd < best_rd) {
-          best_rd_stats = *rd_stats;
-          best_rd_stats_y = *rd_stats_y;
-          best_rd_stats_uv = *rd_stats_uv;
-          best_rd = tmp_rd;
-          best_mbmi = *mbmi;
-          best_disable_skip = *disable_skip;
-          best_xskip = x->skip;
-          memcpy(best_blk_skip, x->blk_skip,
-                 sizeof(best_blk_skip[0]) * xd->n4_h * xd->n4_w);
-        }
-
-        if (tmp_rd < best_rd2) {
-          best_rd2 = tmp_rd;
-        }
-
-        if (tmp_rd < ref_best_rd) {
-          ref_best_rd = tmp_rd;
-        }
-      }
-      restore_dst_buf(xd, orig_dst, num_planes);
-    }
-  }
-
-  if (best_rd == INT64_MAX) return INT64_MAX;
-
-  // re-instate status of the best choice
-  *rd_stats = best_rd_stats;
-  *rd_stats_y = best_rd_stats_y;
-  *rd_stats_uv = best_rd_stats_uv;
-  *mbmi = best_mbmi;
-  *disable_skip = best_disable_skip;
-  x->skip = best_xskip;
-  assert(IMPLIES(mbmi->comp_group_idx == 1,
-                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
-  memcpy(x->blk_skip, best_blk_skip,
-         sizeof(best_blk_skip[0]) * xd->n4_h * xd->n4_w);
-
-  return RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
-}
-
-static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
-                                       RD_STATS *rd_cost, BLOCK_SIZE bsize,
-                                       int64_t best_rd) {
-  const AV1_COMMON *const cm = &cpi->common;
-  if (!av1_allow_intrabc(cm)) return INT64_MAX;
-  const int num_planes = av1_num_planes(cm);
-
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const TileInfo *tile = &xd->tile;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
-  const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
-  const int w = block_size_wide[bsize];
-  const int h = block_size_high[bsize];
-  const int sb_row = mi_row >> cm->seq_params.mib_size_log2;
-  const int sb_col = mi_col >> cm->seq_params.mib_size_log2;
-
-  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
-  MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
-  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
-                   mbmi_ext->ref_mv_stack, NULL, mbmi_ext->global_mvs, mi_row,
-                   mi_col, mbmi_ext->mode_context);
-
-  int_mv nearestmv, nearmv;
-  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
-                                   0);
-
-  if (nearestmv.as_int == INVALID_MV) {
-    nearestmv.as_int = 0;
-  }
-  if (nearmv.as_int == INVALID_MV) {
-    nearmv.as_int = 0;
-  }
-
-  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
-  if (dv_ref.as_int == 0)
-    av1_find_ref_dv(&dv_ref, tile, cm->seq_params.mib_size, mi_row, mi_col);
-  // Ref DV should not have sub-pel.
-  assert((dv_ref.as_mv.col & 7) == 0);
-  assert((dv_ref.as_mv.row & 7) == 0);
-  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;
-
-  struct buf_2d yv12_mb[MAX_MB_PLANE];
-  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, mi_row, mi_col, NULL, NULL,
-                       num_planes);
-  for (int i = 0; i < num_planes; ++i) {
-    xd->plane[i].pre[0] = yv12_mb[i];
-  }
-
-  enum IntrabcMotionDirection {
-    IBC_MOTION_ABOVE,
-    IBC_MOTION_LEFT,
-    IBC_MOTION_DIRECTIONS
-  };
-
-  MB_MODE_INFO best_mbmi = *mbmi;
-  RD_STATS best_rdcost = *rd_cost;
-  int best_skip = x->skip;
-
-  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
-  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
-       dir < IBC_MOTION_DIRECTIONS; ++dir) {
-    const MvLimits tmp_mv_limits = x->mv_limits;
-    switch (dir) {
-      case IBC_MOTION_ABOVE:
-        x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
-        x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w;
-        x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
-        x->mv_limits.row_max =
-            (sb_row * cm->seq_params.mib_size - mi_row) * MI_SIZE - h;
-        break;
-      case IBC_MOTION_LEFT:
-        x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
-        x->mv_limits.col_max =
-            (sb_col * cm->seq_params.mib_size - mi_col) * MI_SIZE - w;
-        // TODO(aconverse@google.com): Minimize the overlap between above and
-        // left areas.
-        x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
-        int bottom_coded_mi_edge =
-            AOMMIN((sb_row + 1) * cm->seq_params.mib_size, tile->mi_row_end);
-        x->mv_limits.row_max = (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
-        break;
-      default: assert(0);
-    }
-    assert(x->mv_limits.col_min >= tmp_mv_limits.col_min);
-    assert(x->mv_limits.col_max <= tmp_mv_limits.col_max);
-    assert(x->mv_limits.row_min >= tmp_mv_limits.row_min);
-    assert(x->mv_limits.row_max <= tmp_mv_limits.row_max);
-    av1_set_mv_search_range(&x->mv_limits, &dv_ref.as_mv);
-
-    if (x->mv_limits.col_max < x->mv_limits.col_min ||
-        x->mv_limits.row_max < x->mv_limits.row_min) {
-      x->mv_limits = tmp_mv_limits;
-      continue;
-    }
-
-    int step_param = cpi->mv_step_param;
-    MV mvp_full = dv_ref.as_mv;
-    mvp_full.col >>= 3;
-    mvp_full.row >>= 3;
-    int sadpb = x->sadperbit16;
-    int cost_list[5];
-    int bestsme = av1_full_pixel_search(
-        cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, 0,
-        sadpb, cond_cost_list(cpi, cost_list), &dv_ref.as_mv, INT_MAX, 1,
-        (MI_SIZE * mi_col), (MI_SIZE * mi_row), 1);
-
-    x->mv_limits = tmp_mv_limits;
-    if (bestsme == INT_MAX) continue;
-    mvp_full = x->best_mv.as_mv;
-    MV dv = { .row = mvp_full.row * 8, .col = mvp_full.col * 8 };
-    if (mv_check_bounds(&x->mv_limits, &dv)) continue;
-    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
-                         cm->seq_params.mib_size_log2))
-      continue;
-
-    // DV should not have sub-pel.
-    assert((dv.col & 7) == 0);
-    assert((dv.row & 7) == 0);
-    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
-    mbmi->filter_intra_mode_info.use_filter_intra = 0;
-    mbmi->use_intrabc = 1;
-    mbmi->mode = DC_PRED;
-    mbmi->uv_mode = UV_DC_PRED;
-    mbmi->motion_mode = SIMPLE_TRANSLATION;
-    mbmi->mv[0].as_mv = dv;
-    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
-    mbmi->skip = 0;
-    x->skip = 0;
-    av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
-
-    int *dvcost[2] = { (int *)&cpi->dv_cost[0][MV_MAX],
-                       (int *)&cpi->dv_cost[1][MV_MAX] };
-    // TODO(aconverse@google.com): The full motion field defining discount
-    // in MV_COST_WEIGHT is too large. Explore other values.
-    int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, cpi->dv_joint_cost,
-                                  dvcost, MV_COST_WEIGHT_SUB);
-    const int rate_mode = x->intrabc_cost[1];
-    RD_STATS rd_stats, rd_stats_uv;
-    av1_subtract_plane(x, bsize, 0);
-    if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
-      // Intrabc
-      select_tx_type_yrd(cpi, x, &rd_stats, bsize, mi_row, mi_col, INT64_MAX);
-    } else {
-      super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX);
-      memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
-      for (int i = 0; i < xd->n4_h * xd->n4_w; ++i)
-        set_blk_skip(x, 0, i, rd_stats.skip);
-    }
-    if (num_planes > 1) {
-      super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
-      av1_merge_rd_stats(&rd_stats, &rd_stats_uv);
-    }
-#if CONFIG_RD_DEBUG
-    mbmi->rd_stats = rd_stats;
-#endif
-
-    const int skip_ctx = av1_get_skip_context(xd);
-
-    RD_STATS rdc_noskip;
-    av1_init_rd_stats(&rdc_noskip);
-    rdc_noskip.rate =
-        rate_mode + rate_mv + rd_stats.rate + x->skip_cost[skip_ctx][0];
-    rdc_noskip.dist = rd_stats.dist;
-    rdc_noskip.rdcost = RDCOST(x->rdmult, rdc_noskip.rate, rdc_noskip.dist);
-    if (rdc_noskip.rdcost < best_rd) {
-      best_rd = rdc_noskip.rdcost;
-      best_mbmi = *mbmi;
-      best_skip = x->skip;
-      best_rdcost = rdc_noskip;
-      memcpy(best_blk_skip, x->blk_skip,
-             sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w);
-    }
-
-    if (!xd->lossless[mbmi->segment_id]) {
-      x->skip = 1;
-      mbmi->skip = 1;
-      RD_STATS rdc_skip;
-      av1_init_rd_stats(&rdc_skip);
-      rdc_skip.rate = rate_mode + rate_mv + x->skip_cost[skip_ctx][1];
-      rdc_skip.dist = rd_stats.sse;
-      rdc_skip.rdcost = RDCOST(x->rdmult, rdc_skip.rate, rdc_skip.dist);
-      if (rdc_skip.rdcost < best_rd) {
-        best_rd = rdc_skip.rdcost;
-        best_mbmi = *mbmi;
-        best_skip = x->skip;
-        best_rdcost = rdc_skip;
-        memcpy(best_blk_skip, x->blk_skip,
-               sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w);
-      }
-    }
-  }
-  *mbmi = best_mbmi;
-  *rd_cost = best_rdcost;
-  x->skip = best_skip;
-  memcpy(x->blk_skip, best_blk_skip,
-         sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w);
-  return best_rd;
-}
-
-void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
-                               int mi_col, RD_STATS *rd_cost, BLOCK_SIZE bsize,
-                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int num_planes = av1_num_planes(cm);
-  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
-  int y_skip = 0, uv_skip = 0;
-  int64_t dist_y = 0, dist_uv = 0;
-  TX_SIZE max_uv_tx_size;
-
-  ctx->skip = 0;
-  mbmi->ref_frame[0] = INTRA_FRAME;
-  mbmi->ref_frame[1] = NONE_FRAME;
-  mbmi->use_intrabc = 0;
-  mbmi->mv[0].as_int = 0;
-
-  const int64_t intra_yrd =
-      rd_pick_intra_sby_mode(cpi, x, mi_row, mi_col, &rate_y, &rate_y_tokenonly,
-                             &dist_y, &y_skip, bsize, best_rd, ctx);
-
-  if (intra_yrd < best_rd) {
-    // Only store reconstructed luma when there's chroma RDO. When there's no
-    // chroma RDO, the reconstructed luma will be stored in encode_superblock().
-    xd->cfl.is_chroma_reference =
-        is_chroma_reference(mi_row, mi_col, bsize, cm->seq_params.subsampling_x,
-                            cm->seq_params.subsampling_y);
-    xd->cfl.store_y = store_cfl_required_rdo(cm, x);
-    if (xd->cfl.store_y) {
-      // Restore reconstructed luma values.
-      memcpy(x->blk_skip, ctx->blk_skip,
-             sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
-      av1_encode_intra_block_plane(cpi, x, bsize, AOM_PLANE_Y,
-                                   cpi->optimize_seg_arr[mbmi->segment_id],
-                                   mi_row, mi_col);
-      xd->cfl.store_y = 0;
-    }
-    if (num_planes > 1) {
-      max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
-      init_sbuv_mode(mbmi);
-      if (!x->skip_chroma_rd)
-        rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
-                                &uv_skip, bsize, max_uv_tx_size);
-    }
-
-    if (y_skip && (uv_skip || x->skip_chroma_rd)) {
-      rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
-                      x->skip_cost[av1_get_skip_context(xd)][1];
-      rd_cost->dist = dist_y + dist_uv;
-    } else {
-      rd_cost->rate =
-          rate_y + rate_uv + x->skip_cost[av1_get_skip_context(xd)][0];
-      rd_cost->dist = dist_y + dist_uv;
-    }
-    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
-  } else {
-    rd_cost->rate = INT_MAX;
-  }
-
-  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
-    best_rd = rd_cost->rdcost;
-  if (rd_pick_intrabc_mode_sb(cpi, x, rd_cost, bsize, best_rd) < best_rd) {
-    ctx->skip = x->skip;
-    memcpy(ctx->blk_skip, x->blk_skip,
-           sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
-    assert(rd_cost->rate != INT_MAX);
-  }
-  if (rd_cost->rate == INT_MAX) return;
-
-  ctx->mic = *xd->mi[0];
-  ctx->mbmi_ext = *x->mbmi_ext;
-}
-
-static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-  const BLOCK_SIZE bsize = mbmi->sb_type;
-  int src_stride = x->plane[1].src.stride;
-  const uint8_t *const src_u = x->plane[1].src.buf;
-  const uint8_t *const src_v = x->plane[2].src.buf;
-  int *const data = x->palette_buffer->kmeans_data_buf;
-  int centroids[2 * PALETTE_MAX_SIZE];
-  uint8_t *const color_map = xd->plane[1].color_index_map;
-  int r, c;
-  const uint16_t *const src_u16 = CONVERT_TO_SHORTPTR(src_u);
-  const uint16_t *const src_v16 = CONVERT_TO_SHORTPTR(src_v);
-  int plane_block_width, plane_block_height, rows, cols;
-  av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
-                           &plane_block_height, &rows, &cols);
-
-  for (r = 0; r < rows; ++r) {
-    for (c = 0; c < cols; ++c) {
-      if (cpi->common.seq_params.use_highbitdepth) {
-        data[(r * cols + c) * 2] = src_u16[r * src_stride + c];
-        data[(r * cols + c) * 2 + 1] = src_v16[r * src_stride + c];
-      } else {
-        data[(r * cols + c) * 2] = src_u[r * src_stride + c];
-        data[(r * cols + c) * 2 + 1] = src_v[r * src_stride + c];
-      }
-    }
-  }
-
-  for (r = 1; r < 3; ++r) {
-    for (c = 0; c < pmi->palette_size[1]; ++c) {
-      centroids[c * 2 + r - 1] = pmi->palette_colors[r * PALETTE_MAX_SIZE + c];
-    }
-  }
-
-  av1_calc_indices(data, centroids, color_map, rows * cols,
-                   pmi->palette_size[1], 2);
-  extend_palette_color_map(color_map, cols, rows, plane_block_width,
-                           plane_block_height);
-}
-
-static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
-                                      const MACROBLOCKD *xd, int mi_row,
-                                      int mi_col, const uint8_t *above,
-                                      int above_stride, const uint8_t *left,
-                                      int left_stride);
-
-static const int ref_frame_flag_list[REF_FRAMES] = { 0,
-                                                     AOM_LAST_FLAG,
-                                                     AOM_LAST2_FLAG,
-                                                     AOM_LAST3_FLAG,
-                                                     AOM_GOLD_FLAG,
-                                                     AOM_BWD_FLAG,
-                                                     AOM_ALT2_FLAG,
-                                                     AOM_ALT_FLAG };
-
-static void rd_pick_skip_mode(RD_STATS *rd_cost,
-                              InterModeSearchState *search_state,
-                              const AV1_COMP *const cpi, MACROBLOCK *const x,
-                              BLOCK_SIZE bsize, int mi_row, int mi_col,
-                              struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-
-  x->compound_idx = 1;  // COMPOUND_AVERAGE
-  RD_STATS skip_mode_rd_stats;
-  av1_invalid_rd_stats(&skip_mode_rd_stats);
-
-  if (cm->ref_frame_idx_0 == INVALID_IDX ||
-      cm->ref_frame_idx_1 == INVALID_IDX) {
-    return;
-  }
-
-  const MV_REFERENCE_FRAME ref_frame = LAST_FRAME + cm->ref_frame_idx_0;
-  const MV_REFERENCE_FRAME second_ref_frame = LAST_FRAME + cm->ref_frame_idx_1;
-  const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
-  const int mode_index =
-      get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
-
-  if (mode_index == -1) {
-    return;
-  }
-
-  mbmi->mode = this_mode;
-  mbmi->uv_mode = UV_DC_PRED;
-  mbmi->ref_frame[0] = ref_frame;
-  mbmi->ref_frame[1] = second_ref_frame;
-  const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
-  if (x->mbmi_ext->ref_mv_count[ref_frame_type] == UINT8_MAX) {
-    if (x->mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
-        x->mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
-      return;
-    }
-    MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
-    av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
-                     mbmi_ext->ref_mv_stack, NULL, mbmi_ext->global_mvs, mi_row,
-                     mi_col, mbmi_ext->mode_context);
-  }
-
-  assert(this_mode == NEAREST_NEARESTMV);
-  if (!build_cur_mv(mbmi->mv, this_mode, cm, x)) {
-    return;
-  }
-
-  mbmi->filter_intra_mode_info.use_filter_intra = 0;
-  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
-  mbmi->comp_group_idx = 0;
-  mbmi->compound_idx = x->compound_idx;
-  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
-  mbmi->motion_mode = SIMPLE_TRANSLATION;
-  mbmi->ref_mv_idx = 0;
-  mbmi->skip_mode = mbmi->skip = 1;
-
-  set_default_interp_filters(mbmi, cm->interp_filter);
-
-  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
-  for (int i = 0; i < num_planes; i++) {
-    xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
-    xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
-  }
-
-  BUFFER_SET orig_dst;
-  for (int i = 0; i < num_planes; i++) {
-    orig_dst.plane[i] = xd->plane[i].dst.buf;
-    orig_dst.stride[i] = xd->plane[i].dst.stride;
-  }
-
-  // Obtain the rdcost for skip_mode.
-  skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, mi_row, mi_col, &orig_dst);
-
-  // Compare the use of skip_mode with the best intra/inter mode obtained.
-  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
-  const int64_t best_intra_inter_mode_cost =
-      (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX)
-          ? RDCOST(x->rdmult,
-                   rd_cost->rate + x->skip_mode_cost[skip_mode_ctx][0],
-                   rd_cost->dist)
-          : INT64_MAX;
-
-  if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost) {
-    assert(mode_index != -1);
-    search_state->best_mbmode.skip_mode = 1;
-    search_state->best_mbmode = *mbmi;
-
-    search_state->best_mbmode.skip_mode = search_state->best_mbmode.skip = 1;
-    search_state->best_mbmode.mode = NEAREST_NEARESTMV;
-    search_state->best_mbmode.ref_frame[0] = mbmi->ref_frame[0];
-    search_state->best_mbmode.ref_frame[1] = mbmi->ref_frame[1];
-    search_state->best_mbmode.mv[0].as_int = mbmi->mv[0].as_int;
-    search_state->best_mbmode.mv[1].as_int = mbmi->mv[1].as_int;
-    search_state->best_mbmode.ref_mv_idx = 0;
-
-    // Set up tx_size related variables for skip-specific loop filtering.
-    search_state->best_mbmode.tx_size =
-        block_signals_txsize(bsize) ? tx_size_from_tx_mode(bsize, cm->tx_mode)
-                                    : max_txsize_rect_lookup[bsize];
-    memset(search_state->best_mbmode.inter_tx_size,
-           search_state->best_mbmode.tx_size,
-           sizeof(search_state->best_mbmode.inter_tx_size));
-    set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->n4_w, xd->n4_h,
-                  search_state->best_mbmode.skip && is_inter_block(mbmi), xd);
-
-    // Set up color-related variables for skip mode.
-    search_state->best_mbmode.uv_mode = UV_DC_PRED;
-    search_state->best_mbmode.palette_mode_info.palette_size[0] = 0;
-    search_state->best_mbmode.palette_mode_info.palette_size[1] = 0;
-
-    search_state->best_mbmode.comp_group_idx = 0;
-    search_state->best_mbmode.compound_idx = x->compound_idx;
-    search_state->best_mbmode.interinter_comp.type = COMPOUND_AVERAGE;
-    search_state->best_mbmode.motion_mode = SIMPLE_TRANSLATION;
-
-    search_state->best_mbmode.interintra_mode =
-        (INTERINTRA_MODE)(II_DC_PRED - 1);
-    search_state->best_mbmode.filter_intra_mode_info.use_filter_intra = 0;
-
-    set_default_interp_filters(&search_state->best_mbmode, cm->interp_filter);
-
-    search_state->best_mode_index = mode_index;
-
-    // Update rd_cost
-    rd_cost->rate = skip_mode_rd_stats.rate;
-    rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
-    rd_cost->rdcost = skip_mode_rd_stats.rdcost;
-
-    search_state->best_rd = rd_cost->rdcost;
-    search_state->best_skip2 = 1;
-    search_state->best_mode_skippable = (skip_mode_rd_stats.sse == 0);
-
-    x->skip = 1;
-  }
-}
-
-// speed feature: fast intra/inter transform type search
-// Used for speed >= 2
-// When this speed feature is on, in rd mode search, only DCT is used.
-// After the mode is determined, this function is called, to select
-// transform types and get accurate rdcost.
-static void sf_refine_fast_tx_type_search(
-    const AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col,
-    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
-    int best_mode_index, MB_MODE_INFO *best_mbmode,
-    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int best_rate_y,
-    int best_rate_uv, int *best_skip2) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const SPEED_FEATURES *const sf = &cpi->sf;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int num_planes = av1_num_planes(cm);
-
-  if (xd->lossless[mbmi->segment_id] == 0 && best_mode_index >= 0 &&
-      ((sf->tx_type_search.fast_inter_tx_type_search == 1 &&
-        is_inter_mode(best_mbmode->mode)) ||
-       (sf->tx_type_search.fast_intra_tx_type_search == 1 &&
-        !is_inter_mode(best_mbmode->mode)))) {
-    int skip_blk = 0;
-    RD_STATS rd_stats_y, rd_stats_uv;
-
-    x->use_default_inter_tx_type = 0;
-    x->use_default_intra_tx_type = 0;
-
-    *mbmi = *best_mbmode;
-
-    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
-
-    // Select prediction reference frames.
-    for (int i = 0; i < num_planes; i++) {
-      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
-      if (has_second_ref(mbmi))
-        xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
-    }
-
-    if (is_inter_mode(mbmi->mode)) {
-      av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
-      if (mbmi->motion_mode == OBMC_CAUSAL)
-        av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
-
-      av1_subtract_plane(x, bsize, 0);
-      if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
-        // av1_rd_pick_inter_mode_sb
-        select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, mi_row, mi_col,
-                           INT64_MAX);
-        assert(rd_stats_y.rate != INT_MAX);
-      } else {
-        super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
-        memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
-        for (int i = 0; i < xd->n4_h * xd->n4_w; ++i)
-          set_blk_skip(x, 0, i, rd_stats_y.skip);
-      }
-      if (num_planes > 1) {
-        inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX, INT64_MAX,
-                         FTXS_NONE);
-      } else {
-        av1_init_rd_stats(&rd_stats_uv);
-      }
-    } else {
-      super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
-      if (num_planes > 1) {
-        super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
-      } else {
-        av1_init_rd_stats(&rd_stats_uv);
-      }
-    }
-
-    if (RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
-               (rd_stats_y.dist + rd_stats_uv.dist)) >
-        RDCOST(x->rdmult, 0, (rd_stats_y.sse + rd_stats_uv.sse))) {
-      skip_blk = 1;
-      rd_stats_y.rate = x->skip_cost[av1_get_skip_context(xd)][1];
-      rd_stats_uv.rate = 0;
-      rd_stats_y.dist = rd_stats_y.sse;
-      rd_stats_uv.dist = rd_stats_uv.sse;
-    } else {
-      skip_blk = 0;
-      rd_stats_y.rate += x->skip_cost[av1_get_skip_context(xd)][0];
-    }
-
-    if (RDCOST(x->rdmult, best_rate_y + best_rate_uv, rd_cost->dist) >
-        RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
-               (rd_stats_y.dist + rd_stats_uv.dist))) {
-      best_mbmode->tx_size = mbmi->tx_size;
-      av1_copy(best_mbmode->inter_tx_size, mbmi->inter_tx_size);
-      memcpy(ctx->blk_skip, x->blk_skip,
-             sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
-      av1_copy(best_mbmode->txk_type, mbmi->txk_type);
-      rd_cost->rate +=
-          (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv);
-      rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
-      rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
-      *best_skip2 = skip_blk;
-    }
-  }
-}
-
-// Please add/modify parameter setting in this function, making it consistent
-// and easy to read and maintain.
-static void set_params_rd_pick_inter_mode(
-    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
-    BLOCK_SIZE bsize, int mi_row, int mi_col, uint16_t ref_frame_skip_mask[2],
-    uint32_t mode_skip_mask[REF_FRAMES], int skip_ref_frame_mask,
-    unsigned int ref_costs_single[REF_FRAMES],
-    unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES],
-    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
-  const struct segmentation *const seg = &cm->seg;
-  const SPEED_FEATURES *const sf = &cpi->sf;
-  unsigned char segment_id = mbmi->segment_id;
-  int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-  int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
-                                   MAX_SB_SIZE >> 1 };
-  int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
-                                    MAX_SB_SIZE >> 1 };
-  int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-
-  for (int i = 0; i < MB_MODE_COUNT; ++i)
-    for (int k = 0; k < REF_FRAMES; ++k) args->single_filter[i][k] = SWITCHABLE;
-
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    int len = sizeof(uint16_t);
-    args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf);
-    args->above_pred_buf[1] =
-        CONVERT_TO_BYTEPTR(x->above_pred_buf + (MAX_SB_SQUARE >> 1) * len);
-    args->above_pred_buf[2] =
-        CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len);
-    args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf);
-    args->left_pred_buf[1] =
-        CONVERT_TO_BYTEPTR(x->left_pred_buf + (MAX_SB_SQUARE >> 1) * len);
-    args->left_pred_buf[2] =
-        CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len);
-  } else {
-    args->above_pred_buf[0] = x->above_pred_buf;
-    args->above_pred_buf[1] = x->above_pred_buf + (MAX_SB_SQUARE >> 1);
-    args->above_pred_buf[2] = x->above_pred_buf + MAX_SB_SQUARE;
-    args->left_pred_buf[0] = x->left_pred_buf;
-    args->left_pred_buf[1] = x->left_pred_buf + (MAX_SB_SQUARE >> 1);
-    args->left_pred_buf[2] = x->left_pred_buf + MAX_SB_SQUARE;
-  }
-
-  av1_collect_neighbors_ref_counts(xd);
-
-  estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single,
-                           ref_costs_comp);
-
-  MV_REFERENCE_FRAME ref_frame;
-  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-    x->pred_mv_sad[ref_frame] = INT_MAX;
-    x->mbmi_ext->mode_context[ref_frame] = 0;
-    x->mbmi_ext->compound_mode_context[ref_frame] = 0;
-    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
-    if (cpi->ref_frame_flags & ref_frame_flag_list[ref_frame]) {
-      if (mbmi->partition != PARTITION_NONE &&
-          mbmi->partition != PARTITION_SPLIT) {
-        if (skip_ref_frame_mask & (1 << ref_frame)) {
-          int skip = 1;
-          for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
-            if (!(skip_ref_frame_mask & (1 << r))) {
-              const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
-              if (rf[0] == ref_frame || rf[1] == ref_frame) {
-                skip = 0;
-                break;
-              }
-            }
-          }
-          if (skip) continue;
-        }
-      }
-      assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
-      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
-                                 yv12_mb);
-    }
-  }
-  // ref_frame = ALTREF_FRAME
-  for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
-    x->mbmi_ext->mode_context[ref_frame] = 0;
-    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
-    const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
-    if (!((cpi->ref_frame_flags & ref_frame_flag_list[rf[0]]) &&
-          (cpi->ref_frame_flags & ref_frame_flag_list[rf[1]]))) {
-      continue;
-    }
-
-    if (mbmi->partition != PARTITION_NONE &&
-        mbmi->partition != PARTITION_SPLIT) {
-      if (skip_ref_frame_mask & (1 << ref_frame)) {
-        continue;
-      }
-    }
-    av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
-                     mbmi_ext->ref_mv_stack, NULL, mbmi_ext->global_mvs, mi_row,
-                     mi_col, mbmi_ext->mode_context);
-  }
-
-  av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
-
-  if (check_num_overlappable_neighbors(mbmi) &&
-      is_motion_variation_allowed_bsize(bsize)) {
-    av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col,
-                                        args->above_pred_buf, dst_width1,
-                                        dst_height1, args->above_pred_stride);
-    av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col,
-                                       args->left_pred_buf, dst_width2,
-                                       dst_height2, args->left_pred_stride);
-    av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
-                         mi_col, 0, num_planes);
-    calc_target_weighted_pred(
-        cm, x, xd, mi_row, mi_col, args->above_pred_buf[0],
-        args->above_pred_stride[0], args->left_pred_buf[0],
-        args->left_pred_stride[0]);
-  }
-
-  int min_pred_mv_sad = INT_MAX;
-  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
-    min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
-
-  for (int i = 0; i < 2; ++i) {
-    ref_frame_skip_mask[i] = 0;
-  }
-  memset(mode_skip_mask, 0, REF_FRAMES * sizeof(*mode_skip_mask));
-  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
-    if (!(cpi->ref_frame_flags & ref_frame_flag_list[ref_frame])) {
-      // Skip checking missing references in both single and compound reference
-      // modes. Note that a mode will be skipped iff both reference frames
-      // are masked out.
-      ref_frame_skip_mask[0] |= (1 << ref_frame);
-      ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
-    } else {
-      // Skip fixed mv modes for poor references
-      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
-        mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
-      }
-    }
-    // If the segment reference frame feature is enabled....
-    // then do nothing if the current ref frame is not allowed..
-    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
-        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
-      ref_frame_skip_mask[0] |= (1 << ref_frame);
-      ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
-    }
-  }
-
-  // Disable this drop out case if the ref frame
-  // segment level feature is enabled for this segment. This is to
-  // prevent the possibility that we end up unable to pick any mode.
-  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
-    // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
-    // unless ARNR filtering is enabled in which case we want
-    // an unfiltered alternative. We allow near/nearest as well
-    // because they may result in zero-zero MVs but be cheaper.
-    if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
-      ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << LAST2_FRAME) |
-                               (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) |
-                               (1 << ALTREF2_FRAME) | (1 << GOLDEN_FRAME);
-      ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
-      // TODO(zoeliu): To further explore whether following needs to be done for
-      //               BWDREF_FRAME as well.
-      mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
-      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
-      int_mv near_mv, nearest_mv, global_mv;
-      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, tmp_ref_frames, x->mbmi_ext);
-      get_this_mv(&near_mv, NEARMV, 0, 0, tmp_ref_frames, x->mbmi_ext);
-      get_this_mv(&global_mv, GLOBALMV, 0, 0, tmp_ref_frames, x->mbmi_ext);
-
-      if (near_mv.as_int != global_mv.as_int)
-        mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
-      if (nearest_mv.as_int != global_mv.as_int)
-        mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
-    }
-  }
-
-  if (cpi->rc.is_src_frame_alt_ref) {
-    if (sf->alt_ref_search_fp) {
-      assert(cpi->ref_frame_flags & ref_frame_flag_list[ALTREF_FRAME]);
-      mode_skip_mask[ALTREF_FRAME] = 0;
-      ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
-      ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
-    }
-  }
-
-  if (sf->alt_ref_search_fp)
-    if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
-      if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
-        mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
-
-  if (sf->adaptive_mode_search) {
-    if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
-        cpi->rc.frames_since_golden >= 3)
-      if ((x->pred_mv_sad[GOLDEN_FRAME] >> 1) > x->pred_mv_sad[LAST_FRAME])
-        mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
-  }
-
-  if (bsize > sf->max_intra_bsize) {
-    ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
-    ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
-  }
-
-  mode_skip_mask[INTRA_FRAME] |=
-      ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
-
-  if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
-    x->use_default_intra_tx_type = 1;
-  else
-    x->use_default_intra_tx_type = 0;
-
-  if (cpi->sf.tx_type_search.fast_inter_tx_type_search)
-    x->use_default_inter_tx_type = 1;
-  else
-    x->use_default_inter_tx_type = 0;
-  if (cpi->sf.skip_repeat_interpolation_filter_search) {
-    x->interp_filter_stats_idx[0] = 0;
-    x->interp_filter_stats_idx[1] = 0;
-  }
-}
-
-static void search_palette_mode(const AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
-                                int mi_col, RD_STATS *rd_cost,
-                                PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
-                                MB_MODE_INFO *const mbmi,
-                                PALETTE_MODE_INFO *const pmi,
-                                unsigned int *ref_costs_single,
-                                InterModeSearchState *search_state) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCKD *const xd = &x->e_mbd;
-  int rate2 = 0;
-  int64_t distortion2 = 0, best_rd_palette = search_state->best_rd, this_rd,
-          best_model_rd_palette = INT64_MAX;
-  int skippable = 0, rate_overhead_palette = 0;
-  RD_STATS rd_stats_y;
-  TX_SIZE uv_tx = TX_4X4;
-  uint8_t *const best_palette_color_map =
-      x->palette_buffer->best_palette_color_map;
-  uint8_t *const color_map = xd->plane[0].color_index_map;
-  MB_MODE_INFO best_mbmi_palette = *mbmi;
-  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
-  const int *const intra_mode_cost = x->mbmode_cost[size_group_lookup[bsize]];
-  const int rows = block_size_high[bsize];
-  const int cols = block_size_wide[bsize];
-
-  mbmi->mode = DC_PRED;
-  mbmi->uv_mode = UV_DC_PRED;
-  mbmi->ref_frame[0] = INTRA_FRAME;
-  mbmi->ref_frame[1] = NONE_FRAME;
-  rate_overhead_palette = rd_pick_palette_intra_sby(
-      cpi, x, bsize, mi_row, mi_col, intra_mode_cost[DC_PRED],
-      &best_mbmi_palette, best_palette_color_map, &best_rd_palette,
-      &best_model_rd_palette, NULL, NULL, NULL, NULL, ctx, best_blk_skip);
-  if (pmi->palette_size[0] == 0) return;
-
-  memcpy(x->blk_skip, best_blk_skip,
-         sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize));
-
-  memcpy(color_map, best_palette_color_map,
-         rows * cols * sizeof(best_palette_color_map[0]));
-  super_block_yrd(cpi, x, &rd_stats_y, bsize, search_state->best_rd);
-  if (rd_stats_y.rate == INT_MAX) return;
-
-  skippable = rd_stats_y.skip;
-  distortion2 = rd_stats_y.dist;
-  rate2 = rd_stats_y.rate + rate_overhead_palette;
-  rate2 += ref_costs_single[INTRA_FRAME];
-  if (num_planes > 1) {
-    uv_tx = av1_get_tx_size(AOM_PLANE_U, xd);
-    if (search_state->rate_uv_intra[uv_tx] == INT_MAX) {
-      choose_intra_uv_mode(
-          cpi, x, bsize, uv_tx, &search_state->rate_uv_intra[uv_tx],
-          &search_state->rate_uv_tokenonly[uv_tx],
-          &search_state->dist_uvs[uv_tx], &search_state->skip_uvs[uv_tx],
-          &search_state->mode_uv[uv_tx]);
-      search_state->pmi_uv[uv_tx] = *pmi;
-      search_state->uv_angle_delta[uv_tx] = mbmi->angle_delta[PLANE_TYPE_UV];
-    }
-    mbmi->uv_mode = search_state->mode_uv[uv_tx];
-    pmi->palette_size[1] = search_state->pmi_uv[uv_tx].palette_size[1];
-    if (pmi->palette_size[1] > 0) {
-      memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
-             search_state->pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
-             2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
-    }
-    mbmi->angle_delta[PLANE_TYPE_UV] = search_state->uv_angle_delta[uv_tx];
-    skippable = skippable && search_state->skip_uvs[uv_tx];
-    distortion2 += search_state->dist_uvs[uv_tx];
-    rate2 += search_state->rate_uv_intra[uv_tx];
-  }
-
-  if (skippable) {
-    rate2 -= rd_stats_y.rate;
-    if (num_planes > 1) rate2 -= search_state->rate_uv_tokenonly[uv_tx];
-    rate2 += x->skip_cost[av1_get_skip_context(xd)][1];
-  } else {
-    rate2 += x->skip_cost[av1_get_skip_context(xd)][0];
-  }
-  this_rd = RDCOST(x->rdmult, rate2, distortion2);
-  if (this_rd < search_state->best_rd) {
-    search_state->best_mode_index = 3;
-    mbmi->mv[0].as_int = 0;
-    rd_cost->rate = rate2;
-    rd_cost->dist = distortion2;
-    rd_cost->rdcost = this_rd;
-    search_state->best_rd = this_rd;
-    search_state->best_mbmode = *mbmi;
-    search_state->best_skip2 = 0;
-    search_state->best_mode_skippable = skippable;
-    memcpy(ctx->blk_skip, x->blk_skip,
-           sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
-  }
-}
-
-static void init_inter_mode_search_state(InterModeSearchState *search_state,
-                                         const AV1_COMP *cpi,
-                                         const TileDataEnc *tile_data,
-                                         const MACROBLOCK *x, BLOCK_SIZE bsize,
-                                         int64_t best_rd_so_far) {
-  search_state->best_rd = best_rd_so_far;
-
-  av1_zero(search_state->best_mbmode);
-
-  search_state->best_rate_y = INT_MAX;
-
-  search_state->best_rate_uv = INT_MAX;
-
-  search_state->best_mode_skippable = 0;
-
-  search_state->best_skip2 = 0;
-
-  search_state->best_mode_index = -1;
-
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const unsigned char segment_id = mbmi->segment_id;
-
-  search_state->skip_intra_modes = 0;
-
-  search_state->num_available_refs = 0;
-  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
-  memset(search_state->dist_order_refs, -1,
-         sizeof(search_state->dist_order_refs));
-
-  for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
-    search_state->mode_threshold[i] = 0;
-  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
-  for (int i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
-    search_state->mode_threshold[i] =
-        ((int64_t)rd_threshes[i] * tile_data->thresh_freq_fact[bsize][i]) >> 5;
-
-  search_state->best_intra_mode = DC_PRED;
-  search_state->best_intra_rd = INT64_MAX;
-
-  search_state->angle_stats_ready = 0;
-
-  search_state->best_pred_sse = UINT_MAX;
-
-  for (int i = 0; i < TX_SIZES_ALL; i++)
-    search_state->rate_uv_intra[i] = INT_MAX;
-
-  av1_zero(search_state->pmi_uv);
-
-  for (int i = 0; i < REFERENCE_MODES; ++i)
-    search_state->best_pred_rd[i] = INT64_MAX;
-
-  av1_zero(search_state->single_newmv);
-  av1_zero(search_state->single_newmv_rate);
-  av1_zero(search_state->single_newmv_valid);
-  for (int i = 0; i < MB_MODE_COUNT; ++i) {
-    for (int j = 0; j < MAX_REF_MV_SERCH; ++j) {
-      for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
-        search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
-        search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
-      }
-    }
-  }
-
-  for (int dir = 0; dir < 2; ++dir) {
-    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
-      for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
-        SingleInterModeState *state;
-
-        state = &search_state->single_state[dir][mode][ref_frame];
-        state->ref_frame = NONE_FRAME;
-        state->rd = INT64_MAX;
-
-        state = &search_state->single_state_modelled[dir][mode][ref_frame];
-        state->ref_frame = NONE_FRAME;
-        state->rd = INT64_MAX;
-      }
-    }
-  }
-  for (int dir = 0; dir < 2; ++dir) {
-    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
-      for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
-        search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
-      }
-    }
-  }
-  av1_zero(search_state->single_state_cnt);
-  av1_zero(search_state->single_state_modelled_cnt);
-}
-
-// Case 1: return 0, means don't skip this mode
-// Case 2: return 1, means skip this mode completely
-// Case 3: return 2, means skip compound only, but still try single motion modes
-static int inter_mode_search_order_independent_skip(
-    const AV1_COMP *cpi, const PICK_MODE_CONTEXT *ctx, const MACROBLOCK *x,
-    BLOCK_SIZE bsize, int mode_index, int mi_row, int mi_col,
-    uint32_t *mode_skip_mask, uint16_t *ref_frame_skip_mask,
-    InterModeSearchState *search_state) {
-  const SPEED_FEATURES *const sf = &cpi->sf;
-  const AV1_COMMON *const cm = &cpi->common;
-  const struct segmentation *const seg = &cm->seg;
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const unsigned char segment_id = mbmi->segment_id;
-  const MV_REFERENCE_FRAME *ref_frame = av1_mode_order[mode_index].ref_frame;
-  const PREDICTION_MODE this_mode = av1_mode_order[mode_index].mode;
-  int skip_motion_mode = 0;
-  if (mbmi->partition != PARTITION_NONE && mbmi->partition != PARTITION_SPLIT) {
-    const int ref_type = av1_ref_frame_type(ref_frame);
-    int skip_ref = ctx->skip_ref_frame_mask & (1 << ref_type);
-    if (ref_type <= ALTREF_FRAME && skip_ref) {
-      // Since the compound ref modes depends on the motion estimation result of
-      // two single ref modes( best mv of single ref modes as the start point )
-      // If current single ref mode is marked skip, we need to check if it will
-      // be used in compound ref modes.
-      for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
-        if (!(ctx->skip_ref_frame_mask & (1 << r))) {
-          const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
-          if (rf[0] == ref_type || rf[1] == ref_type) {
-            // Found a not skipped compound ref mode which contains current
-            // single ref. So this single ref can't be skipped completly
-            // Just skip it's motion mode search, still try it's simple
-            // transition mode.
-            skip_motion_mode = 1;
-            skip_ref = 0;
-            break;
-          }
-        }
-      }
-    }
-    if (skip_ref) return 1;
-  }
-
-  if (cpi->sf.mode_pruning_based_on_two_pass_partition_search &&
-      !x->cb_partition_scan) {
-    const int mi_width = mi_size_wide[bsize];
-    const int mi_height = mi_size_high[bsize];
-    int found = 0;
-    // Search in the stats table to see if the ref frames have been used in the
-    // first pass of partition search.
-    for (int row = mi_row; row < mi_row + mi_width && !found;
-         row += FIRST_PARTITION_PASS_SAMPLE_REGION) {
-      for (int col = mi_col; col < mi_col + mi_height && !found;
-           col += FIRST_PARTITION_PASS_SAMPLE_REGION) {
-        const int index = av1_first_partition_pass_stats_index(row, col);
-        const FIRST_PARTITION_PASS_STATS *const stats =
-            &x->first_partition_pass_stats[index];
-        if (stats->ref0_counts[ref_frame[0]] &&
-            (ref_frame[1] < 0 || stats->ref1_counts[ref_frame[1]])) {
-          found = 1;
-          break;
-        }
-      }
-    }
-    if (!found) return 1;
-  }
-
-  if (ref_frame[0] > INTRA_FRAME && ref_frame[1] == INTRA_FRAME) {
-    // Mode must by compatible
-    if (!is_interintra_allowed_mode(this_mode)) return 1;
-    if (!is_interintra_allowed_bsize(bsize)) return 1;
-  }
-
-  // This is only used in motion vector unit test.
-  if (cpi->oxcf.motion_vector_unit_test && ref_frame[0] == INTRA_FRAME)
-    return 1;
-
-  if (ref_frame[0] == INTRA_FRAME) {
-    if (this_mode != DC_PRED) {
-      // Disable intra modes other than DC_PRED for blocks with low variance
-      // Threshold for intra skipping based on source variance
-      // TODO(debargha): Specialize the threshold for super block sizes
-      const unsigned int skip_intra_var_thresh = 64;
-      if ((sf->mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
-          x->source_variance < skip_intra_var_thresh)
-        return 1;
-    }
-  } else {
-    if (!is_comp_ref_allowed(bsize) && ref_frame[1] > INTRA_FRAME) return 1;
-  }
-
-  const int comp_pred = ref_frame[1] > INTRA_FRAME;
-  if (comp_pred) {
-    if (!cpi->allow_comp_inter_inter) return 1;
-
-    if (cm->reference_mode == SINGLE_REFERENCE) return 1;
-
-    // Skip compound inter modes if ARF is not available.
-    if (!(cpi->ref_frame_flags & ref_frame_flag_list[ref_frame[1]])) return 1;
-
-    // Do not allow compound prediction if the segment level reference frame
-    // feature is in use as in this case there can only be one reference.
-    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
-  }
-
-  if (sf->selective_ref_frame) {
-    if (sf->selective_ref_frame >= 2 || x->cb_partition_scan) {
-      if (ref_frame[0] == ALTREF2_FRAME || ref_frame[1] == ALTREF2_FRAME)
-        if (get_relative_dist(
-                cm, cm->cur_frame->ref_frame_offset[ALTREF2_FRAME - LAST_FRAME],
-                cm->frame_offset) < 0)
-          return 1;
-      if (ref_frame[0] == BWDREF_FRAME || ref_frame[1] == BWDREF_FRAME)
-        if (get_relative_dist(
-                cm, cm->cur_frame->ref_frame_offset[BWDREF_FRAME - LAST_FRAME],
-                cm->frame_offset) < 0)
-          return 1;
-    }
-    if (ref_frame[0] == LAST3_FRAME || ref_frame[1] == LAST3_FRAME)
-      if (get_relative_dist(
-              cm, cm->cur_frame->ref_frame_offset[LAST3_FRAME - LAST_FRAME],
-              cm->cur_frame->ref_frame_offset[GOLDEN_FRAME - LAST_FRAME]) <= 0)
-        return 1;
-    if (ref_frame[0] == LAST2_FRAME || ref_frame[1] == LAST2_FRAME)
-      if (get_relative_dist(
-              cm, cm->cur_frame->ref_frame_offset[LAST2_FRAME - LAST_FRAME],
-              cm->cur_frame->ref_frame_offset[GOLDEN_FRAME - LAST_FRAME]) <= 0)
-        return 1;
-  }
-
-  // One-sided compound is used only when all reference frames are one-sided.
-  if (sf->selective_ref_frame && comp_pred && !cpi->all_one_sided_refs) {
-    unsigned int ref_offsets[2];
-    for (int i = 0; i < 2; ++i) {
-      const int buf_idx = cm->frame_refs[ref_frame[i] - LAST_FRAME].idx;
-      assert(buf_idx >= 0);
-      ref_offsets[i] = cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset;
-    }
-    if ((get_relative_dist(cm, ref_offsets[0], cm->frame_offset) <= 0 &&
-         get_relative_dist(cm, ref_offsets[1], cm->frame_offset) <= 0) ||
-        (get_relative_dist(cm, ref_offsets[0], cm->frame_offset) > 0 &&
-         get_relative_dist(cm, ref_offsets[1], cm->frame_offset) > 0))
-      return 1;
-  }
-
-  if (mode_skip_mask[ref_frame[0]] & (1 << this_mode)) {
-    return 1;
-  }
-
-  if ((ref_frame_skip_mask[0] & (1 << ref_frame[0])) &&
-      (ref_frame_skip_mask[1] & (1 << AOMMAX(0, ref_frame[1])))) {
-    return 1;
-  }
-
-  if (skip_repeated_mv(cm, x, this_mode, ref_frame, search_state)) {
-    return 1;
-  }
-  if (skip_motion_mode) {
-    return 2;
-  }
-  return 0;
-}
-
-static INLINE void init_mbmi(MB_MODE_INFO *mbmi, int mode_index,
-                             const AV1_COMMON *cm) {
-  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-  PREDICTION_MODE this_mode = av1_mode_order[mode_index].mode;
-  mbmi->ref_mv_idx = 0;
-  mbmi->mode = this_mode;
-  mbmi->uv_mode = UV_DC_PRED;
-  mbmi->ref_frame[0] = av1_mode_order[mode_index].ref_frame[0];
-  mbmi->ref_frame[1] = av1_mode_order[mode_index].ref_frame[1];
-  pmi->palette_size[0] = 0;
-  pmi->palette_size[1] = 0;
-  mbmi->filter_intra_mode_info.use_filter_intra = 0;
-  mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
-  mbmi->motion_mode = SIMPLE_TRANSLATION;
-  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
-  set_default_interp_filters(mbmi, cm->interp_filter);
-}
-
-static int64_t handle_intra_mode(InterModeSearchState *search_state,
-                                 const AV1_COMP *cpi, MACROBLOCK *x,
-                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
-                                 int ref_frame_cost,
-                                 const PICK_MODE_CONTEXT *ctx, int disable_skip,
-                                 RD_STATS *rd_stats, RD_STATS *rd_stats_y,
-                                 RD_STATS *rd_stats_uv) {
-  const AV1_COMMON *cm = &cpi->common;
-  const SPEED_FEATURES *const sf = &cpi->sf;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  assert(mbmi->ref_frame[0] == INTRA_FRAME);
-  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-  const int try_palette =
-      av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type);
-  const int *const intra_mode_cost = x->mbmode_cost[size_group_lookup[bsize]];
-  const int intra_cost_penalty = av1_get_intra_cost_penalty(
-      cm->base_qindex, cm->y_dc_delta_q, cm->seq_params.bit_depth);
-  const int rows = block_size_high[bsize];
-  const int cols = block_size_wide[bsize];
-  const int num_planes = av1_num_planes(cm);
-  const int skip_ctx = av1_get_skip_context(xd);
-
-  int known_rate = intra_mode_cost[mbmi->mode];
-  known_rate += ref_frame_cost;
-  if (mbmi->mode != DC_PRED && mbmi->mode != PAETH_PRED)
-    known_rate += intra_cost_penalty;
-  known_rate += AOMMIN(x->skip_cost[skip_ctx][0], x->skip_cost[skip_ctx][1]);
-  const int64_t known_rd = RDCOST(x->rdmult, known_rate, 0);
-  if (known_rd > search_state->best_rd) {
-    search_state->skip_intra_modes = 1;
-    return INT64_MAX;
-  }
-
-  TX_SIZE uv_tx;
-  int is_directional_mode = av1_is_directional_mode(mbmi->mode);
-  if (is_directional_mode && av1_use_angle_delta(bsize)) {
-    int rate_dummy;
-    int64_t model_rd = INT64_MAX;
-    if (!search_state->angle_stats_ready) {
-      const int src_stride = x->plane[0].src.stride;
-      const uint8_t *src = x->plane[0].src.buf;
-      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
-        highbd_angle_estimation(src, src_stride, rows, cols, bsize,
-                                search_state->directional_mode_skip_mask);
-      else
-        angle_estimation(src, src_stride, rows, cols, bsize,
-                         search_state->directional_mode_skip_mask);
-      search_state->angle_stats_ready = 1;
-    }
-    if (search_state->directional_mode_skip_mask[mbmi->mode]) return INT64_MAX;
-    av1_init_rd_stats(rd_stats_y);
-    rd_stats_y->rate = INT_MAX;
-    rd_pick_intra_angle_sby(cpi, x, mi_row, mi_col, &rate_dummy, rd_stats_y,
-                            bsize, intra_mode_cost[mbmi->mode],
-                            search_state->best_rd, &model_rd);
-  } else {
-    av1_init_rd_stats(rd_stats_y);
-    mbmi->angle_delta[PLANE_TYPE_Y] = 0;
-    super_block_yrd(cpi, x, rd_stats_y, bsize, search_state->best_rd);
-  }
-  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
-  memcpy(best_blk_skip, x->blk_skip,
-         sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
-  int try_filter_intra = 0;
-  int64_t best_rd_tmp = INT64_MAX;
-  if (mbmi->mode == DC_PRED && av1_filter_intra_allowed_bsize(cm, bsize)) {
-    if (rd_stats_y->rate != INT_MAX) {
-      const int tmp_rate = rd_stats_y->rate + x->filter_intra_cost[bsize][0] +
-                           intra_mode_cost[mbmi->mode];
-      best_rd_tmp = RDCOST(x->rdmult, tmp_rate, rd_stats_y->dist);
-      try_filter_intra = !((best_rd_tmp / 2) > search_state->best_rd);
-    } else {
-      try_filter_intra = !(search_state->best_mbmode.skip);
-    }
-  }
-  if (try_filter_intra) {
-    RD_STATS rd_stats_y_fi;
-    int filter_intra_selected_flag = 0;
-    TX_SIZE best_tx_size = mbmi->tx_size;
-    TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN];
-    memcpy(best_txk_type, mbmi->txk_type,
-           sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
-    FILTER_INTRA_MODE best_fi_mode = FILTER_DC_PRED;
-
-    mbmi->filter_intra_mode_info.use_filter_intra = 1;
-    for (FILTER_INTRA_MODE fi_mode = FILTER_DC_PRED;
-         fi_mode < FILTER_INTRA_MODES; ++fi_mode) {
-      int64_t this_rd_tmp;
-      mbmi->filter_intra_mode_info.filter_intra_mode = fi_mode;
-      super_block_yrd(cpi, x, &rd_stats_y_fi, bsize, search_state->best_rd);
-      if (rd_stats_y_fi.rate == INT_MAX) {
-        continue;
-      }
-      const int this_rate_tmp =
-          rd_stats_y_fi.rate +
-          intra_mode_info_cost_y(cpi, x, mbmi, bsize,
-                                 intra_mode_cost[mbmi->mode]);
-      this_rd_tmp = RDCOST(x->rdmult, this_rate_tmp, rd_stats_y_fi.dist);
-
-      if (this_rd_tmp != INT64_MAX && this_rd_tmp / 2 > search_state->best_rd) {
-        break;
-      }
-      if (this_rd_tmp < best_rd_tmp) {
-        best_tx_size = mbmi->tx_size;
-        memcpy(best_txk_type, mbmi->txk_type,
-               sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
-        memcpy(best_blk_skip, x->blk_skip,
-               sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
-        best_fi_mode = fi_mode;
-        *rd_stats_y = rd_stats_y_fi;
-        filter_intra_selected_flag = 1;
-        best_rd_tmp = this_rd_tmp;
-      }
-    }
-
-    mbmi->tx_size = best_tx_size;
-    memcpy(mbmi->txk_type, best_txk_type,
-           sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN);
-    memcpy(x->blk_skip, best_blk_skip,
-           sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
-
-    if (filter_intra_selected_flag) {
-      mbmi->filter_intra_mode_info.use_filter_intra = 1;
-      mbmi->filter_intra_mode_info.filter_intra_mode = best_fi_mode;
-    } else {
-      mbmi->filter_intra_mode_info.use_filter_intra = 0;
-    }
-  }
-  if (rd_stats_y->rate == INT_MAX) return INT64_MAX;
-  const int mode_cost_y =
-      intra_mode_info_cost_y(cpi, x, mbmi, bsize, intra_mode_cost[mbmi->mode]);
-  av1_init_rd_stats(rd_stats);
-  av1_init_rd_stats(rd_stats_uv);
-  if (num_planes > 1) {
-    uv_tx = av1_get_tx_size(AOM_PLANE_U, xd);
-    if (search_state->rate_uv_intra[uv_tx] == INT_MAX) {
-      int rate_y =
-          rd_stats_y->skip ? x->skip_cost[skip_ctx][1] : rd_stats_y->rate;
-      const int64_t rdy =
-          RDCOST(x->rdmult, rate_y + mode_cost_y, rd_stats_y->dist);
-      if (search_state->best_rd < (INT64_MAX / 2) &&
-          rdy > (search_state->best_rd + (search_state->best_rd >> 2))) {
-        search_state->skip_intra_modes = 1;
-        return INT64_MAX;
-      }
-      choose_intra_uv_mode(
-          cpi, x, bsize, uv_tx, &search_state->rate_uv_intra[uv_tx],
-          &search_state->rate_uv_tokenonly[uv_tx],
-          &search_state->dist_uvs[uv_tx], &search_state->skip_uvs[uv_tx],
-          &search_state->mode_uv[uv_tx]);
-      if (try_palette) search_state->pmi_uv[uv_tx] = *pmi;
-      search_state->uv_angle_delta[uv_tx] = mbmi->angle_delta[PLANE_TYPE_UV];
-
-      const int uv_rate = search_state->rate_uv_tokenonly[uv_tx];
-      const int64_t uv_dist = search_state->dist_uvs[uv_tx];
-      const int64_t uv_rd = RDCOST(x->rdmult, uv_rate, uv_dist);
-      if (uv_rd > search_state->best_rd) {
-        search_state->skip_intra_modes = 1;
-        return INT64_MAX;
-      }
-    }
-
-    rd_stats_uv->rate = search_state->rate_uv_tokenonly[uv_tx];
-    rd_stats_uv->dist = search_state->dist_uvs[uv_tx];
-    rd_stats_uv->skip = search_state->skip_uvs[uv_tx];
-    rd_stats->skip = rd_stats_y->skip && rd_stats_uv->skip;
-    mbmi->uv_mode = search_state->mode_uv[uv_tx];
-    if (try_palette) {
-      pmi->palette_size[1] = search_state->pmi_uv[uv_tx].palette_size[1];
-      memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
-             search_state->pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
-             2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
-    }
-    mbmi->angle_delta[PLANE_TYPE_UV] = search_state->uv_angle_delta[uv_tx];
-  }
-  rd_stats->rate = rd_stats_y->rate + mode_cost_y;
-  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
-    // super_block_yrd above includes the cost of the tx_size in the
-    // tokenonly rate, but for intra blocks, tx_size is always coded
-    // (prediction granularity), so we account for it in the full rate,
-    // not the tokenonly rate.
-    rd_stats_y->rate -= tx_size_cost(cm, x, bsize, mbmi->tx_size);
-  }
-  if (num_planes > 1 && !x->skip_chroma_rd) {
-    const int uv_mode_cost =
-        x->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mbmi->uv_mode];
-    rd_stats->rate +=
-        rd_stats_uv->rate +
-        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
-  }
-  if (mbmi->mode != DC_PRED && mbmi->mode != PAETH_PRED)
-    rd_stats->rate += intra_cost_penalty;
-  rd_stats->dist = rd_stats_y->dist + rd_stats_uv->dist;
-
-  // Estimate the reference frame signaling cost and add it
-  // to the rolling cost variable.
-  rd_stats->rate += ref_frame_cost;
-  if (rd_stats->skip) {
-    // Back out the coefficient coding costs
-    rd_stats->rate -= (rd_stats_y->rate + rd_stats_uv->rate);
-    rd_stats_y->rate = 0;
-    rd_stats_uv->rate = 0;
-    // Cost the skip mb case
-    rd_stats->rate += x->skip_cost[skip_ctx][1];
-  } else {
-    // Add in the cost of the no skip flag.
-    rd_stats->rate += x->skip_cost[skip_ctx][0];
-  }
-  // Calculate the final RD estimate for this mode.
-  const int64_t this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
-  // Keep record of best intra rd
-  if (this_rd < search_state->best_intra_rd) {
-    search_state->best_intra_rd = this_rd;
-    search_state->best_intra_mode = mbmi->mode;
-  }
-
-  if (sf->skip_intra_in_interframe) {
-    if (search_state->best_rd < (INT64_MAX / 2) &&
-        this_rd > (search_state->best_rd + (search_state->best_rd >> 1)))
-      search_state->skip_intra_modes = 1;
-  }
-
-  if (!disable_skip) {
-    for (int i = 0; i < REFERENCE_MODES; ++i)
-      search_state->best_pred_rd[i] =
-          AOMMIN(search_state->best_pred_rd[i], this_rd);
-  }
-  return this_rd;
-}
-
-static void collect_single_states(MACROBLOCK *x,
-                                  InterModeSearchState *search_state,
-                                  const MB_MODE_INFO *const mbmi) {
-  int i, j;
-  const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
-  const PREDICTION_MODE this_mode = mbmi->mode;
-  const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
-  const int mode_offset = INTER_OFFSET(this_mode);
-  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
-
-  // Simple rd
-  int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
-  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
-    int64_t rd = search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
-    if (rd < simple_rd) simple_rd = rd;
-  }
-
-  // Insertion sort of single_state
-  SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
-  SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
-  i = search_state->single_state_cnt[dir][mode_offset];
-  for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
-    state_s[j] = state_s[j - 1];
-  state_s[j] = this_state_s;
-  search_state->single_state_cnt[dir][mode_offset]++;
-
-  // Modelled rd
-  int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
-  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
-    int64_t rd = search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
-    if (rd < modelled_rd) modelled_rd = rd;
-  }
-
-  // Insertion sort of single_state_modelled
-  SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
-  SingleInterModeState *state_m =
-      search_state->single_state_modelled[dir][mode_offset];
-  i = search_state->single_state_modelled_cnt[dir][mode_offset];
-  for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
-    state_m[j] = state_m[j - 1];
-  state_m[j] = this_state_m;
-  search_state->single_state_modelled_cnt[dir][mode_offset]++;
-}
-
-static void analyze_single_states(const AV1_COMP *cpi,
-                                  InterModeSearchState *search_state) {
-  int i, j, dir, mode;
-  if (cpi->sf.prune_comp_search_by_single_result >= 1) {
-    for (dir = 0; dir < 2; ++dir) {
-      int64_t best_rd;
-      SingleInterModeState(*state)[FWD_REFS];
-
-      // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
-      // reference frames for all the modes (NEARESTMV and NEARMV may not
-      // have same motion vectors). Always keep the best of each mode
-      // because it might form the best possible combination with other mode.
-      state = search_state->single_state[dir];
-      best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
-                       state[INTER_OFFSET(GLOBALMV)][0].rd);
-      for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
-        for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
-          if (state[mode][i].rd != INT64_MAX &&
-              (state[mode][i].rd >> 1) > best_rd) {
-            state[mode][i].valid = 0;
-          }
-        }
-      }
-
-      state = search_state->single_state_modelled[dir];
-      best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
-                       state[INTER_OFFSET(GLOBALMV)][0].rd);
-      for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
-        for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode];
-             ++i) {
-          if (state[mode][i].rd != INT64_MAX &&
-              (state[mode][i].rd >> 1) > best_rd) {
-            state[mode][i].valid = 0;
-          }
-        }
-      }
-    }
-  }
-
-  // Ordering by simple rd first, then by modelled rd
-  for (dir = 0; dir < 2; ++dir) {
-    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
-      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
-      const int state_cnt_m =
-          search_state->single_state_modelled_cnt[dir][mode];
-      SingleInterModeState *state_s = search_state->single_state[dir][mode];
-      SingleInterModeState *state_m =
-          search_state->single_state_modelled[dir][mode];
-      int count = 0;
-      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
-      for (i = 0; i < state_cnt_s; ++i) {
-        if (state_s[i].rd == INT64_MAX) break;
-        if (state_s[i].valid)
-          search_state->single_rd_order[dir][mode][count++] =
-              state_s[i].ref_frame;
-      }
-      if (count < max_candidates) {
-        for (i = 0; i < state_cnt_m; ++i) {
-          if (state_m[i].rd == INT64_MAX) break;
-          if (state_m[i].valid) {
-            int ref_frame = state_m[i].ref_frame;
-            int match = 0;
-            // Check if existing already
-            for (j = 0; j < count; ++j) {
-              if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
-                match = 1;
-                break;
-              }
-            }
-            if (!match) {
-              // Check if this ref_frame is removed in simple rd
-              int valid = 1;
-              for (j = 0; j < state_cnt_s; j++) {
-                if (ref_frame == state_s[j].ref_frame && !state_s[j].valid) {
-                  valid = 0;
-                  break;
-                }
-              }
-              if (valid)
-                search_state->single_rd_order[dir][mode][count++] = ref_frame;
-            }
-            if (count >= max_candidates) break;
-          }
-        }
-      }
-    }
-  }
-}
-
-static int compound_skip_get_candidates(
-    const AV1_COMP *cpi, const InterModeSearchState *search_state,
-    const int dir, const PREDICTION_MODE mode) {
-  const int mode_offset = INTER_OFFSET(mode);
-  const SingleInterModeState *state =
-      search_state->single_state[dir][mode_offset];
-  const SingleInterModeState *state_modelled =
-      search_state->single_state_modelled[dir][mode_offset];
-  int max_candidates = 0;
-  int candidates;
-
-  for (int i = 0; i < FWD_REFS; ++i) {
-    if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
-    max_candidates++;
-  }
-
-  candidates = max_candidates;
-  if (cpi->sf.prune_comp_search_by_single_result >= 2) {
-    candidates = AOMMIN(2, max_candidates);
-  }
-  if (cpi->sf.prune_comp_search_by_single_result >= 3) {
-    if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
-        state[0].ref_frame == state_modelled[0].ref_frame)
-      candidates = 1;
-    if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
-  }
-  return candidates;
-}
-
-static int compound_skip_by_single_states(
-    const AV1_COMP *cpi, const InterModeSearchState *search_state,
-    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
-    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
-  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
-  const int mode[2] = { compound_ref0_mode(this_mode),
-                        compound_ref1_mode(this_mode) };
-  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
-  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
-                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
-  int ref_searched[2] = { 0, 0 };
-  int ref_mv_match[2] = { 1, 1 };
-  int i, j;
-
-  for (i = 0; i < 2; ++i) {
-    const SingleInterModeState *state =
-        search_state->single_state[mode_dir[i]][mode_offset[i]];
-    const int state_cnt =
-        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
-    for (j = 0; j < state_cnt; ++j) {
-      if (state[j].ref_frame == refs[i]) {
-        ref_searched[i] = 1;
-        break;
-      }
-    }
-  }
-
-  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
-  for (i = 0; i < 2; ++i) {
-    if (mode[i] == NEARESTMV || mode[i] == NEARMV) {
-      const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
-      int idential = 1;
-      for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
-        int_mv single_mv;
-        int_mv comp_mv;
-        get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, single_refs,
-                    x->mbmi_ext);
-        get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, refs, x->mbmi_ext);
-
-        idential &= (single_mv.as_int == comp_mv.as_int);
-        if (!idential) {
-          ref_mv_match[i] = 0;
-          break;
-        }
-      }
-    }
-  }
-
-  for (i = 0; i < 2; ++i) {
-    if (ref_searched[i] && ref_mv_match[i]) {
-      const int candidates =
-          compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
-      const MV_REFERENCE_FRAME *ref_order =
-          search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
-      int match = 0;
-      for (j = 0; j < candidates; ++j) {
-        if (refs[i] == ref_order[j]) {
-          match = 1;
-          break;
-        }
-      }
-      if (!match) return 1;
-    }
-  }
-
-  return 0;
-}
-
-static INLINE int sf_check_is_drop_ref(const MODE_DEFINITION *mode,
-                                       InterModeSearchState *search_state) {
-  const MV_REFERENCE_FRAME ref_frame = mode->ref_frame[0];
-  const MV_REFERENCE_FRAME second_ref_frame = mode->ref_frame[1];
-  if (search_state->num_available_refs > 2) {
-    if ((ref_frame == search_state->dist_order_refs[0] &&
-         second_ref_frame == search_state->dist_order_refs[1]) ||
-        (ref_frame == search_state->dist_order_refs[1] &&
-         second_ref_frame == search_state->dist_order_refs[0]))
-      return 1;  // drop this pair of refs
-  }
-  return 0;
-}
-
-static INLINE void sf_drop_ref_analyze(InterModeSearchState *search_state,
-                                       const MODE_DEFINITION *mode,
-                                       int64_t distortion2) {
-  const PREDICTION_MODE this_mode = mode->mode;
-  MV_REFERENCE_FRAME ref_frame = mode->ref_frame[0];
-  const int idx = ref_frame - LAST_FRAME;
-  if (idx && distortion2 > search_state->dist_refs[idx]) {
-    search_state->dist_refs[idx] = distortion2;
-    search_state->dist_order_refs[idx] = ref_frame;
-  }
-
-  // Reach the last single ref prediction mode
-  if (ref_frame == ALTREF_FRAME && this_mode == GLOBALMV) {
-    // bubble sort dist_refs and the order index
-    for (int i = 0; i < REF_FRAMES; ++i) {
-      for (int k = i + 1; k < REF_FRAMES; ++k) {
-        if (search_state->dist_refs[i] < search_state->dist_refs[k]) {
-          int64_t tmp_dist = search_state->dist_refs[i];
-          search_state->dist_refs[i] = search_state->dist_refs[k];
-          search_state->dist_refs[k] = tmp_dist;
-
-          int tmp_idx = search_state->dist_order_refs[i];
-          search_state->dist_order_refs[i] = search_state->dist_order_refs[k];
-          search_state->dist_order_refs[k] = tmp_idx;
-        }
-      }
-    }
-    for (int i = 0; i < REF_FRAMES; ++i) {
-      if (search_state->dist_refs[i] == -1) break;
-      search_state->num_available_refs = i;
-    }
-    search_state->num_available_refs++;
-  }
-}
-
-static void alloc_compound_type_rd_buffers(AV1_COMMON *const cm,
-                                           CompoundTypeRdBuffers *const bufs) {
-  CHECK_MEM_ERROR(
-      cm, bufs->pred0,
-      (uint8_t *)aom_memalign(16, 2 * MAX_SB_SQUARE * sizeof(*bufs->pred0)));
-  CHECK_MEM_ERROR(
-      cm, bufs->pred1,
-      (uint8_t *)aom_memalign(16, 2 * MAX_SB_SQUARE * sizeof(*bufs->pred1)));
-  CHECK_MEM_ERROR(
-      cm, bufs->residual1,
-      (int16_t *)aom_memalign(32, MAX_SB_SQUARE * sizeof(*bufs->residual1)));
-  CHECK_MEM_ERROR(
-      cm, bufs->diff10,
-      (int16_t *)aom_memalign(32, MAX_SB_SQUARE * sizeof(*bufs->diff10)));
-  CHECK_MEM_ERROR(cm, bufs->tmp_best_mask_buf,
-                  (uint8_t *)aom_malloc(2 * MAX_SB_SQUARE *
-                                        sizeof(*bufs->tmp_best_mask_buf)));
-}
-
-static void release_compound_type_rd_buffers(
-    CompoundTypeRdBuffers *const bufs) {
-  aom_free(bufs->pred0);
-  aom_free(bufs->pred1);
-  aom_free(bufs->residual1);
-  aom_free(bufs->diff10);
-  aom_free(bufs->tmp_best_mask_buf);
-  av1_zero(*bufs);  // Set all pointers to NULL for safety.
-}
-
-void av1_rd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
-                               MACROBLOCK *x, int mi_row, int mi_col,
-                               RD_STATS *rd_cost, BLOCK_SIZE bsize,
-                               PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  const SPEED_FEATURES *const sf = &cpi->sf;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int try_palette =
-      av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type);
-  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-  const struct segmentation *const seg = &cm->seg;
-  PREDICTION_MODE this_mode;
-  unsigned char segment_id = mbmi->segment_id;
-  int i;
-  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
-  unsigned int ref_costs_single[REF_FRAMES];
-  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
-  int *comp_inter_cost = x->comp_inter_cost[av1_get_reference_mode_context(xd)];
-  int *mode_map = tile_data->mode_map[bsize];
-  uint32_t mode_skip_mask[REF_FRAMES];
-  uint16_t ref_frame_skip_mask[2];
-
-  InterModeSearchState search_state;
-  init_inter_mode_search_state(&search_state, cpi, tile_data, x, bsize,
-                               best_rd_so_far);
-  INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
-    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
-    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
-  };
-  HandleInterModeArgs args = {
-    { NULL },  { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
-    { NULL },  { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1 },
-    NULL,      NULL,
-    NULL,      search_state.modelled_rd,
-    { { 0 } }, INT_MAX,
-    INT_MAX,   search_state.simple_rd,
-    0,         interintra_modes
-  };
-  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
-
-  av1_invalid_rd_stats(rd_cost);
-
-  // init params, set frame modes, speed features
-  set_params_rd_pick_inter_mode(
-      cpi, x, &args, bsize, mi_row, mi_col, ref_frame_skip_mask, mode_skip_mask,
-      ctx->skip_ref_frame_mask, ref_costs_single, ref_costs_comp, yv12_mb);
-
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-  int64_t best_est_rd = INT64_MAX;
-  // TODO(angiebird): Turn this on when this speed feature is well tested
-#if 1
-  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
-  const int do_tx_search = !md->ready;
-#else
-  const int do_tx_search = 1;
-#endif
-  InterModesInfo *inter_modes_info = &tile_data->inter_modes_info;
-  inter_modes_info->num = 0;
-#endif
-
-  int intra_mode_num = 0;
-  int intra_mode_idx_ls[MAX_MODES];
-  int reach_first_comp_mode = 0;
-
-  // Temporary buffers used by handle_inter_mode().
-  // We allocate them once and reuse it in every call to that function.
-  // Note: Must be allocated on the heap due to large size of the arrays.
-  uint8_t *tmp_buf_orig;
-  CHECK_MEM_ERROR(
-      cm, tmp_buf_orig,
-      (uint8_t *)aom_memalign(32, 2 * MAX_MB_PLANE * MAX_SB_SQUARE));
-  uint8_t *const tmp_buf = get_buf_by_bd(xd, tmp_buf_orig);
-
-  CompoundTypeRdBuffers rd_buffers;
-  alloc_compound_type_rd_buffers(cm, &rd_buffers);
-
-  for (int midx = 0; midx < MAX_MODES; ++midx) {
-    int mode_index = mode_map[midx];
-    int64_t this_rd = INT64_MAX;
-    int disable_skip = 0;
-    int rate2 = 0, rate_y = 0, rate_uv = 0;
-    int64_t distortion2 = 0;
-    int skippable = 0;
-    int this_skip2 = 0;
-    const MODE_DEFINITION *mode_order = &av1_mode_order[mode_index];
-    const MV_REFERENCE_FRAME ref_frame = mode_order->ref_frame[0];
-    const MV_REFERENCE_FRAME second_ref_frame = mode_order->ref_frame[1];
-    const int comp_pred = second_ref_frame > INTRA_FRAME;
-    this_mode = mode_order->mode;
-
-    init_mbmi(mbmi, mode_index, cm);
-
-    x->skip = 0;
-    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
-
-    // Reach the first compound prediction mode
-    if (sf->prune_comp_search_by_single_result > 0 && comp_pred &&
-        reach_first_comp_mode == 0) {
-      analyze_single_states(cpi, &search_state);
-      reach_first_comp_mode = 1;
-    }
-    const int ret = inter_mode_search_order_independent_skip(
-        cpi, ctx, x, bsize, mode_index, mi_row, mi_col, mode_skip_mask,
-        ref_frame_skip_mask, &search_state);
-    if (ret == 1) continue;
-    args.skip_motion_mode = (ret == 2);
-
-    if (sf->drop_ref && comp_pred) {
-      if (sf_check_is_drop_ref(mode_order, &search_state)) {
-        continue;
-      }
-    }
-
-    if (search_state.best_rd < search_state.mode_threshold[mode_index])
-      continue;
-
-    if (sf->prune_comp_search_by_single_result > 0 && comp_pred) {
-      if (compound_skip_by_single_states(cpi, &search_state, this_mode,
-                                         ref_frame, second_ref_frame, x))
-        continue;
-    }
-
-    const int ref_frame_cost = comp_pred
-                                   ? ref_costs_comp[ref_frame][second_ref_frame]
-                                   : ref_costs_single[ref_frame];
-    const int compmode_cost =
-        is_comp_ref_allowed(mbmi->sb_type) ? comp_inter_cost[comp_pred] : 0;
-    const int real_compmode_cost =
-        cm->reference_mode == REFERENCE_MODE_SELECT ? compmode_cost : 0;
-
-    if (comp_pred) {
-      if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
-          search_state.best_mode_index >= 0 &&
-          search_state.best_mbmode.ref_frame[0] == INTRA_FRAME)
-        continue;
-    }
-
-    if (ref_frame == INTRA_FRAME) {
-      if (sf->adaptive_mode_search)
-        if ((x->source_variance << num_pels_log2_lookup[bsize]) >
-            search_state.best_pred_sse)
-          continue;
-
-      if (this_mode != DC_PRED) {
-        // Only search the oblique modes if the best so far is
-        // one of the neighboring directional modes
-        if ((sf->mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
-            (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
-          if (search_state.best_mode_index >= 0 &&
-              search_state.best_mbmode.ref_frame[0] > INTRA_FRAME)
-            continue;
-        }
-        if (sf->mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
-          if (conditional_skipintra(this_mode, search_state.best_intra_mode))
-            continue;
-        }
-      }
-    }
-
-    // Select prediction reference frames.
-    for (i = 0; i < num_planes; i++) {
-      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
-      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
-    }
-
-    if (ref_frame == INTRA_FRAME) {
-      intra_mode_idx_ls[intra_mode_num++] = mode_index;
-      continue;
-    } else {
-      mbmi->angle_delta[PLANE_TYPE_Y] = 0;
-      mbmi->angle_delta[PLANE_TYPE_UV] = 0;
-      mbmi->filter_intra_mode_info.use_filter_intra = 0;
-      mbmi->ref_mv_idx = 0;
-      int64_t ref_best_rd = search_state.best_rd;
-      {
-        RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
-        av1_init_rd_stats(&rd_stats);
-        rd_stats.rate = rate2;
-
-        // Point to variables that are maintained between loop iterations
-        args.single_newmv = search_state.single_newmv;
-        args.single_newmv_rate = search_state.single_newmv_rate;
-        args.single_newmv_valid = search_state.single_newmv_valid;
-        args.single_comp_cost = real_compmode_cost;
-        args.ref_frame_cost = ref_frame_cost;
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-        this_rd = handle_inter_mode(
-            cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &disable_skip,
-            mi_row, mi_col, &args, ref_best_rd, tmp_buf, &rd_buffers, tile_data,
-            &best_est_rd, do_tx_search, inter_modes_info);
-#else
-        this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y,
-                                    &rd_stats_uv, &disable_skip, mi_row, mi_col,
-                                    &args, ref_best_rd, tmp_buf, &rd_buffers);
-#endif
-        rate2 = rd_stats.rate;
-        skippable = rd_stats.skip;
-        distortion2 = rd_stats.dist;
-        rate_y = rd_stats_y.rate;
-        rate_uv = rd_stats_uv.rate;
-      }
-
-      if (sf->prune_comp_search_by_single_result > 0 &&
-          is_inter_singleref_mode(this_mode)) {
-        collect_single_states(x, &search_state, mbmi);
-      }
-
-      if (this_rd == INT64_MAX) continue;
-
-      this_skip2 = mbmi->skip;
-      this_rd = RDCOST(x->rdmult, rate2, distortion2);
-      if (this_skip2) {
-        rate_y = 0;
-        rate_uv = 0;
-      }
-    }
-
-    // Did this mode help.. i.e. is it the new best mode
-    if (this_rd < search_state.best_rd || x->skip) {
-      int mode_excluded = 0;
-      if (comp_pred) {
-        mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
-      }
-      if (!mode_excluded) {
-        // Note index of best mode so far
-        search_state.best_mode_index = mode_index;
-
-        if (ref_frame == INTRA_FRAME) {
-          /* required for left and above block mv */
-          mbmi->mv[0].as_int = 0;
-        } else {
-          search_state.best_pred_sse = x->pred_sse[ref_frame];
-        }
-
-        rd_cost->rate = rate2;
-        rd_cost->dist = distortion2;
-        rd_cost->rdcost = this_rd;
-        search_state.best_rd = this_rd;
-        search_state.best_mbmode = *mbmi;
-        search_state.best_skip2 = this_skip2;
-        search_state.best_mode_skippable = skippable;
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-        if (do_tx_search) {
-          // When do_tx_search == 0, handle_inter_mode won't provide correct
-          // rate_y and rate_uv because txfm_search process is replaced by
-          // rd estimation.
-          // Therfore, we should avoid updating best_rate_y and best_rate_uv
-          // here. These two values will be updated when txfm_search is called
-          search_state.best_rate_y =
-              rate_y +
-              x->skip_cost[av1_get_skip_context(xd)][this_skip2 || skippable];
-          search_state.best_rate_uv = rate_uv;
-        }
-#else   // CONFIG_COLLECT_INTER_MODE_RD_STATS
-        search_state.best_rate_y =
-            rate_y +
-            x->skip_cost[av1_get_skip_context(xd)][this_skip2 || skippable];
-        search_state.best_rate_uv = rate_uv;
-#endif  // CONFIG_COLLECT_INTER_MODE_RD_STATS
-        memcpy(ctx->blk_skip, x->blk_skip,
-               sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
-      }
-    }
-
-    /* keep record of best compound/single-only prediction */
-    if (!disable_skip && ref_frame != INTRA_FRAME) {
-      int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
-
-      if (cm->reference_mode == REFERENCE_MODE_SELECT) {
-        single_rate = rate2 - compmode_cost;
-        hybrid_rate = rate2;
-      } else {
-        single_rate = rate2;
-        hybrid_rate = rate2 + compmode_cost;
-      }
-
-      single_rd = RDCOST(x->rdmult, single_rate, distortion2);
-      hybrid_rd = RDCOST(x->rdmult, hybrid_rate, distortion2);
-
-      if (!comp_pred) {
-        if (single_rd < search_state.best_pred_rd[SINGLE_REFERENCE])
-          search_state.best_pred_rd[SINGLE_REFERENCE] = single_rd;
-      } else {
-        if (single_rd < search_state.best_pred_rd[COMPOUND_REFERENCE])
-          search_state.best_pred_rd[COMPOUND_REFERENCE] = single_rd;
-      }
-      if (hybrid_rd < search_state.best_pred_rd[REFERENCE_MODE_SELECT])
-        search_state.best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
-    }
-    if (sf->drop_ref && second_ref_frame == NONE_FRAME) {
-      // Collect data from single ref mode, and analyze data.
-      sf_drop_ref_analyze(&search_state, mode_order, distortion2);
-    }
-
-    if (x->skip && !comp_pred) break;
-  }
-
-  aom_free(tmp_buf_orig);
-  tmp_buf_orig = NULL;
-  release_compound_type_rd_buffers(&rd_buffers);
-
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-  if (!do_tx_search) {
-    inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
-    search_state.best_rd = INT64_MAX;
-
-    int64_t top_est_rd =
-        inter_modes_info->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx];
-    for (int j = 0; j < inter_modes_info->num; ++j) {
-      const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
-      *mbmi = inter_modes_info->mbmi_arr[data_idx];
-      int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
-      if (curr_est_rd * 0.9 > top_est_rd) {
-        continue;
-      }
-      const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
-
-      x->skip = 0;
-      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
-
-      // Select prediction reference frames.
-      const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
-      for (i = 0; i < num_planes; i++) {
-        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
-        if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
-      }
-
-      RD_STATS rd_stats;
-      RD_STATS rd_stats_y;
-      RD_STATS rd_stats_uv;
-
-      av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
-      if (mbmi->motion_mode == OBMC_CAUSAL)
-        av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
-
-      if (!txfm_search(cpi, x, bsize, mi_row, mi_col, &rd_stats, &rd_stats_y,
-                       &rd_stats_uv, mode_rate, search_state.best_rd)) {
-        continue;
-      } else {
-        const int skip_ctx = av1_get_skip_context(xd);
-        inter_mode_data_push(tile_data, mbmi->sb_type, rd_stats.sse,
-                             rd_stats.dist,
-                             rd_stats_y.rate + rd_stats_uv.rate +
-                                 x->skip_cost[skip_ctx][mbmi->skip]);
-      }
-      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
-
-      if (rd_stats.rdcost < search_state.best_rd) {
-        search_state.best_rd = rd_stats.rdcost;
-        // Note index of best mode so far
-        const int mode_index = get_prediction_mode_idx(
-            mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
-        search_state.best_mode_index = mode_index;
-        *rd_cost = rd_stats;
-        search_state.best_rd = rd_stats.rdcost;
-        search_state.best_mbmode = *mbmi;
-        search_state.best_skip2 = mbmi->skip;
-        search_state.best_mode_skippable = rd_stats.skip;
-        search_state.best_rate_y =
-            rd_stats_y.rate +
-            x->skip_cost[av1_get_skip_context(xd)][rd_stats.skip || mbmi->skip];
-        search_state.best_rate_uv = rd_stats_uv.rate;
-        memcpy(ctx->blk_skip, x->blk_skip,
-               sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
-      }
-    }
-  }
-#endif
-
-  for (int j = 0; j < intra_mode_num; ++j) {
-    const int mode_index = intra_mode_idx_ls[j];
-    const MV_REFERENCE_FRAME ref_frame =
-        av1_mode_order[mode_index].ref_frame[0];
-    assert(av1_mode_order[mode_index].ref_frame[1] == NONE_FRAME);
-    assert(ref_frame == INTRA_FRAME);
-    if (sf->skip_intra_in_interframe && search_state.skip_intra_modes) break;
-    init_mbmi(mbmi, mode_index, cm);
-    x->skip = 0;
-    set_ref_ptrs(cm, xd, INTRA_FRAME, NONE_FRAME);
-
-    // Select prediction reference frames.
-    for (i = 0; i < num_planes; i++) {
-      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
-    }
-
-    RD_STATS intra_rd_stats, intra_rd_stats_y, intra_rd_stats_uv;
-
-    const int ref_frame_cost = ref_costs_single[ref_frame];
-    intra_rd_stats.rdcost = handle_intra_mode(
-        &search_state, cpi, x, bsize, mi_row, mi_col, ref_frame_cost, ctx, 0,
-        &intra_rd_stats, &intra_rd_stats_y, &intra_rd_stats_uv);
-    if (intra_rd_stats.rdcost < search_state.best_rd) {
-      search_state.best_rd = intra_rd_stats.rdcost;
-      // Note index of best mode so far
-      search_state.best_mode_index = mode_index;
-      *rd_cost = intra_rd_stats;
-      search_state.best_rd = intra_rd_stats.rdcost;
-      search_state.best_mbmode = *mbmi;
-      search_state.best_skip2 = 0;
-      search_state.best_mode_skippable = intra_rd_stats.skip;
-      search_state.best_rate_y =
-          intra_rd_stats_y.rate +
-          x->skip_cost[av1_get_skip_context(xd)][intra_rd_stats.skip];
-      search_state.best_rate_uv = intra_rd_stats_uv.rate;
-      memcpy(ctx->blk_skip, x->blk_skip,
-             sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
-    }
-  }
-
-  // In effect only when speed >= 2.
-  sf_refine_fast_tx_type_search(
-      cpi, x, mi_row, mi_col, rd_cost, bsize, ctx, search_state.best_mode_index,
-      &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
-      search_state.best_rate_uv, &search_state.best_skip2);
-
-  // Only try palette mode when the best mode so far is an intra mode.
-  if (try_palette && !is_inter_mode(search_state.best_mbmode.mode)) {
-    search_palette_mode(cpi, x, mi_row, mi_col, rd_cost, ctx, bsize, mbmi, pmi,
-                        ref_costs_single, &search_state);
-  }
-
-  search_state.best_mbmode.skip_mode = 0;
-  if (cm->skip_mode_flag &&
-      !segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
-      is_comp_ref_allowed(bsize)) {
-    rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, mi_row, mi_col,
-                      yv12_mb);
-  }
-
-  // Make sure that the ref_mv_idx is only nonzero when we're
-  // using a mode which can support ref_mv_idx
-  if (search_state.best_mbmode.ref_mv_idx != 0 &&
-      !(search_state.best_mbmode.mode == NEWMV ||
-        search_state.best_mbmode.mode == NEW_NEWMV ||
-        have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
-    search_state.best_mbmode.ref_mv_idx = 0;
-  }
-
-  if (search_state.best_mode_index < 0 ||
-      search_state.best_rd >= best_rd_so_far) {
-    rd_cost->rate = INT_MAX;
-    rd_cost->rdcost = INT64_MAX;
-    return;
-  }
-
-  assert(
-      (cm->interp_filter == SWITCHABLE) ||
-      (cm->interp_filter ==
-       av1_extract_interp_filter(search_state.best_mbmode.interp_filters, 0)) ||
-      !is_inter_block(&search_state.best_mbmode));
-  assert(
-      (cm->interp_filter == SWITCHABLE) ||
-      (cm->interp_filter ==
-       av1_extract_interp_filter(search_state.best_mbmode.interp_filters, 1)) ||
-      !is_inter_block(&search_state.best_mbmode));
-
-  if (!cpi->rc.is_src_frame_alt_ref)
-    av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
-                              sf->adaptive_rd_thresh, bsize,
-                              search_state.best_mode_index);
-
-  // macroblock modes
-  *mbmi = search_state.best_mbmode;
-  x->skip |= search_state.best_skip2;
-
-  // Note: this section is needed since the mode may have been forced to
-  // GLOBALMV by the all-zero mode handling of ref-mv.
-  if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
-    // Correct the interp filters for GLOBALMV
-    if (is_nontrans_global_motion(xd, xd->mi[0])) {
-      assert(mbmi->interp_filters ==
-             av1_broadcast_interp_filter(
-                 av1_unswitchable_filter(cm->interp_filter)));
-    }
-  }
-
-  for (i = 0; i < REFERENCE_MODES; ++i) {
-    if (search_state.best_pred_rd[i] == INT64_MAX)
-      search_state.best_pred_diff[i] = INT_MIN;
-    else
-      search_state.best_pred_diff[i] =
-          search_state.best_rd - search_state.best_pred_rd[i];
-  }
-
-  x->skip |= search_state.best_mode_skippable;
-
-  assert(search_state.best_mode_index >= 0);
-
-  store_coding_context(x, ctx, search_state.best_mode_index,
-                       search_state.best_pred_diff,
-                       search_state.best_mode_skippable);
-
-  if (pmi->palette_size[1] > 0) {
-    assert(try_palette);
-    restore_uv_color_map(cpi, x);
-  }
-}
-
-void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
-                                        TileDataEnc *tile_data, MACROBLOCK *x,
-                                        int mi_row, int mi_col,
-                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
-                                        PICK_MODE_CONTEXT *ctx,
-                                        int64_t best_rd_so_far) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  unsigned char segment_id = mbmi->segment_id;
-  const int comp_pred = 0;
-  int i;
-  int64_t best_pred_diff[REFERENCE_MODES];
-  unsigned int ref_costs_single[REF_FRAMES];
-  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
-  int *comp_inter_cost = x->comp_inter_cost[av1_get_reference_mode_context(xd)];
-  InterpFilter best_filter = SWITCHABLE;
-  int64_t this_rd = INT64_MAX;
-  int rate2 = 0;
-  const int64_t distortion2 = 0;
-  (void)mi_row;
-  (void)mi_col;
-
-  av1_collect_neighbors_ref_counts(xd);
-
-  estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single,
-                           ref_costs_comp);
-
-  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
-  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;
-
-  rd_cost->rate = INT_MAX;
-
-  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
-
-  mbmi->palette_mode_info.palette_size[0] = 0;
-  mbmi->palette_mode_info.palette_size[1] = 0;
-  mbmi->filter_intra_mode_info.use_filter_intra = 0;
-  mbmi->mode = GLOBALMV;
-  mbmi->motion_mode = SIMPLE_TRANSLATION;
-  mbmi->uv_mode = UV_DC_PRED;
-  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
-    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
-  else
-    mbmi->ref_frame[0] = LAST_FRAME;
-  mbmi->ref_frame[1] = NONE_FRAME;
-  mbmi->mv[0].as_int =
-      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
-                           cm->allow_high_precision_mv, bsize, mi_col, mi_row,
-                           cm->cur_frame_force_integer_mv)
-          .as_int;
-  mbmi->tx_size = max_txsize_lookup[bsize];
-  x->skip = 1;
-
-  mbmi->ref_mv_idx = 0;
-
-  mbmi->motion_mode = SIMPLE_TRANSLATION;
-  av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
-  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
-    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
-    mbmi->num_proj_ref = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
-    // Select the samples according to motion vector difference
-    if (mbmi->num_proj_ref > 1)
-      mbmi->num_proj_ref = selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
-                                         mbmi->num_proj_ref, bsize);
-  }
-
-  set_default_interp_filters(mbmi, cm->interp_filter);
-
-  if (cm->interp_filter != SWITCHABLE) {
-    best_filter = cm->interp_filter;
-  } else {
-    best_filter = EIGHTTAP_REGULAR;
-    if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd) &&
-        x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
-      int rs;
-      int best_rs = INT_MAX;
-      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
-        mbmi->interp_filters = av1_broadcast_interp_filter(i);
-        rs = av1_get_switchable_rate(cm, x, xd);
-        if (rs < best_rs) {
-          best_rs = rs;
-          best_filter = av1_extract_interp_filter(mbmi->interp_filters, 0);
-        }
-      }
-    }
-  }
-  // Set the appropriate filter
-  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
-  rate2 += av1_get_switchable_rate(cm, x, xd);
-
-  if (cm->reference_mode == REFERENCE_MODE_SELECT)
-    rate2 += comp_inter_cost[comp_pred];
-
-  // Estimate the reference frame signaling cost and add it
-  // to the rolling cost variable.
-  rate2 += ref_costs_single[LAST_FRAME];
-  this_rd = RDCOST(x->rdmult, rate2, distortion2);
-
-  rd_cost->rate = rate2;
-  rd_cost->dist = distortion2;
-  rd_cost->rdcost = this_rd;
-
-  if (this_rd >= best_rd_so_far) {
-    rd_cost->rate = INT_MAX;
-    rd_cost->rdcost = INT64_MAX;
-    return;
-  }
-
-  assert((cm->interp_filter == SWITCHABLE) ||
-         (cm->interp_filter ==
-          av1_extract_interp_filter(mbmi->interp_filters, 0)));
-
-  av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
-                            cpi->sf.adaptive_rd_thresh, bsize, THR_GLOBALMV);
-
-  av1_zero(best_pred_diff);
-
-  store_coding_context(x, ctx, THR_GLOBALMV, best_pred_diff, 0);
-}
-
-struct calc_target_weighted_pred_ctxt {
-  const MACROBLOCK *x;
-  const uint8_t *tmp;
-  int tmp_stride;
-  int overlap;
-};
-
-static INLINE void calc_target_weighted_pred_above(
-    MACROBLOCKD *xd, int rel_mi_col, uint8_t nb_mi_width, MB_MODE_INFO *nb_mi,
-    void *fun_ctxt, const int num_planes) {
-  (void)nb_mi;
-  (void)num_planes;
-
-  struct calc_target_weighted_pred_ctxt *ctxt =
-      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
-
-  const int bw = xd->n4_w << MI_SIZE_LOG2;
-  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
-
-  int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_col * MI_SIZE);
-  int32_t *mask = ctxt->x->mask_buf + (rel_mi_col * MI_SIZE);
-  const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
-  const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
-
-  if (!is_hbd) {
-    for (int row = 0; row < ctxt->overlap; ++row) {
-      const uint8_t m0 = mask1d[row];
-      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
-      for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) {
-        wsrc[col] = m1 * tmp[col];
-        mask[col] = m0;
-      }
-      wsrc += bw;
-      mask += bw;
-      tmp += ctxt->tmp_stride;
-    }
-  } else {
-    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
-
-    for (int row = 0; row < ctxt->overlap; ++row) {
-      const uint8_t m0 = mask1d[row];
-      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
-      for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) {
-        wsrc[col] = m1 * tmp16[col];
-        mask[col] = m0;
-      }
-      wsrc += bw;
-      mask += bw;
-      tmp16 += ctxt->tmp_stride;
-    }
-  }
-}
-
-static INLINE void calc_target_weighted_pred_left(
-    MACROBLOCKD *xd, int rel_mi_row, uint8_t nb_mi_height, MB_MODE_INFO *nb_mi,
-    void *fun_ctxt, const int num_planes) {
-  (void)nb_mi;
-  (void)num_planes;
-
-  struct calc_target_weighted_pred_ctxt *ctxt =
-      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
-
-  const int bw = xd->n4_w << MI_SIZE_LOG2;
-  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
-
-  int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_row * MI_SIZE * bw);
-  int32_t *mask = ctxt->x->mask_buf + (rel_mi_row * MI_SIZE * bw);
-  const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
-  const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
-
-  if (!is_hbd) {
-    for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) {
-      for (int col = 0; col < ctxt->overlap; ++col) {
-        const uint8_t m0 = mask1d[col];
-        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
-        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
-                    (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
-        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
-      }
-      wsrc += bw;
-      mask += bw;
-      tmp += ctxt->tmp_stride;
-    }
-  } else {
-    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
-
-    for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) {
-      for (int col = 0; col < ctxt->overlap; ++col) {
-        const uint8_t m0 = mask1d[col];
-        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
-        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
-                    (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
-        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
-      }
-      wsrc += bw;
-      mask += bw;
-      tmp16 += ctxt->tmp_stride;
-    }
-  }
-}
-
-// This function has a structure similar to av1_build_obmc_inter_prediction
-//
-// The OBMC predictor is computed as:
-//
-//  PObmc(x,y) =
-//    AOM_BLEND_A64(Mh(x),
-//                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
-//                  PLeft(x, y))
-//
-// Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
-// rounding, this can be written as:
-//
-//  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
-//    Mh(x) * Mv(y) * P(x,y) +
-//      Mh(x) * Cv(y) * Pabove(x,y) +
-//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
-//
-// Where :
-//
-//  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
-//  Ch(y) = AOM_BLEND_A64_MAX_ALPHA - Mh(y)
-//
-// This function computes 'wsrc' and 'mask' as:
-//
-//  wsrc(x, y) =
-//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
-//      Mh(x) * Cv(y) * Pabove(x,y) +
-//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
-//
-//  mask(x, y) = Mh(x) * Mv(y)
-//
-// These can then be used to efficiently approximate the error for any
-// predictor P in the context of the provided neighbouring predictors by
-// computing:
-//
-//  error(x, y) =
-//    wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
-//
-static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
-                                      const MACROBLOCKD *xd, int mi_row,
-                                      int mi_col, const uint8_t *above,
-                                      int above_stride, const uint8_t *left,
-                                      int left_stride) {
-  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
-  const int bw = xd->n4_w << MI_SIZE_LOG2;
-  const int bh = xd->n4_h << MI_SIZE_LOG2;
-  int32_t *mask_buf = x->mask_buf;
-  int32_t *wsrc_buf = x->wsrc_buf;
-
-  const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
-  const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
-
-  // plane 0 should not be subsampled
-  assert(xd->plane[0].subsampling_x == 0);
-  assert(xd->plane[0].subsampling_y == 0);
-
-  av1_zero_array(wsrc_buf, bw * bh);
-  for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
-
-  // handle above row
-  if (xd->up_available) {
-    const int overlap =
-        AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
-    struct calc_target_weighted_pred_ctxt ctxt = { x, above, above_stride,
-                                                   overlap };
-    foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd, mi_col,
-                                  max_neighbor_obmc[mi_size_wide_log2[bsize]],
-                                  calc_target_weighted_pred_above, &ctxt);
-  }
-
-  for (int i = 0; i < bw * bh; ++i) {
-    wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
-    mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
-  }
-
-  // handle left column
-  if (xd->left_available) {
-    const int overlap =
-        AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
-    struct calc_target_weighted_pred_ctxt ctxt = { x, left, left_stride,
-                                                   overlap };
-    foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd, mi_row,
-                                 max_neighbor_obmc[mi_size_high_log2[bsize]],
-                                 calc_target_weighted_pred_left, &ctxt);
-  }
-
-  if (!is_hbd) {
-    const uint8_t *src = x->plane[0].src.buf;
-
-    for (int row = 0; row < bh; ++row) {
-      for (int col = 0; col < bw; ++col) {
-        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
-      }
-      wsrc_buf += bw;
-      src += x->plane[0].src.stride;
-    }
-  } else {
-    const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
-
-    for (int row = 0; row < bh; ++row) {
-      for (int col = 0; col < bw; ++col) {
-        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
-      }
-      wsrc_buf += bw;
-      src += x->plane[0].src.stride;
-    }
-  }
-}
diff --git a/third_party/aom/av1/encoder/rdopt.h b/third_party/aom/av1/encoder/rdopt.h
deleted file mode 100644
index 4c11f90b8..000000000
--- a/third_party/aom/av1/encoder/rdopt.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_RDOPT_H_
-#define AOM_AV1_ENCODER_RDOPT_H_
-
-#include "av1/common/blockd.h"
-#include "av1/common/txb_common.h"
-
-#include "av1/encoder/block.h"
-#include "av1/encoder/context_tree.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/encodetxb.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define MAX_REF_MV_SERCH 3
-#define DEFAULT_LUMA_INTERP_SKIP_FLAG 1
-#define DEFAULT_CHROMA_INTERP_SKIP_FLAG 2
-#define DEFAULT_INTERP_SKIP_FLAG \
-  (DEFAULT_LUMA_INTERP_SKIP_FLAG | DEFAULT_CHROMA_INTERP_SKIP_FLAG)
-
-struct TileInfo;
-struct macroblock;
-struct RD_STATS;
-
-#if CONFIG_RD_DEBUG
-static INLINE void av1_update_txb_coeff_cost(RD_STATS *rd_stats, int plane,
-                                             TX_SIZE tx_size, int blk_row,
-                                             int blk_col, int txb_coeff_cost) {
-  (void)blk_row;
-  (void)blk_col;
-  (void)tx_size;
-  rd_stats->txb_coeff_cost[plane] += txb_coeff_cost;
-
-  {
-    const int txb_h = tx_size_high_unit[tx_size];
-    const int txb_w = tx_size_wide_unit[tx_size];
-    int idx, idy;
-    for (idy = 0; idy < txb_h; ++idy)
-      for (idx = 0; idx < txb_w; ++idx)
-        rd_stats->txb_coeff_cost_map[plane][blk_row + idy][blk_col + idx] = 0;
-
-    rd_stats->txb_coeff_cost_map[plane][blk_row][blk_col] = txb_coeff_cost;
-  }
-  assert(blk_row < TXB_COEFF_COST_MAP_SIZE);
-  assert(blk_col < TXB_COEFF_COST_MAP_SIZE);
-}
-#endif
-
-// Returns the number of colors in 'src'.
-int av1_count_colors(const uint8_t *src, int stride, int rows, int cols,
-                     int *val_count);
-// Same as av1_count_colors(), but for high-bitdepth mode.
-int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
-                            int bit_depth, int *val_count);
-
-#if CONFIG_DIST_8X8
-int64_t av1_dist_8x8(const struct AV1_COMP *const cpi, const MACROBLOCK *x,
-                     const uint8_t *src, int src_stride, const uint8_t *dst,
-                     int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
-                     int bsh, int visible_w, int visible_h, int qindex);
-#endif
-
-static INLINE int av1_cost_skip_txb(MACROBLOCK *x, const TXB_CTX *const txb_ctx,
-                                    int plane, TX_SIZE tx_size) {
-  const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
-  const PLANE_TYPE plane_type = get_plane_type(plane);
-  const LV_MAP_COEFF_COST *const coeff_costs =
-      &x->coeff_costs[txs_ctx][plane_type];
-  return coeff_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1];
-}
-
-static INLINE int av1_cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x,
-                                  int plane, int block, TX_SIZE tx_size,
-                                  const TX_TYPE tx_type,
-                                  const TXB_CTX *const txb_ctx,
-                                  int use_fast_coef_costing) {
-#if TXCOEFF_COST_TIMER
-  struct aom_usec_timer timer;
-  aom_usec_timer_start(&timer);
-#endif
-  (void)use_fast_coef_costing;
-  const int cost =
-      av1_cost_coeffs_txb(cm, x, plane, block, tx_size, tx_type, txb_ctx);
-#if TXCOEFF_COST_TIMER
-  AV1_COMMON *tmp_cm = (AV1_COMMON *)&cpi->common;
-  aom_usec_timer_mark(&timer);
-  const int64_t elapsed_time = aom_usec_timer_elapsed(&timer);
-  tmp_cm->txcoeff_cost_timer += elapsed_time;
-  ++tmp_cm->txcoeff_cost_count;
-#endif
-  return cost;
-}
-
-void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
-                               int mi_row, int mi_col, struct RD_STATS *rd_cost,
-                               BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
-                               int64_t best_rd);
-
-unsigned int av1_get_sby_perpixel_variance(const struct AV1_COMP *cpi,
-                                           const struct buf_2d *ref,
-                                           BLOCK_SIZE bs);
-unsigned int av1_high_get_sby_perpixel_variance(const struct AV1_COMP *cpi,
-                                                const struct buf_2d *ref,
-                                                BLOCK_SIZE bs, int bd);
-
-void av1_rd_pick_inter_mode_sb(struct AV1_COMP *cpi,
-                               struct TileDataEnc *tile_data,
-                               struct macroblock *x, int mi_row, int mi_col,
-                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
-                               PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far);
-
-void av1_rd_pick_inter_mode_sb_seg_skip(
-    const struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
-    struct macroblock *x, int mi_row, int mi_col, struct RD_STATS *rd_cost,
-    BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far);
-
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-void av1_inter_mode_data_init(struct TileDataEnc *tile_data);
-void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult);
-#endif
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_RDOPT_H_
diff --git a/third_party/aom/av1/encoder/reconinter_enc.c b/third_party/aom/av1/encoder/reconinter_enc.c
deleted file mode 100644
index 23d920fc3..000000000
--- a/third_party/aom/av1/encoder/reconinter_enc.c
+++ /dev/null
@@ -1,627 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-#include <limits.h>
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-#include "config/aom_scale_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/blend.h"
-
-#include "av1/common/blockd.h"
-#include "av1/common/mvref_common.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/reconintra.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/obmc.h"
-#include "av1/encoder/reconinter_enc.h"
-
-static INLINE void calc_subpel_params(
-    MACROBLOCKD *xd, const struct scale_factors *const sf, const MV mv,
-    int plane, const int pre_x, const int pre_y, int x, int y,
-    struct buf_2d *const pre_buf, uint8_t **pre, SubpelParams *subpel_params,
-    int bw, int bh) {
-  struct macroblockd_plane *const pd = &xd->plane[plane];
-  const int is_scaled = av1_is_scaled(sf);
-  if (is_scaled) {
-    int ssx = pd->subsampling_x;
-    int ssy = pd->subsampling_y;
-    int orig_pos_y = (pre_y + y) << SUBPEL_BITS;
-    orig_pos_y += mv.row * (1 << (1 - ssy));
-    int orig_pos_x = (pre_x + x) << SUBPEL_BITS;
-    orig_pos_x += mv.col * (1 << (1 - ssx));
-    int pos_y = sf->scale_value_y(orig_pos_y, sf);
-    int pos_x = sf->scale_value_x(orig_pos_x, sf);
-    pos_x += SCALE_EXTRA_OFF;
-    pos_y += SCALE_EXTRA_OFF;
-
-    const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy);
-    const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx);
-    const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
-                       << SCALE_SUBPEL_BITS;
-    const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS;
-    pos_y = clamp(pos_y, top, bottom);
-    pos_x = clamp(pos_x, left, right);
-
-    *pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
-           (pos_x >> SCALE_SUBPEL_BITS);
-    subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
-    subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
-    subpel_params->xs = sf->x_step_q4;
-    subpel_params->ys = sf->y_step_q4;
-  } else {
-    const MV mv_q4 = clamp_mv_to_umv_border_sb(
-        xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
-    subpel_params->xs = subpel_params->ys = SCALE_SUBPEL_SHIFTS;
-    subpel_params->subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
-    subpel_params->subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
-    *pre = pre_buf->buf + (y + (mv_q4.row >> SUBPEL_BITS)) * pre_buf->stride +
-           (x + (mv_q4.col >> SUBPEL_BITS));
-  }
-}
-
-static INLINE void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                          int plane, const MB_MODE_INFO *mi,
-                                          int build_for_obmc, int bw, int bh,
-                                          int mi_x, int mi_y) {
-  struct macroblockd_plane *const pd = &xd->plane[plane];
-  int is_compound = has_second_ref(mi);
-  int ref;
-  const int is_intrabc = is_intrabc_block(mi);
-  assert(IMPLIES(is_intrabc, !is_compound));
-  int is_global[2] = { 0, 0 };
-  for (ref = 0; ref < 1 + is_compound; ++ref) {
-    const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[ref]];
-    is_global[ref] = is_global_mv_block(mi, wm->wmtype);
-  }
-
-  const BLOCK_SIZE bsize = mi->sb_type;
-  const int ss_x = pd->subsampling_x;
-  const int ss_y = pd->subsampling_y;
-  int sub8x8_inter = (block_size_wide[bsize] < 8 && ss_x) ||
-                     (block_size_high[bsize] < 8 && ss_y);
-
-  if (is_intrabc) sub8x8_inter = 0;
-
-  // For sub8x8 chroma blocks, we may be covering more than one luma block's
-  // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
-  // the top-left corner of the prediction source - the correct top-left corner
-  // is at (pre_x, pre_y).
-  const int row_start =
-      (block_size_high[bsize] == 4) && ss_y && !build_for_obmc ? -1 : 0;
-  const int col_start =
-      (block_size_wide[bsize] == 4) && ss_x && !build_for_obmc ? -1 : 0;
-  const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
-  const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;
-
-  sub8x8_inter = sub8x8_inter && !build_for_obmc;
-  if (sub8x8_inter) {
-    for (int row = row_start; row <= 0 && sub8x8_inter; ++row) {
-      for (int col = col_start; col <= 0; ++col) {
-        const MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
-        if (!is_inter_block(this_mbmi)) sub8x8_inter = 0;
-        if (is_intrabc_block(this_mbmi)) sub8x8_inter = 0;
-      }
-    }
-  }
-
-  if (sub8x8_inter) {
-    // block size
-    const int b4_w = block_size_wide[bsize] >> ss_x;
-    const int b4_h = block_size_high[bsize] >> ss_y;
-    const BLOCK_SIZE plane_bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
-    const int b8_w = block_size_wide[plane_bsize] >> ss_x;
-    const int b8_h = block_size_high[plane_bsize] >> ss_y;
-    assert(!is_compound);
-
-    const struct buf_2d orig_pred_buf[2] = { pd->pre[0], pd->pre[1] };
-
-    int row = row_start;
-    for (int y = 0; y < b8_h; y += b4_h) {
-      int col = col_start;
-      for (int x = 0; x < b8_w; x += b4_w) {
-        MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
-        is_compound = has_second_ref(this_mbmi);
-        int tmp_dst_stride = 8;
-        assert(bw < 8 || bh < 8);
-        ConvolveParams conv_params = get_conv_params_no_round(
-            0, plane, xd->tmp_conv_dst, tmp_dst_stride, is_compound, xd->bd);
-        conv_params.use_jnt_comp_avg = 0;
-        struct buf_2d *const dst_buf = &pd->dst;
-        uint8_t *dst = dst_buf->buf + dst_buf->stride * y + x;
-
-        ref = 0;
-        const RefBuffer *ref_buf =
-            &cm->frame_refs[this_mbmi->ref_frame[ref] - LAST_FRAME];
-
-        pd->pre[ref].buf0 =
-            (plane == 1) ? ref_buf->buf->u_buffer : ref_buf->buf->v_buffer;
-        pd->pre[ref].buf =
-            pd->pre[ref].buf0 + scaled_buffer_offset(pre_x, pre_y,
-                                                     ref_buf->buf->uv_stride,
-                                                     &ref_buf->sf);
-        pd->pre[ref].width = ref_buf->buf->uv_crop_width;
-        pd->pre[ref].height = ref_buf->buf->uv_crop_height;
-        pd->pre[ref].stride = ref_buf->buf->uv_stride;
-
-        const struct scale_factors *const sf =
-            is_intrabc ? &cm->sf_identity : &ref_buf->sf;
-        struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
-
-        const MV mv = this_mbmi->mv[ref].as_mv;
-
-        uint8_t *pre;
-        SubpelParams subpel_params;
-        WarpTypesAllowed warp_types;
-        warp_types.global_warp_allowed = is_global[ref];
-        warp_types.local_warp_allowed = this_mbmi->motion_mode == WARPED_CAUSAL;
-
-        calc_subpel_params(xd, sf, mv, plane, pre_x, pre_y, x, y, pre_buf, &pre,
-                           &subpel_params, bw, bh);
-        conv_params.do_average = ref;
-        if (is_masked_compound_type(mi->interinter_comp.type)) {
-          // masked compound type has its own average mechanism
-          conv_params.do_average = 0;
-        }
-
-        av1_make_inter_predictor(
-            pre, pre_buf->stride, dst, dst_buf->stride, &subpel_params, sf,
-            b4_w, b4_h, &conv_params, this_mbmi->interp_filters, &warp_types,
-            (mi_x >> pd->subsampling_x) + x, (mi_y >> pd->subsampling_y) + y,
-            plane, ref, mi, build_for_obmc, xd, cm->allow_warped_motion);
-
-        ++col;
-      }
-      ++row;
-    }
-
-    for (ref = 0; ref < 2; ++ref) pd->pre[ref] = orig_pred_buf[ref];
-    return;
-  }
-
-  {
-    ConvolveParams conv_params = get_conv_params_no_round(
-        0, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd);
-    av1_jnt_comp_weight_assign(cm, mi, 0, &conv_params.fwd_offset,
-                               &conv_params.bck_offset,
-                               &conv_params.use_jnt_comp_avg, is_compound);
-
-    struct buf_2d *const dst_buf = &pd->dst;
-    uint8_t *const dst = dst_buf->buf;
-    for (ref = 0; ref < 1 + is_compound; ++ref) {
-      const struct scale_factors *const sf =
-          is_intrabc ? &cm->sf_identity : &xd->block_refs[ref]->sf;
-      struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
-      const MV mv = mi->mv[ref].as_mv;
-
-      uint8_t *pre;
-      SubpelParams subpel_params;
-      calc_subpel_params(xd, sf, mv, plane, pre_x, pre_y, 0, 0, pre_buf, &pre,
-                         &subpel_params, bw, bh);
-
-      WarpTypesAllowed warp_types;
-      warp_types.global_warp_allowed = is_global[ref];
-      warp_types.local_warp_allowed = mi->motion_mode == WARPED_CAUSAL;
-
-      if (ref && is_masked_compound_type(mi->interinter_comp.type)) {
-        // masked compound type has its own average mechanism
-        conv_params.do_average = 0;
-        av1_make_masked_inter_predictor(
-            pre, pre_buf->stride, dst, dst_buf->stride, &subpel_params, sf, bw,
-            bh, &conv_params, mi->interp_filters, plane, &warp_types,
-            mi_x >> pd->subsampling_x, mi_y >> pd->subsampling_y, ref, xd,
-            cm->allow_warped_motion);
-      } else {
-        conv_params.do_average = ref;
-        av1_make_inter_predictor(
-            pre, pre_buf->stride, dst, dst_buf->stride, &subpel_params, sf, bw,
-            bh, &conv_params, mi->interp_filters, &warp_types,
-            mi_x >> pd->subsampling_x, mi_y >> pd->subsampling_y, plane, ref,
-            mi, build_for_obmc, xd, cm->allow_warped_motion);
-      }
-    }
-  }
-}
-
-static void build_inter_predictors_for_planes(const AV1_COMMON *cm,
-                                              MACROBLOCKD *xd, BLOCK_SIZE bsize,
-                                              int mi_row, int mi_col,
-                                              int plane_from, int plane_to) {
-  int plane;
-  const int mi_x = mi_col * MI_SIZE;
-  const int mi_y = mi_row * MI_SIZE;
-  for (plane = plane_from; plane <= plane_to; ++plane) {
-    const struct macroblockd_plane *pd = &xd->plane[plane];
-    const int bw = pd->width;
-    const int bh = pd->height;
-
-    if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
-                             pd->subsampling_y))
-      continue;
-
-    build_inter_predictors(cm, xd, plane, xd->mi[0], 0, bw, bh, mi_x, mi_y);
-  }
-}
-
-void av1_build_inter_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                    int mi_row, int mi_col, BUFFER_SET *ctx,
-                                    BLOCK_SIZE bsize) {
-  av1_build_inter_predictors_sbp(cm, xd, mi_row, mi_col, ctx, bsize, 0);
-}
-
-void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                     int mi_row, int mi_col, BUFFER_SET *ctx,
-                                     BLOCK_SIZE bsize) {
-  for (int plane_idx = 1; plane_idx < MAX_MB_PLANE; plane_idx++) {
-    av1_build_inter_predictors_sbp(cm, xd, mi_row, mi_col, ctx, bsize,
-                                   plane_idx);
-  }
-}
-
-void av1_build_inter_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                    int mi_row, int mi_col, BUFFER_SET *ctx,
-                                    BLOCK_SIZE bsize, int plane_idx) {
-  build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, plane_idx,
-                                    plane_idx);
-
-  if (is_interintra_pred(xd->mi[0])) {
-    BUFFER_SET default_ctx = { { NULL, NULL, NULL }, { 0, 0, 0 } };
-    if (!ctx) {
-      default_ctx.plane[plane_idx] = xd->plane[plane_idx].dst.buf;
-      default_ctx.stride[plane_idx] = xd->plane[plane_idx].dst.stride;
-      ctx = &default_ctx;
-    }
-    av1_build_interintra_predictors_sbp(cm, xd, xd->plane[plane_idx].dst.buf,
-                                        xd->plane[plane_idx].dst.stride, ctx,
-                                        plane_idx, bsize);
-  }
-}
-
-void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                   int mi_row, int mi_col, BUFFER_SET *ctx,
-                                   BLOCK_SIZE bsize) {
-  const int num_planes = av1_num_planes(cm);
-  av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
-  if (num_planes > 1)
-    av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, ctx, bsize);
-}
-
-// TODO(sarahparker):
-// av1_build_inter_predictor should be combined with
-// av1_make_inter_predictor
-void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
-                               int dst_stride, const MV *src_mv,
-                               const struct scale_factors *sf, int w, int h,
-                               ConvolveParams *conv_params,
-                               InterpFilters interp_filters,
-                               const WarpTypesAllowed *warp_types, int p_col,
-                               int p_row, int plane, int ref,
-                               enum mv_precision precision, int x, int y,
-                               const MACROBLOCKD *xd, int can_use_previous) {
-  const int is_q4 = precision == MV_PRECISION_Q4;
-  const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
-                     is_q4 ? src_mv->col : src_mv->col * 2 };
-  MV32 mv = av1_scale_mv(&mv_q4, x, y, sf);
-  mv.col += SCALE_EXTRA_OFF;
-  mv.row += SCALE_EXTRA_OFF;
-
-  const SubpelParams subpel_params = { sf->x_step_q4, sf->y_step_q4,
-                                       mv.col & SCALE_SUBPEL_MASK,
-                                       mv.row & SCALE_SUBPEL_MASK };
-  src += (mv.row >> SCALE_SUBPEL_BITS) * src_stride +
-         (mv.col >> SCALE_SUBPEL_BITS);
-
-  av1_make_inter_predictor(src, src_stride, dst, dst_stride, &subpel_params, sf,
-                           w, h, conv_params, interp_filters, warp_types, p_col,
-                           p_row, plane, ref, xd->mi[0], 0, xd,
-                           can_use_previous);
-}
-
-static INLINE void build_prediction_by_above_pred(
-    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
-    MB_MODE_INFO *above_mbmi, void *fun_ctxt, const int num_planes) {
-  struct build_prediction_ctxt *ctxt = (struct build_prediction_ctxt *)fun_ctxt;
-  const int above_mi_col = ctxt->mi_col + rel_mi_col;
-  int mi_x, mi_y;
-  MB_MODE_INFO backup_mbmi = *above_mbmi;
-
-  av1_setup_build_prediction_by_above_pred(xd, rel_mi_col, above_mi_width,
-                                           above_mbmi, ctxt, num_planes);
-  mi_x = above_mi_col << MI_SIZE_LOG2;
-  mi_y = ctxt->mi_row << MI_SIZE_LOG2;
-
-  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
-
-  for (int j = 0; j < num_planes; ++j) {
-    const struct macroblockd_plane *pd = &xd->plane[j];
-    int bw = (above_mi_width * MI_SIZE) >> pd->subsampling_x;
-    int bh = clamp(block_size_high[bsize] >> (pd->subsampling_y + 1), 4,
-                   block_size_high[BLOCK_64X64] >> (pd->subsampling_y + 1));
-
-    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
-    build_inter_predictors(ctxt->cm, xd, j, above_mbmi, 1, bw, bh, mi_x, mi_y);
-  }
-  *above_mbmi = backup_mbmi;
-}
-
-void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                         int mi_row, int mi_col,
-                                         uint8_t *tmp_buf[MAX_MB_PLANE],
-                                         int tmp_width[MAX_MB_PLANE],
-                                         int tmp_height[MAX_MB_PLANE],
-                                         int tmp_stride[MAX_MB_PLANE]) {
-  if (!xd->up_available) return;
-
-  // Adjust mb_to_bottom_edge to have the correct value for the OBMC
-  // prediction block. This is half the height of the original block,
-  // except for 128-wide blocks, where we only use a height of 32.
-  int this_height = xd->n4_h * MI_SIZE;
-  int pred_height = AOMMIN(this_height / 2, 32);
-  xd->mb_to_bottom_edge += (this_height - pred_height) * 8;
-
-  struct build_prediction_ctxt ctxt = { cm,         mi_row,
-                                        mi_col,     tmp_buf,
-                                        tmp_width,  tmp_height,
-                                        tmp_stride, xd->mb_to_right_edge };
-  BLOCK_SIZE bsize = xd->mi[0]->sb_type;
-  foreach_overlappable_nb_above(cm, xd, mi_col,
-                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
-                                build_prediction_by_above_pred, &ctxt);
-
-  xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
-  xd->mb_to_right_edge = ctxt.mb_to_far_edge;
-  xd->mb_to_bottom_edge -= (this_height - pred_height) * 8;
-}
-
-static INLINE void build_prediction_by_left_pred(
-    MACROBLOCKD *xd, int rel_mi_row, uint8_t left_mi_height,
-    MB_MODE_INFO *left_mbmi, void *fun_ctxt, const int num_planes) {
-  struct build_prediction_ctxt *ctxt = (struct build_prediction_ctxt *)fun_ctxt;
-  const int left_mi_row = ctxt->mi_row + rel_mi_row;
-  int mi_x, mi_y;
-  MB_MODE_INFO backup_mbmi = *left_mbmi;
-
-  av1_setup_build_prediction_by_left_pred(xd, rel_mi_row, left_mi_height,
-                                          left_mbmi, ctxt, num_planes);
-  mi_x = ctxt->mi_col << MI_SIZE_LOG2;
-  mi_y = left_mi_row << MI_SIZE_LOG2;
-  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
-
-  for (int j = 0; j < num_planes; ++j) {
-    const struct macroblockd_plane *pd = &xd->plane[j];
-    int bw = clamp(block_size_wide[bsize] >> (pd->subsampling_x + 1), 4,
-                   block_size_wide[BLOCK_64X64] >> (pd->subsampling_x + 1));
-    int bh = (left_mi_height << MI_SIZE_LOG2) >> pd->subsampling_y;
-
-    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
-    build_inter_predictors(ctxt->cm, xd, j, left_mbmi, 1, bw, bh, mi_x, mi_y);
-  }
-  *left_mbmi = backup_mbmi;
-}
-
-void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                        int mi_row, int mi_col,
-                                        uint8_t *tmp_buf[MAX_MB_PLANE],
-                                        int tmp_width[MAX_MB_PLANE],
-                                        int tmp_height[MAX_MB_PLANE],
-                                        int tmp_stride[MAX_MB_PLANE]) {
-  if (!xd->left_available) return;
-
-  // Adjust mb_to_right_edge to have the correct value for the OBMC
-  // prediction block. This is half the width of the original block,
-  // except for 128-wide blocks, where we only use a width of 32.
-  int this_width = xd->n4_w * MI_SIZE;
-  int pred_width = AOMMIN(this_width / 2, 32);
-  xd->mb_to_right_edge += (this_width - pred_width) * 8;
-
-  struct build_prediction_ctxt ctxt = { cm,         mi_row,
-                                        mi_col,     tmp_buf,
-                                        tmp_width,  tmp_height,
-                                        tmp_stride, xd->mb_to_bottom_edge };
-  BLOCK_SIZE bsize = xd->mi[0]->sb_type;
-  foreach_overlappable_nb_left(cm, xd, mi_row,
-                               max_neighbor_obmc[mi_size_high_log2[bsize]],
-                               build_prediction_by_left_pred, &ctxt);
-
-  xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
-  xd->mb_to_right_edge -= (this_width - pred_width) * 8;
-  xd->mb_to_bottom_edge = ctxt.mb_to_far_edge;
-}
-
-void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                        int mi_row, int mi_col) {
-  const int num_planes = av1_num_planes(cm);
-  uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
-  int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-  int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-  int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-  int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-  int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-  int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
-
-  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    int len = sizeof(uint16_t);
-    dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]);
-    dst_buf1[1] =
-        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * len);
-    dst_buf1[2] =
-        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2 * len);
-    dst_buf2[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1]);
-    dst_buf2[1] =
-        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * len);
-    dst_buf2[2] =
-        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2 * len);
-  } else {
-    dst_buf1[0] = xd->tmp_obmc_bufs[0];
-    dst_buf1[1] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE;
-    dst_buf1[2] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2;
-    dst_buf2[0] = xd->tmp_obmc_bufs[1];
-    dst_buf2[1] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE;
-    dst_buf2[2] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2;
-  }
-  av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, dst_buf1,
-                                      dst_width1, dst_height1, dst_stride1);
-  av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, dst_buf2,
-                                     dst_width2, dst_height2, dst_stride2);
-  av1_setup_dst_planes(xd->plane, xd->mi[0]->sb_type, get_frame_new_buffer(cm),
-                       mi_row, mi_col, 0, num_planes);
-  av1_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, dst_buf1, dst_stride1,
-                                  dst_buf2, dst_stride2);
-}
-
-// Builds the inter-predictor for the single ref case
-// for use in the encoder to search the wedges efficiently.
-static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane,
-                                              int bw, int bh, int x, int y,
-                                              int w, int h, int mi_x, int mi_y,
-                                              int ref, uint8_t *const ext_dst,
-                                              int ext_dst_stride,
-                                              int can_use_previous) {
-  struct macroblockd_plane *const pd = &xd->plane[plane];
-  const MB_MODE_INFO *mi = xd->mi[0];
-
-  const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
-  struct buf_2d *const pre_buf = &pd->pre[ref];
-  uint8_t *const dst = get_buf_by_bd(xd, ext_dst) + ext_dst_stride * y + x;
-  const MV mv = mi->mv[ref].as_mv;
-
-  ConvolveParams conv_params = get_conv_params(0, plane, xd->bd);
-  WarpTypesAllowed warp_types;
-  const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[ref]];
-  warp_types.global_warp_allowed = is_global_mv_block(mi, wm->wmtype);
-  warp_types.local_warp_allowed = mi->motion_mode == WARPED_CAUSAL;
-  const int pre_x = (mi_x) >> pd->subsampling_x;
-  const int pre_y = (mi_y) >> pd->subsampling_y;
-  uint8_t *pre;
-  SubpelParams subpel_params;
-  calc_subpel_params(xd, sf, mv, plane, pre_x, pre_y, x, y, pre_buf, &pre,
-                     &subpel_params, bw, bh);
-
-  av1_make_inter_predictor(pre, pre_buf->stride, dst, ext_dst_stride,
-                           &subpel_params, sf, w, h, &conv_params,
-                           mi->interp_filters, &warp_types, pre_x + x,
-                           pre_y + y, plane, ref, mi, 0, xd, can_use_previous);
-}
-
-void av1_build_inter_predictors_for_planes_single_buf(
-    MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int mi_row,
-    int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3],
-    int can_use_previous) {
-  int plane;
-  const int mi_x = mi_col * MI_SIZE;
-  const int mi_y = mi_row * MI_SIZE;
-  for (plane = plane_from; plane <= plane_to; ++plane) {
-    const BLOCK_SIZE plane_bsize = get_plane_block_size(
-        bsize, xd->plane[plane].subsampling_x, xd->plane[plane].subsampling_y);
-    const int bw = block_size_wide[plane_bsize];
-    const int bh = block_size_high[plane_bsize];
-    build_inter_predictors_single_buf(xd, plane, bw, bh, 0, 0, bw, bh, mi_x,
-                                      mi_y, ref, ext_dst[plane],
-                                      ext_dst_stride[plane], can_use_previous);
-  }
-}
-
-static void build_masked_compound(
-    uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
-    const uint8_t *src1, int src1_stride,
-    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
-    int w) {
-  // Derive subsampling from h and w passed in. May be refactored to
-  // pass in subsampling factors directly.
-  const int subh = (2 << mi_size_high_log2[sb_type]) == h;
-  const int subw = (2 << mi_size_wide_log2[sb_type]) == w;
-  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
-  aom_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
-                     mask, block_size_wide[sb_type], w, h, subw, subh);
-}
-
-static void build_masked_compound_highbd(
-    uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride,
-    const uint8_t *src1_8, int src1_stride,
-    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
-    int w, int bd) {
-  // Derive subsampling from h and w passed in. May be refactored to
-  // pass in subsampling factors directly.
-  const int subh = (2 << mi_size_high_log2[sb_type]) == h;
-  const int subw = (2 << mi_size_wide_log2[sb_type]) == w;
-  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
-  // const uint8_t *mask =
-  //     av1_get_contiguous_soft_mask(wedge_index, wedge_sign, sb_type);
-  aom_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8,
-                            src1_stride, mask, block_size_wide[sb_type], w, h,
-                            subw, subh, bd);
-}
-
-static void build_wedge_inter_predictor_from_buf(
-    MACROBLOCKD *xd, int plane, int x, int y, int w, int h, uint8_t *ext_dst0,
-    int ext_dst_stride0, uint8_t *ext_dst1, int ext_dst_stride1) {
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const int is_compound = has_second_ref(mbmi);
-  MACROBLOCKD_PLANE *const pd = &xd->plane[plane];
-  struct buf_2d *const dst_buf = &pd->dst;
-  uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
-  mbmi->interinter_comp.seg_mask = xd->seg_mask;
-  const INTERINTER_COMPOUND_DATA *comp_data = &mbmi->interinter_comp;
-
-  if (is_compound && is_masked_compound_type(comp_data->type)) {
-    if (!plane && comp_data->type == COMPOUND_DIFFWTD) {
-      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
-        av1_build_compound_diffwtd_mask_highbd(
-            comp_data->seg_mask, comp_data->mask_type,
-            CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
-            CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, h, w, xd->bd);
-      else
-        av1_build_compound_diffwtd_mask(
-            comp_data->seg_mask, comp_data->mask_type, ext_dst0,
-            ext_dst_stride0, ext_dst1, ext_dst_stride1, h, w);
-    }
-
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
-      build_masked_compound_highbd(
-          dst, dst_buf->stride, CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
-          CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, comp_data,
-          mbmi->sb_type, h, w, xd->bd);
-    else
-      build_masked_compound(dst, dst_buf->stride, ext_dst0, ext_dst_stride0,
-                            ext_dst1, ext_dst_stride1, comp_data, mbmi->sb_type,
-                            h, w);
-  } else {
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
-      aom_highbd_convolve_copy(CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
-                               dst, dst_buf->stride, NULL, 0, NULL, 0, w, h,
-                               xd->bd);
-    else
-      aom_convolve_copy(ext_dst0, ext_dst_stride0, dst, dst_buf->stride, NULL,
-                        0, NULL, 0, w, h);
-  }
-}
-
-void av1_build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, BLOCK_SIZE bsize,
-                                              int plane_from, int plane_to,
-                                              uint8_t *ext_dst0[3],
-                                              int ext_dst_stride0[3],
-                                              uint8_t *ext_dst1[3],
-                                              int ext_dst_stride1[3]) {
-  int plane;
-  for (plane = plane_from; plane <= plane_to; ++plane) {
-    const BLOCK_SIZE plane_bsize = get_plane_block_size(
-        bsize, xd->plane[plane].subsampling_x, xd->plane[plane].subsampling_y);
-    const int bw = block_size_wide[plane_bsize];
-    const int bh = block_size_high[plane_bsize];
-    build_wedge_inter_predictor_from_buf(
-        xd, plane, 0, 0, bw, bh, ext_dst0[plane], ext_dst_stride0[plane],
-        ext_dst1[plane], ext_dst_stride1[plane]);
-  }
-}
diff --git a/third_party/aom/av1/encoder/reconinter_enc.h b/third_party/aom/av1/encoder/reconinter_enc.h
deleted file mode 100644
index 10d5e8c28..000000000
--- a/third_party/aom/av1/encoder/reconinter_enc.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_RECONINTER_ENC_H_
-#define AOM_AV1_ENCODER_RECONINTER_ENC_H_
-
-#include "aom/aom_integer.h"
-#include "av1/common/filter.h"
-#include "av1/common/blockd.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/convolve.h"
-#include "av1/common/warped_motion.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_build_inter_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                    int mi_row, int mi_col, BUFFER_SET *ctx,
-                                    BLOCK_SIZE bsize);
-
-void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                     int mi_row, int mi_col, BUFFER_SET *ctx,
-                                     BLOCK_SIZE bsize);
-
-void av1_build_inter_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                    int mi_row, int mi_col, BUFFER_SET *ctx,
-                                    BLOCK_SIZE bsize, int plane_idx);
-
-void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                   int mi_row, int mi_col, BUFFER_SET *ctx,
-                                   BLOCK_SIZE bsize);
-
-void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
-                               int dst_stride, const MV *src_mv,
-                               const struct scale_factors *sf, int w, int h,
-                               ConvolveParams *conv_params,
-                               InterpFilters interp_filters,
-                               const WarpTypesAllowed *warp_types, int p_col,
-                               int p_row, int plane, int ref,
-                               enum mv_precision precision, int x, int y,
-                               const MACROBLOCKD *xd, int can_use_previous);
-
-// Detect if the block have sub-pixel level motion vectors
-// per component.
-#define CHECK_SUBPEL 0
-static INLINE int has_subpel_mv_component(const MB_MODE_INFO *const mbmi,
-                                          const MACROBLOCKD *const xd,
-                                          int dir) {
-#if CHECK_SUBPEL
-  const BLOCK_SIZE bsize = mbmi->sb_type;
-  int plane;
-  int ref = (dir >> 1);
-
-  if (dir & 0x01) {
-    if (mbmi->mv[ref].as_mv.col & SUBPEL_MASK) return 1;
-  } else {
-    if (mbmi->mv[ref].as_mv.row & SUBPEL_MASK) return 1;
-  }
-
-  return 0;
-#else
-  (void)mbmi;
-  (void)xd;
-  (void)dir;
-  return 1;
-#endif
-}
-
-static INLINE int av1_is_interp_search_needed(const MACROBLOCKD *const xd) {
-  MB_MODE_INFO *const mi = xd->mi[0];
-  const int is_compound = has_second_ref(mi);
-  int ref;
-  for (ref = 0; ref < 1 + is_compound; ++ref) {
-    int row_col;
-    for (row_col = 0; row_col < 2; ++row_col) {
-      const int dir = (ref << 1) + row_col;
-      if (has_subpel_mv_component(mi, xd, dir)) {
-        return 1;
-      }
-    }
-  }
-  return 0;
-}
-
-void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                         int mi_row, int mi_col,
-                                         uint8_t *tmp_buf[MAX_MB_PLANE],
-                                         int tmp_width[MAX_MB_PLANE],
-                                         int tmp_height[MAX_MB_PLANE],
-                                         int tmp_stride[MAX_MB_PLANE]);
-
-void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                        int mi_row, int mi_col,
-                                        uint8_t *tmp_buf[MAX_MB_PLANE],
-                                        int tmp_width[MAX_MB_PLANE],
-                                        int tmp_height[MAX_MB_PLANE],
-                                        int tmp_stride[MAX_MB_PLANE]);
-
-void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                        int mi_row, int mi_col);
-
-void av1_build_inter_predictors_for_planes_single_buf(
-    MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int mi_row,
-    int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3],
-    int can_use_previous);
-
-void av1_build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, BLOCK_SIZE bsize,
-                                              int plane_from, int plane_to,
-                                              uint8_t *ext_dst0[3],
-                                              int ext_dst_stride0[3],
-                                              uint8_t *ext_dst1[3],
-                                              int ext_dst_stride1[3]);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_RECONINTER_ENC_H_
diff --git a/third_party/aom/av1/encoder/segmentation.c b/third_party/aom/av1/encoder/segmentation.c
deleted file mode 100644
index 2e9102745..000000000
--- a/third_party/aom/av1/encoder/segmentation.c
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-
-#include "aom_mem/aom_mem.h"
-
-#include "av1/common/pred_common.h"
-#include "av1/common/tile_common.h"
-
-#include "av1/encoder/cost.h"
-#include "av1/encoder/segmentation.h"
-
-void av1_enable_segmentation(struct segmentation *seg) {
-  seg->enabled = 1;
-  seg->update_map = 1;
-  seg->update_data = 1;
-  seg->temporal_update = 0;
-}
-
-void av1_disable_segmentation(struct segmentation *seg) {
-  seg->enabled = 0;
-  seg->update_map = 0;
-  seg->update_data = 0;
-  seg->temporal_update = 0;
-}
-
-void av1_disable_segfeature(struct segmentation *seg, int segment_id,
-                            SEG_LVL_FEATURES feature_id) {
-  seg->feature_mask[segment_id] &= ~(1 << feature_id);
-}
-
-void av1_clear_segdata(struct segmentation *seg, int segment_id,
-                       SEG_LVL_FEATURES feature_id) {
-  seg->feature_data[segment_id][feature_id] = 0;
-}
-
-static void count_segs(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                       const TileInfo *tile, MB_MODE_INFO **mi,
-                       unsigned *no_pred_segcounts,
-                       unsigned (*temporal_predictor_count)[2],
-                       unsigned *t_unpred_seg_counts, int bw, int bh,
-                       int mi_row, int mi_col) {
-  int segment_id;
-
-  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
-
-  xd->mi = mi;
-  segment_id = xd->mi[0]->segment_id;
-
-  set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
-
-  // Count the number of hits on each segment with no prediction
-  no_pred_segcounts[segment_id]++;
-
-  // Temporal prediction not allowed on key frames
-  if (cm->frame_type != KEY_FRAME) {
-    const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
-    // Test to see if the segment id matches the predicted value.
-    const int pred_segment_id =
-        cm->last_frame_seg_map
-            ? get_segment_id(cm, cm->last_frame_seg_map, bsize, mi_row, mi_col)
-            : 0;
-    const int pred_flag = pred_segment_id == segment_id;
-    const int pred_context = av1_get_pred_context_seg_id(xd);
-
-    // Store the prediction status for this mb and update counts
-    // as appropriate
-    xd->mi[0]->seg_id_predicted = pred_flag;
-    temporal_predictor_count[pred_context][pred_flag]++;
-
-    // Update the "unpredicted" segment count
-    if (!pred_flag) t_unpred_seg_counts[segment_id]++;
-  }
-}
-
-static void count_segs_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                          const TileInfo *tile, MB_MODE_INFO **mi,
-                          unsigned *no_pred_segcounts,
-                          unsigned (*temporal_predictor_count)[2],
-                          unsigned *t_unpred_seg_counts, int mi_row, int mi_col,
-                          BLOCK_SIZE bsize) {
-  const int mis = cm->mi_stride;
-  const int bs = mi_size_wide[bsize], hbs = bs / 2;
-  PARTITION_TYPE partition;
-  const int qbs = bs / 4;
-
-  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
-
-#define CSEGS(cs_bw, cs_bh, cs_rowoff, cs_coloff)                              \
-  count_segs(cm, xd, tile, mi + mis * (cs_rowoff) + (cs_coloff),               \
-             no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, \
-             (cs_bw), (cs_bh), mi_row + (cs_rowoff), mi_col + (cs_coloff));
-
-  if (bsize == BLOCK_8X8)
-    partition = PARTITION_NONE;
-  else
-    partition = get_partition(cm, mi_row, mi_col, bsize);
-  switch (partition) {
-    case PARTITION_NONE: CSEGS(bs, bs, 0, 0); break;
-    case PARTITION_HORZ:
-      CSEGS(bs, hbs, 0, 0);
-      CSEGS(bs, hbs, hbs, 0);
-      break;
-    case PARTITION_VERT:
-      CSEGS(hbs, bs, 0, 0);
-      CSEGS(hbs, bs, 0, hbs);
-      break;
-    case PARTITION_HORZ_A:
-      CSEGS(hbs, hbs, 0, 0);
-      CSEGS(hbs, hbs, 0, hbs);
-      CSEGS(bs, hbs, hbs, 0);
-      break;
-    case PARTITION_HORZ_B:
-      CSEGS(bs, hbs, 0, 0);
-      CSEGS(hbs, hbs, hbs, 0);
-      CSEGS(hbs, hbs, hbs, hbs);
-      break;
-    case PARTITION_VERT_A:
-      CSEGS(hbs, hbs, 0, 0);
-      CSEGS(hbs, hbs, hbs, 0);
-      CSEGS(hbs, bs, 0, hbs);
-      break;
-    case PARTITION_VERT_B:
-      CSEGS(hbs, bs, 0, 0);
-      CSEGS(hbs, hbs, 0, hbs);
-      CSEGS(hbs, hbs, hbs, hbs);
-      break;
-    case PARTITION_HORZ_4:
-      CSEGS(bs, qbs, 0, 0);
-      CSEGS(bs, qbs, qbs, 0);
-      CSEGS(bs, qbs, 2 * qbs, 0);
-      if (mi_row + 3 * qbs < cm->mi_rows) CSEGS(bs, qbs, 3 * qbs, 0);
-      break;
-
-    case PARTITION_VERT_4:
-      CSEGS(qbs, bs, 0, 0);
-      CSEGS(qbs, bs, 0, qbs);
-      CSEGS(qbs, bs, 0, 2 * qbs);
-      if (mi_col + 3 * qbs < cm->mi_cols) CSEGS(qbs, bs, 0, 3 * qbs);
-      break;
-
-    case PARTITION_SPLIT: {
-      const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
-      int n;
-
-      for (n = 0; n < 4; n++) {
-        const int mi_dc = hbs * (n & 1);
-        const int mi_dr = hbs * (n >> 1);
-
-        count_segs_sb(cm, xd, tile, &mi[mi_dr * mis + mi_dc], no_pred_segcounts,
-                      temporal_predictor_count, t_unpred_seg_counts,
-                      mi_row + mi_dr, mi_col + mi_dc, subsize);
-      }
-    } break;
-    default: assert(0);
-  }
-
-#undef CSEGS
-}
-
-void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd) {
-  struct segmentation *seg = &cm->seg;
-  struct segmentation_probs *segp = &cm->fc->seg;
-  int no_pred_cost;
-  int t_pred_cost = INT_MAX;
-  int tile_col, tile_row, mi_row, mi_col;
-  unsigned temporal_predictor_count[SEG_TEMPORAL_PRED_CTXS][2] = { { 0 } };
-  unsigned no_pred_segcounts[MAX_SEGMENTS] = { 0 };
-  unsigned t_unpred_seg_counts[MAX_SEGMENTS] = { 0 };
-  (void)xd;
-
-  // First of all generate stats regarding how well the last segment map
-  // predicts this one
-  for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) {
-    TileInfo tile_info;
-    av1_tile_set_row(&tile_info, cm, tile_row);
-    for (tile_col = 0; tile_col < cm->tile_cols; tile_col++) {
-      MB_MODE_INFO **mi_ptr;
-      av1_tile_set_col(&tile_info, cm, tile_col);
-      mi_ptr = cm->mi_grid_visible + tile_info.mi_row_start * cm->mi_stride +
-               tile_info.mi_col_start;
-      for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
-           mi_row += cm->seq_params.mib_size,
-          mi_ptr += cm->seq_params.mib_size * cm->mi_stride) {
-        MB_MODE_INFO **mi = mi_ptr;
-        for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
-             mi_col += cm->seq_params.mib_size, mi += cm->seq_params.mib_size) {
-          count_segs_sb(cm, xd, &tile_info, mi, no_pred_segcounts,
-                        temporal_predictor_count, t_unpred_seg_counts, mi_row,
-                        mi_col, cm->seq_params.sb_size);
-        }
-      }
-    }
-  }
-
-  int seg_id_cost[MAX_SEGMENTS];
-  av1_cost_tokens_from_cdf(seg_id_cost, segp->tree_cdf, NULL);
-  no_pred_cost = 0;
-  for (int i = 0; i < MAX_SEGMENTS; ++i)
-    no_pred_cost += no_pred_segcounts[i] * seg_id_cost[i];
-
-  // Frames without past dependency cannot use temporal prediction
-  if (cm->primary_ref_frame != PRIMARY_REF_NONE) {
-    int pred_flag_cost[SEG_TEMPORAL_PRED_CTXS][2];
-    for (int i = 0; i < SEG_TEMPORAL_PRED_CTXS; ++i)
-      av1_cost_tokens_from_cdf(pred_flag_cost[i], segp->pred_cdf[i], NULL);
-    t_pred_cost = 0;
-    // Cost for signaling the prediction flag.
-    for (int i = 0; i < SEG_TEMPORAL_PRED_CTXS; ++i) {
-      for (int j = 0; j < 2; ++j)
-        t_pred_cost += temporal_predictor_count[i][j] * pred_flag_cost[i][j];
-    }
-    // Cost for signaling the unpredicted segment id.
-    for (int i = 0; i < MAX_SEGMENTS; ++i)
-      t_pred_cost += t_unpred_seg_counts[i] * seg_id_cost[i];
-  }
-
-  // Now choose which coding method to use.
-  if (t_pred_cost < no_pred_cost) {
-    assert(!cm->error_resilient_mode);
-    seg->temporal_update = 1;
-  } else {
-    seg->temporal_update = 0;
-  }
-}
-
-void av1_reset_segment_features(AV1_COMMON *cm) {
-  struct segmentation *seg = &cm->seg;
-
-  // Set up default state for MB feature flags
-  seg->enabled = 0;
-  seg->update_map = 0;
-  seg->update_data = 0;
-  av1_clearall_segfeatures(seg);
-}
diff --git a/third_party/aom/av1/encoder/segmentation.h b/third_party/aom/av1/encoder/segmentation.h
deleted file mode 100644
index 1ad13d66a..000000000
--- a/third_party/aom/av1/encoder/segmentation.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_SEGMENTATION_H_
-#define AOM_AV1_ENCODER_SEGMENTATION_H_
-
-#include "av1/common/blockd.h"
-#include "av1/encoder/encoder.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_enable_segmentation(struct segmentation *seg);
-void av1_disable_segmentation(struct segmentation *seg);
-
-void av1_disable_segfeature(struct segmentation *seg, int segment_id,
-                            SEG_LVL_FEATURES feature_id);
-void av1_clear_segdata(struct segmentation *seg, int segment_id,
-                       SEG_LVL_FEATURES feature_id);
-
-void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd);
-
-void av1_reset_segment_features(AV1_COMMON *cm);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_SEGMENTATION_H_
diff --git a/third_party/aom/av1/encoder/speed_features.c b/third_party/aom/av1/encoder/speed_features.c
deleted file mode 100644
index 4c35baae0..000000000
--- a/third_party/aom/av1/encoder/speed_features.c
+++ /dev/null
@@ -1,564 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <limits.h>
-
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/speed_features.h"
-#include "av1/encoder/rdopt.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-
-// Setting this to 1 will disable trellis optimization completely.
-// Setting this to 2 will disable trellis optimization within the
-// transform search. Trellis optimization will still be applied
-// in the final encode.
-#define DISABLE_TRELLISQ_SEARCH 0
-
-#define MAX_MESH_SPEED 5  // Max speed setting for mesh motion method
-static MESH_PATTERN
-    good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
-      { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
-      { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
-      { { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
-      { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
-      { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
-      { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
-    };
-static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = {
-  50, 50, 25, 15, 5, 1
-};
-
-// TODO(huisu@google.com): These settings are pretty relaxed, tune them for
-// each speed setting
-static MESH_PATTERN intrabc_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
-  { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
-  { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
-  { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
-  { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
-  { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
-  { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
-};
-static uint8_t intrabc_max_mesh_pct[MAX_MESH_SPEED + 1] = { 100, 100, 100,
-                                                            25,  25,  10 };
-
-// Intra only frames, golden frames (except alt ref overlays) and
-// alt ref frames tend to be coded at a higher than ambient quality
-static int frame_is_boosted(const AV1_COMP *cpi) {
-  return frame_is_kf_gf_arf(cpi);
-}
-
-// Sets a partition size down to which the auto partition code will always
-// search (can go lower), based on the image dimensions. The logic here
-// is that the extent to which ringing artefacts are offensive, depends
-// partly on the screen area that over which they propogate. Propogation is
-// limited by transform block size but the screen area take up by a given block
-// size will be larger for a small image format stretched to full screen.
-static BLOCK_SIZE set_partition_min_limit(AV1_COMMON *const cm) {
-  unsigned int screen_area = (cm->width * cm->height);
-
-  // Select block size based on image format size.
-  if (screen_area < 1280 * 720) {
-    // Formats smaller in area than 720P
-    return BLOCK_4X4;
-  } else if (screen_area < 1920 * 1080) {
-    // Format >= 720P and < 1080P
-    return BLOCK_8X8;
-  } else {
-    // Formats 1080P and up
-    return BLOCK_16X16;
-  }
-}
-
-// Do we have an internal image edge (e.g. formatting bars).
-static int has_internal_image_edge(const AV1_COMP *cpi) {
-  return (cpi->oxcf.pass == 2) &&
-         ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
-          (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
-}
-
-static void set_good_speed_feature_framesize_dependent(AV1_COMP *cpi,
-                                                       SPEED_FEATURES *sf,
-                                                       int speed) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
-  const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480;
-
-  if (is_480p_or_larger) {
-    sf->use_square_partition_only_threshold = BLOCK_128X128;
-  } else {
-    sf->use_square_partition_only_threshold = BLOCK_64X64;
-  }
-
-  // TODO(huisu@google.com): train models for 720P and above.
-  if (!is_720p_or_larger) {
-    sf->ml_partition_search_breakout_thresh[0] = 200;  // BLOCK_8X8
-    sf->ml_partition_search_breakout_thresh[1] = 250;  // BLOCK_16X16
-    sf->ml_partition_search_breakout_thresh[2] = 300;  // BLOCK_32X32
-    sf->ml_partition_search_breakout_thresh[3] = 500;  // BLOCK_64X64
-    sf->ml_partition_search_breakout_thresh[4] = -1;   // BLOCK_128X128
-  }
-
-  if (speed >= 1) {
-    if (is_720p_or_larger) {
-      sf->use_square_partition_only_threshold = BLOCK_128X128;
-    } else if (is_480p_or_larger) {
-      sf->use_square_partition_only_threshold = BLOCK_64X64;
-    } else {
-      sf->use_square_partition_only_threshold = BLOCK_32X32;
-    }
-
-    if (!is_720p_or_larger) {
-      sf->ml_partition_search_breakout_thresh[0] = 200;  // BLOCK_8X8
-      sf->ml_partition_search_breakout_thresh[1] = 250;  // BLOCK_16X16
-      sf->ml_partition_search_breakout_thresh[2] = 300;  // BLOCK_32X32
-      sf->ml_partition_search_breakout_thresh[3] = 300;  // BLOCK_64X64
-      sf->ml_partition_search_breakout_thresh[4] = -1;   // BLOCK_128X128
-    }
-  }
-
-  if (speed >= 2) {
-    if (is_720p_or_larger) {
-      sf->disable_split_mask =
-          cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
-      sf->adaptive_pred_interp_filter = 0;
-      sf->partition_search_breakout_dist_thr = (1 << 24);
-      sf->partition_search_breakout_rate_thr = 120;
-    } else {
-      sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
-      sf->partition_search_breakout_dist_thr = (1 << 22);
-      sf->partition_search_breakout_rate_thr = 100;
-    }
-    sf->rd_auto_partition_min_limit = set_partition_min_limit(cm);
-  }
-
-  if (speed >= 3) {
-    if (is_720p_or_larger) {
-      sf->disable_split_mask = DISABLE_ALL_SPLIT;
-      sf->partition_search_breakout_dist_thr = (1 << 25);
-      sf->partition_search_breakout_rate_thr = 200;
-    } else {
-      sf->max_intra_bsize = BLOCK_32X32;
-      sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT;
-      sf->partition_search_breakout_dist_thr = (1 << 23);
-      sf->partition_search_breakout_rate_thr = 120;
-    }
-  }
-
-  // If this is a two pass clip that fits the criteria for animated or
-  // graphics content then reset disable_split_mask for speeds 2+.
-  // Also if the image edge is internal to the coded area.
-  if ((speed >= 2) && (cpi->oxcf.pass == 2) &&
-      ((cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ||
-       (has_internal_image_edge(cpi)))) {
-    sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
-  }
-
-  if (speed >= 4) {
-    if (is_720p_or_larger) {
-      sf->partition_search_breakout_dist_thr = (1 << 26);
-    } else {
-      sf->partition_search_breakout_dist_thr = (1 << 24);
-    }
-    sf->disable_split_mask = DISABLE_ALL_SPLIT;
-  }
-}
-
-static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
-                                                          SPEED_FEATURES *sf,
-                                                          int speed) {
-  AV1_COMMON *const cm = &cpi->common;
-  const int boosted = frame_is_boosted(cpi);
-
-  // Speed 0 for all speed features that give neutral coding performance change.
-  sf->reduce_inter_modes = 1;
-  sf->prune_ext_partition_types_search_level = 1;
-  sf->ml_prune_rect_partition = 1;
-  sf->ml_prune_ab_partition = 1;
-  sf->ml_prune_4_partition = 1;
-  sf->adaptive_txb_search_level = 1;
-  sf->jnt_comp_skip_mv_search = 1;
-  sf->model_based_prune_tx_search_level = 1;
-  sf->model_based_post_interp_filter_breakout = 1;
-  sf->inter_mode_rd_model_estimation = 1;
-  sf->prune_ref_frame_for_rect_partitions =
-      !(boosted || cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame);
-  sf->less_rectangular_check_level = 1;
-  sf->gm_search_type = GM_REDUCED_REF_SEARCH;
-  sf->gm_disable_recode = 1;
-
-  if (speed >= 1) {
-    sf->gm_erroradv_type = GM_ERRORADV_TR_1;
-    sf->selective_ref_frame = 1;
-    sf->inter_tx_size_search_init_depth_rect = 1;
-    sf->inter_tx_size_search_init_depth_sqr = 1;
-    sf->intra_tx_size_search_init_depth_rect = 1;
-    sf->intra_tx_size_search_init_depth_sqr = 1;
-    sf->tx_size_search_lgr_block = 1;
-    if (speed >= CONFIG_2PASS_PARTITION_SEARCH_LVL) {
-      sf->two_pass_partition_search = 1;
-      sf->mode_pruning_based_on_two_pass_partition_search = 1;
-    }
-    sf->prune_ext_partition_types_search_level = 2;
-    sf->use_fast_interpolation_filter_search = 1;
-    sf->skip_repeat_interpolation_filter_search = 1;
-    sf->tx_type_search.skip_tx_search = 1;
-    sf->tx_type_search.ml_tx_split_thresh = 40;
-    sf->model_based_prune_tx_search_level = 0;
-    sf->model_based_post_interp_filter_breakout = 0;
-    // TODO(angiebird): Re-evaluate the impact of inter_mode_rd_model_estimation
-    // on speed 1
-    sf->inter_mode_rd_model_estimation = 0;
-    sf->adaptive_txb_search_level = 2;
-    sf->use_intra_txb_hash = 1;
-    sf->optimize_b_precheck = 1;
-    sf->dual_sgr_penalty_level = 1;
-    sf->use_accurate_subpel_search = 1;
-    sf->reuse_inter_intra_mode = 1;
-    sf->prune_comp_search_by_single_result = 1;
-    sf->skip_repeated_newmv = 1;
-    sf->obmc_full_pixel_search_level = 1;
-  }
-
-  if (speed >= 2) {
-    sf->gm_erroradv_type = GM_ERRORADV_TR_2;
-
-    sf->selective_ref_frame = 2;
-    sf->fast_cdef_search = 1;
-
-    sf->adaptive_rd_thresh = 1;
-    sf->mv.auto_mv_step_size = 1;
-    sf->mv.subpel_iters_per_step = 1;
-    sf->disable_filter_search_var_thresh = 100;
-    sf->comp_inter_joint_search_thresh = BLOCK_SIZES_ALL;
-
-    sf->partition_search_breakout_rate_thr = 80;
-    // Note: This speed feature is disable as it seems to be worse in
-    // compression/quality and is also slower.
-    // sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
-    sf->allow_partition_search_skip = 1;
-    sf->disable_wedge_search_var_thresh = 100;
-    sf->fast_wedge_sign_estimate = 1;
-  }
-
-  if (speed >= 3) {
-    sf->tx_size_search_method = boosted ? USE_FULL_RD : USE_LARGESTALL;
-    sf->less_rectangular_check_level = 2;
-    sf->adaptive_pred_interp_filter = 1;
-    // adaptive_motion_search breaks encoder multi-thread tests.
-    // The values in x->pred_mv[] differ for single and multi-thread cases.
-    // See aomedia:1778.
-    // sf->adaptive_motion_search = 1;
-    sf->recode_loop = ALLOW_RECODE_KFARFGF;
-    sf->use_transform_domain_distortion = 1;
-    sf->use_accurate_subpel_search = 0;
-    sf->adaptive_rd_thresh = 2;
-    sf->tx_type_search.prune_mode = PRUNE_2D_FAST;
-    sf->gm_search_type = GM_DISABLE_SEARCH;
-    sf->prune_comp_search_by_single_result = 2;
-  }
-
-  if (speed >= 4) {
-    sf->tx_type_search.fast_intra_tx_type_search = 1;
-    sf->tx_type_search.fast_inter_tx_type_search = 1;
-    sf->use_square_partition_only_threshold =
-        boosted ? BLOCK_128X128 : BLOCK_4X4;
-    sf->tx_size_search_method =
-        frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
-    sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED;
-    sf->adaptive_pred_interp_filter = 0;
-    sf->adaptive_mode_search = 1;
-    sf->cb_partition_search = !boosted;
-    sf->alt_ref_search_fp = 1;
-  }
-
-  if (speed >= 5) {
-    sf->recode_loop = ALLOW_RECODE_KFMAXBW;
-    sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
-    sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL;
-    sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
-    sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL;
-    sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
-    sf->intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL;
-    sf->use_square_partition_only_threshold = BLOCK_4X4;
-    sf->tx_size_search_method = USE_LARGESTALL;
-    sf->mv.search_method = BIGDIA;
-    sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
-    sf->adaptive_rd_thresh = 4;
-    sf->mode_search_skip_flags =
-        (cm->frame_type == KEY_FRAME)
-            ? 0
-            : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
-                  FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR |
-                  FLAG_EARLY_TERMINATE;
-    sf->disable_filter_search_var_thresh = 200;
-    sf->use_fast_coef_costing = 1;
-    sf->partition_search_breakout_rate_thr = 300;
-    sf->use_transform_domain_distortion = 2;
-  }
-
-  if (speed >= 6) {
-    int i;
-    sf->optimize_coefficients = NO_TRELLIS_OPT;
-    sf->mv.search_method = HEX;
-    sf->disable_filter_search_var_thresh = 500;
-    for (i = 0; i < TX_SIZES; ++i) {
-      sf->intra_y_mode_mask[i] = INTRA_DC;
-      sf->intra_uv_mode_mask[i] = UV_INTRA_DC_CFL;
-    }
-    sf->partition_search_breakout_rate_thr = 500;
-    sf->mv.reduce_first_step_size = 1;
-    sf->simple_model_rd_from_var = 1;
-  }
-  if (speed >= 7) {
-    sf->default_max_partition_size = BLOCK_32X32;
-    sf->default_min_partition_size = BLOCK_8X8;
-    sf->intra_y_mode_mask[TX_64X64] = INTRA_DC;
-    sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
-    sf->frame_parameter_update = 0;
-    sf->mv.search_method = FAST_HEX;
-    sf->partition_search_type = REFERENCE_PARTITION;
-    sf->mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
-  }
-  if (speed >= 8) {
-    sf->mv.search_method = FAST_DIAMOND;
-    sf->mv.subpel_force_stop = 2;
-    sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
-  }
-}
-
-void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi) {
-  SPEED_FEATURES *const sf = &cpi->sf;
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-  RD_OPT *const rd = &cpi->rd;
-  int i;
-
-  if (oxcf->mode == GOOD) {
-    set_good_speed_feature_framesize_dependent(cpi, sf, oxcf->speed);
-  }
-
-  if (sf->disable_split_mask == DISABLE_ALL_SPLIT) {
-    sf->adaptive_pred_interp_filter = 0;
-  }
-
-  // Check for masked out split cases.
-  for (i = 0; i < MAX_REFS; ++i) {
-    if (sf->disable_split_mask & (1 << i)) {
-      rd->thresh_mult_sub8x8[i] = INT_MAX;
-    }
-  }
-
-  // This is only used in motion vector unit test.
-  if (cpi->oxcf.motion_vector_unit_test == 1)
-    cpi->find_fractional_mv_step = av1_return_max_sub_pixel_mv;
-  else if (cpi->oxcf.motion_vector_unit_test == 2)
-    cpi->find_fractional_mv_step = av1_return_min_sub_pixel_mv;
-}
-
-void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
-  AV1_COMMON *const cm = &cpi->common;
-  SPEED_FEATURES *const sf = &cpi->sf;
-  MACROBLOCK *const x = &cpi->td.mb;
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-  int i;
-
-  // best quality defaults
-  sf->frame_parameter_update = 1;
-  sf->mv.search_method = NSTEP;
-  sf->recode_loop = ALLOW_RECODE;
-  sf->mv.subpel_search_method = SUBPEL_TREE;
-  sf->mv.subpel_iters_per_step = 2;
-  sf->mv.subpel_force_stop = 0;
-#if DISABLE_TRELLISQ_SEARCH == 2
-  sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf)
-                                  ? FINAL_PASS_TRELLIS_OPT
-                                  : NO_TRELLIS_OPT;
-#elif DISABLE_TRELLISQ_SEARCH == 1
-  sf->optimize_coefficients = NO_TRELLIS_OPT;
-#else
-  if (is_lossless_requested(&cpi->oxcf))
-    sf->optimize_coefficients = NO_TRELLIS_OPT;
-  else
-    sf->optimize_coefficients = FULL_TRELLIS_OPT;
-#endif  // DISABLE_TRELLISQ_SEARCH
-  sf->gm_erroradv_type = GM_ERRORADV_TR_0;
-  sf->mv.reduce_first_step_size = 0;
-  sf->mv.auto_mv_step_size = 0;
-  sf->comp_inter_joint_search_thresh = BLOCK_4X4;
-  sf->adaptive_rd_thresh = 0;
-  sf->tx_size_search_method = USE_FULL_RD;
-  sf->inter_tx_size_search_init_depth_sqr = 0;
-  sf->inter_tx_size_search_init_depth_rect = 0;
-  sf->intra_tx_size_search_init_depth_rect = 0;
-  sf->intra_tx_size_search_init_depth_sqr = 0;
-  sf->tx_size_search_lgr_block = 0;
-  sf->model_based_prune_tx_search_level = 0;
-  sf->model_based_post_interp_filter_breakout = 0;
-  sf->reduce_inter_modes = 0;
-  sf->selective_ref_gm = 1;
-  sf->adaptive_motion_search = 0;
-  sf->adaptive_pred_interp_filter = 0;
-  sf->adaptive_mode_search = 0;
-  sf->cb_partition_search = 0;
-  sf->alt_ref_search_fp = 0;
-  sf->partition_search_type = SEARCH_PARTITION;
-  sf->tx_type_search.prune_mode = PRUNE_2D_ACCURATE;
-  sf->tx_type_search.ml_tx_split_thresh = 30;
-  sf->tx_type_search.use_skip_flag_prediction = 1;
-  sf->tx_type_search.fast_intra_tx_type_search = 0;
-  sf->tx_type_search.fast_inter_tx_type_search = 0;
-  sf->tx_type_search.skip_tx_search = 0;
-  sf->selective_ref_frame = 0;
-  sf->less_rectangular_check_level = 0;
-  sf->use_square_partition_only_threshold = BLOCK_128X128;
-  sf->prune_ref_frame_for_rect_partitions = 0;
-  sf->auto_min_max_partition_size = NOT_IN_USE;
-  sf->rd_auto_partition_min_limit = BLOCK_4X4;
-  sf->default_max_partition_size = BLOCK_LARGEST;
-  sf->default_min_partition_size = BLOCK_4X4;
-  sf->adjust_partitioning_from_last_frame = 0;
-  sf->disable_split_mask = 0;
-  sf->mode_search_skip_flags = 0;
-  sf->disable_filter_search_var_thresh = 0;
-  sf->allow_partition_search_skip = 0;
-  sf->use_accurate_subpel_search = 2;
-  sf->disable_wedge_search_var_thresh = 0;
-  sf->fast_wedge_sign_estimate = 0;
-  sf->drop_ref = 0;
-  sf->skip_intra_in_interframe = 1;
-  sf->txb_split_cap = 1;
-  sf->adaptive_txb_search_level = 0;
-  sf->two_pass_partition_search = 0;
-  sf->mode_pruning_based_on_two_pass_partition_search = 0;
-  sf->use_intra_txb_hash = 0;
-  sf->use_inter_txb_hash = 1;
-  sf->use_mb_rd_hash = 1;
-  sf->optimize_b_precheck = 0;
-  sf->jnt_comp_fast_tx_search = 0;
-  sf->jnt_comp_skip_mv_search = 0;
-  sf->reuse_inter_intra_mode = 0;
-
-  for (i = 0; i < TX_SIZES; i++) {
-    sf->intra_y_mode_mask[i] = INTRA_ALL;
-    sf->intra_uv_mode_mask[i] = UV_INTRA_ALL;
-  }
-  sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
-  sf->use_fast_coef_costing = 0;
-  sf->max_intra_bsize = BLOCK_LARGEST;
-  // This setting only takes effect when partition_search_type is set
-  // to FIXED_PARTITION.
-  sf->always_this_block_size = BLOCK_16X16;
-  // Recode loop tolerance %.
-  sf->recode_tolerance = 25;
-  sf->partition_search_breakout_dist_thr = 0;
-  sf->partition_search_breakout_rate_thr = 0;
-  sf->simple_model_rd_from_var = 0;
-  sf->prune_ext_partition_types_search_level = 0;
-  sf->ml_prune_rect_partition = 0;
-  sf->ml_prune_ab_partition = 0;
-  sf->ml_prune_4_partition = 0;
-  sf->fast_cdef_search = 0;
-  for (i = 0; i < PARTITION_BLOCK_SIZES; ++i)
-    sf->ml_partition_search_breakout_thresh[i] = -1;  // -1 means not enabled.
-
-  // Set this at the appropriate speed levels
-  sf->use_transform_domain_distortion = 0;
-  sf->gm_search_type = GM_FULL_SEARCH;
-  sf->gm_disable_recode = 0;
-  sf->use_fast_interpolation_filter_search = 0;
-  sf->skip_repeat_interpolation_filter_search = 0;
-  sf->use_hash_based_trellis = 0;
-  sf->prune_comp_search_by_single_result = 0;
-  sf->skip_repeated_newmv = 0;
-
-  // Set decoder side speed feature to use less dual sgr modes
-  sf->dual_sgr_penalty_level = 0;
-
-  sf->inter_mode_rd_model_estimation = 0;
-  sf->obmc_full_pixel_search_level = 0;
-
-  if (oxcf->mode == GOOD)
-    set_good_speed_features_framesize_independent(cpi, sf, oxcf->speed);
-
-  // sf->partition_search_breakout_dist_thr is set assuming max 64x64
-  // blocks. Normalise this if the blocks are bigger.
-  if (MAX_SB_SIZE_LOG2 > 6) {
-    sf->partition_search_breakout_dist_thr <<= 2 * (MAX_SB_SIZE_LOG2 - 6);
-  }
-
-  cpi->diamond_search_sad = av1_diamond_search_sad;
-
-  sf->allow_exhaustive_searches = 1;
-  int speed = (oxcf->speed > MAX_MESH_SPEED) ? MAX_MESH_SPEED : oxcf->speed;
-  if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
-    sf->exhaustive_searches_thresh = (1 << 24);
-  else
-    sf->exhaustive_searches_thresh = (1 << 25);
-  sf->max_exaustive_pct = good_quality_max_mesh_pct[speed];
-  if (speed > 0)
-    sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
-
-  for (i = 0; i < MAX_MESH_STEP; ++i) {
-    sf->mesh_patterns[i].range = good_quality_mesh_patterns[speed][i].range;
-    sf->mesh_patterns[i].interval =
-        good_quality_mesh_patterns[speed][i].interval;
-  }
-  if ((frame_is_intra_only(cm) && cm->allow_screen_content_tools) &&
-      (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION ||
-       cpi->oxcf.content == AOM_CONTENT_SCREEN)) {
-    for (i = 0; i < MAX_MESH_STEP; ++i) {
-      sf->mesh_patterns[i].range = intrabc_mesh_patterns[speed][i].range;
-      sf->mesh_patterns[i].interval = intrabc_mesh_patterns[speed][i].interval;
-    }
-    sf->max_exaustive_pct = intrabc_max_mesh_pct[speed];
-  }
-
-  // Slow quant, dct and trellis not worthwhile for first pass
-  // so make sure they are always turned off.
-  if (oxcf->pass == 1) sf->optimize_coefficients = NO_TRELLIS_OPT;
-
-  // No recode for 1 pass.
-  if (oxcf->pass == 0) {
-    sf->recode_loop = DISALLOW_RECODE;
-    sf->optimize_coefficients = NO_TRELLIS_OPT;
-  }
-
-  if (sf->mv.subpel_search_method == SUBPEL_TREE) {
-    cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree;
-  } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED) {
-    cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned;
-  } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) {
-    cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned_more;
-  } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_EVENMORE) {
-    cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned_evenmore;
-  }
-
-  cpi->optimize_speed_feature =
-      oxcf->pass != 1 ? sf->optimize_coefficients : NO_TRELLIS_OPT;
-  // FIXME: trellis not very efficient for quantisation matrices
-  if (cm->using_qmatrix) cpi->optimize_speed_feature = NO_TRELLIS_OPT;
-  if (oxcf->disable_trellis_quant) cpi->optimize_speed_feature = NO_TRELLIS_OPT;
-
-  x->min_partition_size = sf->default_min_partition_size;
-  x->max_partition_size = sf->default_max_partition_size;
-
-  // This is only used in motion vector unit test.
-  if (cpi->oxcf.motion_vector_unit_test == 1)
-    cpi->find_fractional_mv_step = av1_return_max_sub_pixel_mv;
-  else if (cpi->oxcf.motion_vector_unit_test == 2)
-    cpi->find_fractional_mv_step = av1_return_min_sub_pixel_mv;
-
-#if CONFIG_DIST_8X8
-  if (sf->use_transform_domain_distortion > 0) cpi->oxcf.using_dist_8x8 = 0;
-
-  if (cpi->oxcf.using_dist_8x8) x->min_partition_size = BLOCK_8X8;
-#endif  // CONFIG_DIST_8X8
-}
diff --git a/third_party/aom/av1/encoder/speed_features.h b/third_party/aom/av1/encoder/speed_features.h
deleted file mode 100644
index 41013b2e7..000000000
--- a/third_party/aom/av1/encoder/speed_features.h
+++ /dev/null
@@ -1,568 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_SPEED_FEATURES_H_
-#define AOM_AV1_ENCODER_SPEED_FEATURES_H_
-
-#include "av1/common/enums.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-enum {
-  INTRA_ALL = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << D45_PRED) |
-              (1 << D135_PRED) | (1 << D113_PRED) | (1 << D157_PRED) |
-              (1 << D203_PRED) | (1 << D67_PRED) | (1 << SMOOTH_PRED) |
-              (1 << SMOOTH_V_PRED) | (1 << SMOOTH_H_PRED) | (1 << PAETH_PRED),
-  UV_INTRA_ALL =
-      (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED) |
-      (1 << UV_D45_PRED) | (1 << UV_D135_PRED) | (1 << UV_D113_PRED) |
-      (1 << UV_D157_PRED) | (1 << UV_D203_PRED) | (1 << UV_D67_PRED) |
-      (1 << UV_SMOOTH_PRED) | (1 << UV_SMOOTH_V_PRED) |
-      (1 << UV_SMOOTH_H_PRED) | (1 << UV_PAETH_PRED) | (1 << UV_CFL_PRED),
-  UV_INTRA_DC = (1 << UV_DC_PRED),
-  UV_INTRA_DC_CFL = (1 << UV_DC_PRED) | (1 << UV_CFL_PRED),
-  UV_INTRA_DC_TM = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED),
-  UV_INTRA_DC_PAETH_CFL =
-      (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) | (1 << UV_CFL_PRED),
-  UV_INTRA_DC_H_V = (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED),
-  UV_INTRA_DC_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_V_PRED) |
-                        (1 << UV_H_PRED) | (1 << UV_CFL_PRED),
-  UV_INTRA_DC_PAETH_H_V = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) |
-                          (1 << UV_V_PRED) | (1 << UV_H_PRED),
-  UV_INTRA_DC_PAETH_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) |
-                              (1 << UV_V_PRED) | (1 << UV_H_PRED) |
-                              (1 << UV_CFL_PRED),
-  INTRA_DC = (1 << DC_PRED),
-  INTRA_DC_TM = (1 << DC_PRED) | (1 << PAETH_PRED),
-  INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED),
-  INTRA_DC_PAETH_H_V =
-      (1 << DC_PRED) | (1 << PAETH_PRED) | (1 << V_PRED) | (1 << H_PRED)
-};
-
-enum {
-  INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << GLOBALMV) |
-              (1 << NEWMV) | (1 << NEAREST_NEARESTMV) | (1 << NEAR_NEARMV) |
-              (1 << NEW_NEWMV) | (1 << NEAREST_NEWMV) | (1 << NEAR_NEWMV) |
-              (1 << NEW_NEARMV) | (1 << NEW_NEARESTMV) | (1 << GLOBAL_GLOBALMV),
-  INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << GLOBALMV) |
-                            (1 << NEAREST_NEARESTMV) | (1 << GLOBAL_GLOBALMV) |
-                            (1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV) |
-                            (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) |
-                            (1 << NEAR_NEARMV),
-};
-
-enum {
-  DISABLE_ALL_INTER_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) |
-                            (1 << THR_ALTR) | (1 << THR_GOLD) | (1 << THR_LAST),
-
-  DISABLE_ALL_SPLIT = (1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT,
-
-  DISABLE_COMPOUND_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA),
-
-  LAST_AND_INTRA_SPLIT_ONLY = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) |
-                              (1 << THR_ALTR) | (1 << THR_GOLD)
-};
-
-typedef enum {
-  TXFM_CODING_SF = 1,
-  INTER_PRED_SF = 2,
-  INTRA_PRED_SF = 4,
-  PARTITION_SF = 8,
-  LOOP_FILTER_SF = 16,
-  RD_SKIP_SF = 32,
-  RESERVE_2_SF = 64,
-  RESERVE_3_SF = 128,
-} DEV_SPEED_FEATURES;
-
-typedef enum {
-  DIAMOND = 0,
-  NSTEP = 1,
-  HEX = 2,
-  BIGDIA = 3,
-  SQUARE = 4,
-  FAST_HEX = 5,
-  FAST_DIAMOND = 6
-} SEARCH_METHODS;
-
-typedef enum {
-  // No recode.
-  DISALLOW_RECODE = 0,
-  // Allow recode for KF and exceeding maximum frame bandwidth.
-  ALLOW_RECODE_KFMAXBW = 1,
-  // Allow recode only for KF/ARF/GF frames.
-  ALLOW_RECODE_KFARFGF = 2,
-  // Allow recode for all frames based on bitrate constraints.
-  ALLOW_RECODE = 3,
-} RECODE_LOOP_TYPE;
-
-typedef enum {
-  SUBPEL_TREE = 0,
-  SUBPEL_TREE_PRUNED = 1,           // Prunes 1/2-pel searches
-  SUBPEL_TREE_PRUNED_MORE = 2,      // Prunes 1/2-pel searches more aggressively
-  SUBPEL_TREE_PRUNED_EVENMORE = 3,  // Prunes 1/2- and 1/4-pel searches
-  // Other methods to come
-} SUBPEL_SEARCH_METHODS;
-
-typedef enum {
-  USE_FULL_RD = 0,
-  USE_FAST_RD,
-  USE_LARGESTALL,
-} TX_SIZE_SEARCH_METHOD;
-
-typedef enum {
-  NOT_IN_USE = 0,
-  RELAXED_NEIGHBORING_MIN_MAX = 1
-} AUTO_MIN_MAX_MODE;
-
-typedef enum {
-  // Try the full image with different values.
-  LPF_PICK_FROM_FULL_IMAGE,
-  // Try a small portion of the image with different values.
-  LPF_PICK_FROM_SUBIMAGE,
-  // Estimate the level based on quantizer and frame type
-  LPF_PICK_FROM_Q,
-  // Pick 0 to disable LPF if LPF was enabled last frame
-  LPF_PICK_MINIMAL_LPF
-} LPF_PICK_METHOD;
-
-typedef enum {
-  // Terminate search early based on distortion so far compared to
-  // qp step, distortion in the neighborhood of the frame, etc.
-  FLAG_EARLY_TERMINATE = 1 << 0,
-
-  // Skips comp inter modes if the best so far is an intra mode.
-  FLAG_SKIP_COMP_BESTINTRA = 1 << 1,
-
-  // Skips oblique intra modes if the best so far is an inter mode.
-  FLAG_SKIP_INTRA_BESTINTER = 1 << 3,
-
-  // Skips oblique intra modes  at angles 27, 63, 117, 153 if the best
-  // intra so far is not one of the neighboring directions.
-  FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4,
-
-  // Skips intra modes other than DC_PRED if the source variance is small
-  FLAG_SKIP_INTRA_LOWVAR = 1 << 5,
-} MODE_SEARCH_SKIP_LOGIC;
-
-typedef enum {
-  NO_PRUNE = 0,
-  // eliminates one tx type in vertical and horizontal direction
-  PRUNE_ONE = 1,
-  // eliminates two tx types in each direction
-  PRUNE_TWO = 2,
-  // adaptively prunes the least perspective tx types out of all 16
-  // (tuned to provide negligible quality loss)
-  PRUNE_2D_ACCURATE = 3,
-  // similar, but applies much more aggressive pruning to get better speed-up
-  PRUNE_2D_FAST = 4,
-} TX_TYPE_PRUNE_MODE;
-
-typedef struct {
-  TX_TYPE_PRUNE_MODE prune_mode;
-  int fast_intra_tx_type_search;
-  int fast_inter_tx_type_search;
-
-  // Use a skip flag prediction model to detect blocks with skip = 1 early
-  // and avoid doing full TX type search for such blocks.
-  int use_skip_flag_prediction;
-
-  // Threshold used by the ML based method to predict TX block split decisions.
-  int ml_tx_split_thresh;
-
-  // skip remaining transform type search when we found the rdcost of skip is
-  // better than applying transform
-  int skip_tx_search;
-} TX_TYPE_SEARCH;
-
-typedef enum {
-  // Search partitions using RD criterion
-  SEARCH_PARTITION,
-
-  // Always use a fixed size partition
-  FIXED_PARTITION,
-
-  REFERENCE_PARTITION
-} PARTITION_SEARCH_TYPE;
-
-typedef struct MV_SPEED_FEATURES {
-  // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
-  SEARCH_METHODS search_method;
-
-  // This parameter controls which step in the n-step process we start at.
-  // It's changed adaptively based on circumstances.
-  int reduce_first_step_size;
-
-  // If this is set to 1, we limit the motion search range to 2 times the
-  // largest motion vector found in the last frame.
-  int auto_mv_step_size;
-
-  // Subpel_search_method can only be subpel_tree which does a subpixel
-  // logarithmic search that keeps stepping at 1/2 pixel units until
-  // you stop getting a gain, and then goes on to 1/4 and repeats
-  // the same process. Along the way it skips many diagonals.
-  SUBPEL_SEARCH_METHODS subpel_search_method;
-
-  // Maximum number of steps in logarithmic subpel search before giving up.
-  int subpel_iters_per_step;
-
-  // Control when to stop subpel search
-  int subpel_force_stop;
-} MV_SPEED_FEATURES;
-
-#define MAX_MESH_STEP 4
-
-typedef struct MESH_PATTERN {
-  int range;
-  int interval;
-} MESH_PATTERN;
-
-typedef enum {
-  GM_FULL_SEARCH,
-  GM_REDUCED_REF_SEARCH,
-  GM_DISABLE_SEARCH
-} GM_SEARCH_TYPE;
-
-typedef enum {
-  GM_ERRORADV_TR_0,
-  GM_ERRORADV_TR_1,
-  GM_ERRORADV_TR_2,
-  GM_ERRORADV_TR_TYPES,
-} GM_ERRORADV_TYPE;
-
-typedef enum {
-  NO_TRELLIS_OPT,         // No trellis optimization
-  FULL_TRELLIS_OPT,       // Trellis optimization in all stages
-  FINAL_PASS_TRELLIS_OPT  // Trellis optimization in only the final encode pass
-} TRELLIS_OPT_TYPE;
-
-typedef enum {
-  FULL_TXFM_RD,
-  LOW_TXFM_RD,
-} TXFM_RD_MODEL;
-
-typedef struct SPEED_FEATURES {
-  MV_SPEED_FEATURES mv;
-
-  // Frame level coding parameter update
-  int frame_parameter_update;
-
-  RECODE_LOOP_TYPE recode_loop;
-
-  // Trellis (dynamic programming) optimization of quantized values
-  TRELLIS_OPT_TYPE optimize_coefficients;
-
-  // Global motion warp error threshold
-  GM_ERRORADV_TYPE gm_erroradv_type;
-
-  // Always set to 0. If on it enables 0 cost background transmission
-  // (except for the initial transmission of the segmentation). The feature is
-  // disabled because the addition of very large block sizes make the
-  // backgrounds very to cheap to encode, and the segmentation we have
-  // adds overhead.
-  int static_segmentation;
-
-  // Limit the inter mode tested in the RD loop
-  int reduce_inter_modes;
-
-  // Do not compute the global motion parameters for a LAST2_FRAME or
-  // LAST3_FRAME if the GOLDEN_FRAME is closer and it has a non identity
-  // global model.
-  int selective_ref_gm;
-
-  // If 1 we iterate finding a best reference for 2 ref frames together - via
-  // a log search that iterates 4 times (check around mv for last for best
-  // error of combined predictor then check around mv for alt). If 0 we
-  // we just use the best motion vector found for each frame by itself.
-  BLOCK_SIZE comp_inter_joint_search_thresh;
-
-  // This variable is used to cap the maximum number of times we skip testing a
-  // mode to be evaluated. A high value means we will be faster.
-  int adaptive_rd_thresh;
-
-  // Determine which method we use to determine transform size. We can choose
-  // between options like full rd, largest for prediction size, largest
-  // for intra and model coefs for the rest.
-  TX_SIZE_SEARCH_METHOD tx_size_search_method;
-
-  // Init search depth for square and rectangular transform partitions.
-  // Values:
-  // 0 - search full tree, 1: search 1 level, 2: search the highest level only
-  int inter_tx_size_search_init_depth_sqr;
-  int inter_tx_size_search_init_depth_rect;
-  int intra_tx_size_search_init_depth_sqr;
-  int intra_tx_size_search_init_depth_rect;
-  // If any dimension of a coding block size above 64, always search the
-  // largest transform only, since the largest transform block size is 64x64.
-  int tx_size_search_lgr_block;
-
-  PARTITION_SEARCH_TYPE partition_search_type;
-
-  TX_TYPE_SEARCH tx_type_search;
-
-  // Skip split transform block partition when the collocated bigger block
-  // is selected as all zero coefficients.
-  int txb_split_cap;
-
-  // Shortcut the transform block partition and type search when the target
-  // rdcost is relatively lower.
-  // Values are 0 (not used) , or 1 - 2 with progressively increasing
-  // aggressiveness
-  int adaptive_txb_search_level;
-
-  // Prune level for tx_size_type search for inter based on rd model
-  // 0: no pruning
-  // 1-2: progressively increasing aggressiveness of pruning
-  int model_based_prune_tx_search_level;
-
-  // Model based breakout after interpolation filter search
-  // 0: no breakout
-  // 1: use model based rd breakout
-  int model_based_post_interp_filter_breakout;
-
-  // Used if partition_search_type = FIXED_SIZE_PARTITION
-  BLOCK_SIZE always_this_block_size;
-
-  // Drop less likely to be picked reference frames in the RD search.
-  // Has three levels for now: 0, 1 and 2, where higher levels prune more
-  // aggressively than lower ones. (0 means no pruning).
-  int selective_ref_frame;
-
-  // Prune extended partition types search
-  // Can take values 0 - 2, 0 referring to no pruning, and 1 - 2 increasing
-  // aggressiveness of pruning in order.
-  int prune_ext_partition_types_search_level;
-
-  // Use a ML model to prune horz and vert partitions
-  int ml_prune_rect_partition;
-
-  // Use a ML model to prune horz_a, horz_b, vert_a and vert_b partitions.
-  int ml_prune_ab_partition;
-
-  // Use a ML model to prune horz4 and vert4 partitions.
-  int ml_prune_4_partition;
-
-  int fast_cdef_search;
-
-  // 2-pass coding block partition search
-  int two_pass_partition_search;
-
-  // Use the mode decisions made in the initial partition search to prune mode
-  // candidates, e.g. ref frames.
-  int mode_pruning_based_on_two_pass_partition_search;
-
-  // Skip rectangular partition test when partition type none gives better
-  // rd than partition type split. Can take values 0 - 2, 0 referring to no
-  // skipping, and 1 - 2 increasing aggressiveness of skipping in order.
-  int less_rectangular_check_level;
-
-  // Use square partition only beyond this block size.
-  BLOCK_SIZE use_square_partition_only_threshold;
-
-  // Prune reference frames for rectangular partitions.
-  int prune_ref_frame_for_rect_partitions;
-
-  // Sets min and max partition sizes for this superblock based on the
-  // same superblock in last encoded frame, and the left and above neighbor.
-  AUTO_MIN_MAX_MODE auto_min_max_partition_size;
-  // Ensures the rd based auto partition search will always
-  // go down at least to the specified level.
-  BLOCK_SIZE rd_auto_partition_min_limit;
-
-  // Min and max partition size we enable (block_size) as per auto
-  // min max, but also used by adjust partitioning, and pick_partitioning.
-  BLOCK_SIZE default_min_partition_size;
-  BLOCK_SIZE default_max_partition_size;
-
-  // Whether or not we allow partitions one smaller or one greater than the last
-  // frame's partitioning. Only used if use_lastframe_partitioning is set.
-  int adjust_partitioning_from_last_frame;
-
-  // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable
-  // it always, to allow it for only Last frame and Intra, disable it for all
-  // inter modes or to enable it always.
-  int disable_split_mask;
-
-  // TODO(jingning): combine the related motion search speed features
-  // This allows us to use motion search at other sizes as a starting
-  // point for this motion search and limits the search range around it.
-  int adaptive_motion_search;
-
-  // Flag for allowing some use of exhaustive searches;
-  int allow_exhaustive_searches;
-
-  // Threshold for allowing exhaistive motion search.
-  int exhaustive_searches_thresh;
-
-  // Maximum number of exhaustive searches for a frame.
-  int max_exaustive_pct;
-
-  // Pattern to be used for any exhaustive mesh searches.
-  MESH_PATTERN mesh_patterns[MAX_MESH_STEP];
-
-  // Allows sub 8x8 modes to use the prediction filter that was determined
-  // best for 8x8 mode. If set to 0 we always re check all the filters for
-  // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter
-  // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected.
-  int adaptive_pred_interp_filter;
-
-  // Adaptive prediction mode search
-  int adaptive_mode_search;
-
-  int cb_partition_search;
-
-  int alt_ref_search_fp;
-
-  // Implements various heuristics to skip searching modes
-  // The heuristics selected are based on  flags
-  // defined in the MODE_SEARCH_SKIP_HEURISTICS enum
-  unsigned int mode_search_skip_flags;
-
-  // A source variance threshold below which filter search is disabled
-  // Choose a very large value (UINT_MAX) to use 8-tap always
-  unsigned int disable_filter_search_var_thresh;
-
-  // A source variance threshold below which wedge search is disabled
-  unsigned int disable_wedge_search_var_thresh;
-
-  // Whether fast wedge sign estimate is used
-  int fast_wedge_sign_estimate;
-
-  // These bit masks allow you to enable or disable intra modes for each
-  // transform size separately.
-  int intra_y_mode_mask[TX_SIZES];
-  int intra_uv_mode_mask[TX_SIZES];
-
-  // This feature controls how the loop filter level is determined.
-  LPF_PICK_METHOD lpf_pick;
-
-  // This feature controls whether we do the expensive context update and
-  // calculation in the rd coefficient costing loop.
-  int use_fast_coef_costing;
-
-  // This feature controls the tolerence vs target used in deciding whether to
-  // recode a frame. It has no meaning if recode is disabled.
-  int recode_tolerance;
-
-  // This variable controls the maximum block size where intra blocks can be
-  // used in inter frames.
-  // TODO(aconverse): Fold this into one of the other many mode skips
-  BLOCK_SIZE max_intra_bsize;
-
-  // Partition search early breakout thresholds.
-  int64_t partition_search_breakout_dist_thr;
-  int partition_search_breakout_rate_thr;
-
-  // Thresholds for ML based partition search breakout.
-  int ml_partition_search_breakout_thresh[PARTITION_BLOCK_SIZES];
-
-  // Allow skipping partition search for still image frame
-  int allow_partition_search_skip;
-
-  // Fast approximation of av1_model_rd_from_var_lapndz
-  int simple_model_rd_from_var;
-
-  // If true, sub-pixel search uses the exact convolve function used for final
-  // encoding and decoding; otherwise, it uses bilinear interpolation.
-  int use_accurate_subpel_search;
-
-  // Whether to compute distortion in the image domain (slower but
-  // more accurate), or in the transform domain (faster but less acurate).
-  // 0: use image domain
-  // 1: use transform domain in tx_type search, and use image domain for
-  // RD_STATS
-  // 2: use transform domain
-  int use_transform_domain_distortion;
-
-  GM_SEARCH_TYPE gm_search_type;
-
-  // whether to disable the global motion recode loop
-  int gm_disable_recode;
-
-  // Do limited interpolation filter search for dual filters, since best choice
-  // usually includes EIGHTTAP_REGULAR.
-  int use_fast_interpolation_filter_search;
-
-  // Save results of interpolation_filter_search for a block
-  // Check mv and ref_frames before search, if they are same with previous
-  // saved results, it can be skipped.
-  int skip_repeat_interpolation_filter_search;
-
-  // Use a hash table to store previously computed optimized qcoeffs from
-  // expensive calls to optimize_txb.
-  int use_hash_based_trellis;
-
-  // flag to drop some ref frames in compound motion search
-  int drop_ref;
-
-  // flag to allow skipping intra mode for inter frame prediction
-  int skip_intra_in_interframe;
-
-  // Use hash table to store intra(keyframe only) txb transform search results
-  // to avoid repeated search on the same residue signal.
-  int use_intra_txb_hash;
-
-  // Use hash table to store inter txb transform search results
-  // to avoid repeated search on the same residue signal.
-  int use_inter_txb_hash;
-
-  // Use hash table to store macroblock RD search results
-  // to avoid repeated search on the same residue signal.
-  int use_mb_rd_hash;
-
-  // Calculate RD cost before doing optimize_b, and skip if the cost is large.
-  int optimize_b_precheck;
-
-  // Use model rd instead of transform search in jnt_comp
-  int jnt_comp_fast_tx_search;
-
-  // Skip mv search in jnt_comp
-  int jnt_comp_skip_mv_search;
-
-  // Decoder side speed feature to add penalty for use of dual-sgr filters.
-  // Takes values 0 - 10, 0 indicating no penalty and each additional level
-  // adding a penalty of 1%
-  int dual_sgr_penalty_level;
-
-  // Dynamically estimate final rd from prediction error and mode cost
-  int inter_mode_rd_model_estimation;
-
-  // Skip some ref frames in compound motion search by single motion search
-  // result. Has three levels for now: 0 referring to no skipping, and 1 - 3
-  // increasing aggressiveness of skipping in order.
-  // Note: The search order might affect the result. It is better to search same
-  // single inter mode as a group.
-  int prune_comp_search_by_single_result;
-
-  // Reuse the inter_intra_mode search result from NEARESTMV mode to other
-  // single ref modes
-  int reuse_inter_intra_mode;
-
-  // Set the full pixel search level of obmc
-  // 0: obmc_full_pixel_diamond
-  // 1: obmc_refining_search_sad (faster)
-  int obmc_full_pixel_search_level;
-
-  // flag to skip NEWMV mode in drl if the motion search result is the same
-  int skip_repeated_newmv;
-} SPEED_FEATURES;
-
-struct AV1_COMP;
-
-void av1_set_speed_features_framesize_independent(struct AV1_COMP *cpi);
-void av1_set_speed_features_framesize_dependent(struct AV1_COMP *cpi);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_SPEED_FEATURES_H_
diff --git a/third_party/aom/av1/encoder/temporal_filter.c b/third_party/aom/av1/encoder/temporal_filter.c
deleted file mode 100644
index 75fdf02a5..000000000
--- a/third_party/aom/av1/encoder/temporal_filter.c
+++ /dev/null
@@ -1,602 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <limits.h>
-
-#include "config/aom_config.h"
-
-#include "av1/common/alloccommon.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/quant_common.h"
-#include "av1/common/reconinter.h"
-#include "av1/common/odintrin.h"
-#include "av1/encoder/av1_quantize.h"
-#include "av1/encoder/extend.h"
-#include "av1/encoder/firstpass.h"
-#include "av1/encoder/mcomp.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/ratectrl.h"
-#include "av1/encoder/reconinter_enc.h"
-#include "av1/encoder/segmentation.h"
-#include "av1/encoder/temporal_filter.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-#include "aom_ports/aom_timer.h"
-#include "aom_scale/aom_scale.h"
-
-static void temporal_filter_predictors_mb_c(
-    MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr,
-    int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col,
-    uint8_t *pred, struct scale_factors *scale, int x, int y,
-    int can_use_previous, int num_planes) {
-  const MV mv = { mv_row, mv_col };
-  enum mv_precision mv_precision_uv;
-  int uv_stride;
-  // TODO(angiebird): change plane setting accordingly
-  ConvolveParams conv_params = get_conv_params(0, 0, xd->bd);
-  const InterpFilters interp_filters = xd->mi[0]->interp_filters;
-  WarpTypesAllowed warp_types;
-  memset(&warp_types, 0, sizeof(WarpTypesAllowed));
-
-  if (uv_block_width == 8) {
-    uv_stride = (stride + 1) >> 1;
-    mv_precision_uv = MV_PRECISION_Q4;
-  } else {
-    uv_stride = stride;
-    mv_precision_uv = MV_PRECISION_Q3;
-  }
-  av1_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale, 16, 16,
-                            &conv_params, interp_filters, &warp_types, x, y, 0,
-                            0, MV_PRECISION_Q3, x, y, xd, can_use_previous);
-
-  if (num_planes > 1) {
-    av1_build_inter_predictor(
-        u_mb_ptr, uv_stride, &pred[256], uv_block_width, &mv, scale,
-        uv_block_width, uv_block_height, &conv_params, interp_filters,
-        &warp_types, x, y, 1, 0, mv_precision_uv, x, y, xd, can_use_previous);
-
-    av1_build_inter_predictor(
-        v_mb_ptr, uv_stride, &pred[512], uv_block_width, &mv, scale,
-        uv_block_width, uv_block_height, &conv_params, interp_filters,
-        &warp_types, x, y, 2, 0, mv_precision_uv, x, y, xd, can_use_previous);
-  }
-}
-
-void av1_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride,
-                                 uint8_t *frame2, unsigned int block_width,
-                                 unsigned int block_height, int strength,
-                                 int filter_weight, unsigned int *accumulator,
-                                 uint16_t *count) {
-  unsigned int i, j, k;
-  int modifier;
-  int byte = 0;
-  const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
-
-  for (i = 0, k = 0; i < block_height; i++) {
-    for (j = 0; j < block_width; j++, k++) {
-      int pixel_value = *frame2;
-
-      // non-local mean approach
-      int diff_sse[9] = { 0 };
-      int idx, idy, index = 0;
-
-      for (idy = -1; idy <= 1; ++idy) {
-        for (idx = -1; idx <= 1; ++idx) {
-          int row = (int)i + idy;
-          int col = (int)j + idx;
-
-          if (row >= 0 && row < (int)block_height && col >= 0 &&
-              col < (int)block_width) {
-            int diff = frame1[byte + idy * (int)stride + idx] -
-                       frame2[idy * (int)block_width + idx];
-            diff_sse[index] = diff * diff;
-            ++index;
-          }
-        }
-      }
-
-      assert(index > 0);
-
-      modifier = 0;
-      for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx];
-
-      modifier *= 3;
-      modifier /= index;
-
-      ++frame2;
-
-      modifier += rounding;
-      modifier >>= strength;
-
-      if (modifier > 16) modifier = 16;
-
-      modifier = 16 - modifier;
-      modifier *= filter_weight;
-
-      count[k] += modifier;
-      accumulator[k] += modifier * pixel_value;
-
-      byte++;
-    }
-
-    byte += stride - block_width;
-  }
-}
-
-void av1_highbd_temporal_filter_apply_c(
-    uint8_t *frame1_8, unsigned int stride, uint8_t *frame2_8,
-    unsigned int block_width, unsigned int block_height, int strength,
-    int filter_weight, unsigned int *accumulator, uint16_t *count) {
-  uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8);
-  uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8);
-  unsigned int i, j, k;
-  int modifier;
-  int byte = 0;
-  const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
-
-  for (i = 0, k = 0; i < block_height; i++) {
-    for (j = 0; j < block_width; j++, k++) {
-      int pixel_value = *frame2;
-
-      // non-local mean approach
-      int diff_sse[9] = { 0 };
-      int idx, idy, index = 0;
-
-      for (idy = -1; idy <= 1; ++idy) {
-        for (idx = -1; idx <= 1; ++idx) {
-          int row = (int)i + idy;
-          int col = (int)j + idx;
-
-          if (row >= 0 && row < (int)block_height && col >= 0 &&
-              col < (int)block_width) {
-            int diff = frame1[byte + idy * (int)stride + idx] -
-                       frame2[idy * (int)block_width + idx];
-            diff_sse[index] = diff * diff;
-            ++index;
-          }
-        }
-      }
-
-      assert(index > 0);
-
-      modifier = 0;
-      for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx];
-
-      modifier *= 3;
-      modifier /= index;
-
-      ++frame2;
-
-      modifier += rounding;
-      modifier >>= strength;
-
-      if (modifier > 16) modifier = 16;
-
-      modifier = 16 - modifier;
-      modifier *= filter_weight;
-
-      count[k] += modifier;
-      accumulator[k] += modifier * pixel_value;
-
-      byte++;
-    }
-
-    byte += stride - block_width;
-  }
-}
-
-static int temporal_filter_find_matching_mb_c(AV1_COMP *cpi,
-                                              uint8_t *arf_frame_buf,
-                                              uint8_t *frame_ptr_buf,
-                                              int stride, int x_pos,
-                                              int y_pos) {
-  MACROBLOCK *const x = &cpi->td.mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
-  int step_param;
-  int sadpb = x->sadperbit16;
-  int bestsme = INT_MAX;
-  int distortion;
-  unsigned int sse;
-  int cost_list[5];
-  MvLimits tmp_mv_limits = x->mv_limits;
-
-  MV best_ref_mv1 = kZeroMv;
-  MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
-
-  // Save input state
-  struct buf_2d src = x->plane[0].src;
-  struct buf_2d pre = xd->plane[0].pre[0];
-
-  best_ref_mv1_full.col = best_ref_mv1.col >> 3;
-  best_ref_mv1_full.row = best_ref_mv1.row >> 3;
-
-  // Setup frame pointers
-  x->plane[0].src.buf = arf_frame_buf;
-  x->plane[0].src.stride = stride;
-  xd->plane[0].pre[0].buf = frame_ptr_buf;
-  xd->plane[0].pre[0].stride = stride;
-
-  step_param = mv_sf->reduce_first_step_size;
-  step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 2);
-
-  av1_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
-
-  x->mvcost = x->mv_cost_stack;
-  x->nmvjointcost = x->nmv_vec_cost;
-
-  av1_full_pixel_search(cpi, x, BLOCK_16X16, &best_ref_mv1_full, step_param,
-                        NSTEP, 1, sadpb, cond_cost_list(cpi, cost_list),
-                        &best_ref_mv1, 0, 0, x_pos, y_pos, 0);
-  x->mv_limits = tmp_mv_limits;
-
-  // Ignore mv costing by sending NULL pointer instead of cost array
-  if (cpi->common.cur_frame_force_integer_mv == 1) {
-    const uint8_t *const src_address = x->plane[0].src.buf;
-    const int src_stride = x->plane[0].src.stride;
-    const uint8_t *const y = xd->plane[0].pre[0].buf;
-    const int y_stride = xd->plane[0].pre[0].stride;
-    const int offset = x->best_mv.as_mv.row * y_stride + x->best_mv.as_mv.col;
-
-    x->best_mv.as_mv.row *= 8;
-    x->best_mv.as_mv.col *= 8;
-
-    bestsme = cpi->fn_ptr[BLOCK_16X16].vf(y + offset, y_stride, src_address,
-                                          src_stride, &sse);
-  } else {
-    bestsme = cpi->find_fractional_mv_step(
-        x, &cpi->common, 0, 0, &best_ref_mv1,
-        cpi->common.allow_high_precision_mv, x->errorperbit,
-        &cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_iters_per_step,
-        cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL,
-        NULL, 0, 0, 0, 0, 0);
-  }
-
-  x->e_mbd.mi[0]->mv[0] = x->best_mv;
-
-  // Restore input state
-  x->plane[0].src = src;
-  xd->plane[0].pre[0] = pre;
-
-  return bestsme;
-}
-
-static void temporal_filter_iterate_c(AV1_COMP *cpi,
-                                      YV12_BUFFER_CONFIG **frames,
-                                      int frame_count, int alt_ref_index,
-                                      int strength,
-                                      struct scale_factors *scale) {
-  const AV1_COMMON *cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  int byte;
-  int frame;
-  int mb_col, mb_row;
-  unsigned int filter_weight;
-  int mb_cols = (frames[alt_ref_index]->y_crop_width + 15) >> 4;
-  int mb_rows = (frames[alt_ref_index]->y_crop_height + 15) >> 4;
-  int mb_y_offset = 0;
-  int mb_uv_offset = 0;
-  DECLARE_ALIGNED(16, unsigned int, accumulator[16 * 16 * 3]);
-  DECLARE_ALIGNED(16, uint16_t, count[16 * 16 * 3]);
-  MACROBLOCKD *mbd = &cpi->td.mb.e_mbd;
-  YV12_BUFFER_CONFIG *f = frames[alt_ref_index];
-  uint8_t *dst1, *dst2;
-  DECLARE_ALIGNED(32, uint16_t, predictor16[16 * 16 * 3]);
-  DECLARE_ALIGNED(32, uint8_t, predictor8[16 * 16 * 3]);
-  uint8_t *predictor;
-  const int mb_uv_height = 16 >> mbd->plane[1].subsampling_y;
-  const int mb_uv_width = 16 >> mbd->plane[1].subsampling_x;
-
-  // Save input state
-  uint8_t *input_buffer[MAX_MB_PLANE];
-  int i;
-  if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    predictor = CONVERT_TO_BYTEPTR(predictor16);
-  } else {
-    predictor = predictor8;
-  }
-
-  for (i = 0; i < num_planes; i++) input_buffer[i] = mbd->plane[i].pre[0].buf;
-
-  for (mb_row = 0; mb_row < mb_rows; mb_row++) {
-    // Source frames are extended to 16 pixels. This is different than
-    //  L/A/G reference frames that have a border of 32 (AV1ENCBORDERINPIXELS)
-    // A 6/8 tap filter is used for motion search.  This requires 2 pixels
-    //  before and 3 pixels after.  So the largest Y mv on a border would
-    //  then be 16 - AOM_INTERP_EXTEND. The UV blocks are half the size of the
-    //  Y and therefore only extended by 8.  The largest mv that a UV block
-    //  can support is 8 - AOM_INTERP_EXTEND.  A UV mv is half of a Y mv.
-    //  (16 - AOM_INTERP_EXTEND) >> 1 which is greater than
-    //  8 - AOM_INTERP_EXTEND.
-    // To keep the mv in play for both Y and UV planes the max that it
-    //  can be on a border is therefore 16 - (2*AOM_INTERP_EXTEND+1).
-    cpi->td.mb.mv_limits.row_min =
-        -((mb_row * 16) + (17 - 2 * AOM_INTERP_EXTEND));
-    cpi->td.mb.mv_limits.row_max =
-        ((mb_rows - 1 - mb_row) * 16) + (17 - 2 * AOM_INTERP_EXTEND);
-
-    for (mb_col = 0; mb_col < mb_cols; mb_col++) {
-      int j, k;
-      int stride;
-
-      memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0]));
-      memset(count, 0, 16 * 16 * 3 * sizeof(count[0]));
-
-      cpi->td.mb.mv_limits.col_min =
-          -((mb_col * 16) + (17 - 2 * AOM_INTERP_EXTEND));
-      cpi->td.mb.mv_limits.col_max =
-          ((mb_cols - 1 - mb_col) * 16) + (17 - 2 * AOM_INTERP_EXTEND);
-
-      for (frame = 0; frame < frame_count; frame++) {
-        const int thresh_low = 10000;
-        const int thresh_high = 20000;
-
-        if (frames[frame] == NULL) continue;
-
-        mbd->mi[0]->mv[0].as_mv.row = 0;
-        mbd->mi[0]->mv[0].as_mv.col = 0;
-        mbd->mi[0]->motion_mode = SIMPLE_TRANSLATION;
-
-        if (frame == alt_ref_index) {
-          filter_weight = 2;
-        } else {
-          // Find best match in this frame by MC
-          int err = temporal_filter_find_matching_mb_c(
-              cpi, frames[alt_ref_index]->y_buffer + mb_y_offset,
-              frames[frame]->y_buffer + mb_y_offset, frames[frame]->y_stride,
-              mb_col * 16, mb_row * 16);
-
-          // Assign higher weight to matching MB if it's error
-          // score is lower. If not applying MC default behavior
-          // is to weight all MBs equal.
-          filter_weight = err < thresh_low ? 2 : err < thresh_high ? 1 : 0;
-        }
-
-        if (filter_weight != 0) {
-          // Construct the predictors
-          temporal_filter_predictors_mb_c(
-              mbd, frames[frame]->y_buffer + mb_y_offset,
-              frames[frame]->u_buffer + mb_uv_offset,
-              frames[frame]->v_buffer + mb_uv_offset, frames[frame]->y_stride,
-              mb_uv_width, mb_uv_height, mbd->mi[0]->mv[0].as_mv.row,
-              mbd->mi[0]->mv[0].as_mv.col, predictor, scale, mb_col * 16,
-              mb_row * 16, cm->allow_warped_motion, num_planes);
-
-          // Apply the filter (YUV)
-          if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-            int adj_strength = strength + 2 * (mbd->bd - 8);
-            av1_highbd_temporal_filter_apply(
-                f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16,
-                adj_strength, filter_weight, accumulator, count);
-            if (num_planes > 1) {
-              av1_highbd_temporal_filter_apply(
-                  f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256,
-                  mb_uv_width, mb_uv_height, adj_strength, filter_weight,
-                  accumulator + 256, count + 256);
-              av1_highbd_temporal_filter_apply(
-                  f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
-                  mb_uv_width, mb_uv_height, adj_strength, filter_weight,
-                  accumulator + 512, count + 512);
-            }
-          } else {
-            av1_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
-                                        predictor, 16, 16, strength,
-                                        filter_weight, accumulator, count);
-            if (num_planes > 1) {
-              av1_temporal_filter_apply_c(
-                  f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256,
-                  mb_uv_width, mb_uv_height, strength, filter_weight,
-                  accumulator + 256, count + 256);
-              av1_temporal_filter_apply_c(
-                  f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
-                  mb_uv_width, mb_uv_height, strength, filter_weight,
-                  accumulator + 512, count + 512);
-            }
-          }
-        }
-      }
-
-      // Normalize filter output to produce AltRef frame
-      if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-        uint16_t *dst1_16;
-        uint16_t *dst2_16;
-        dst1 = cpi->alt_ref_buffer.y_buffer;
-        dst1_16 = CONVERT_TO_SHORTPTR(dst1);
-        stride = cpi->alt_ref_buffer.y_stride;
-        byte = mb_y_offset;
-        for (i = 0, k = 0; i < 16; i++) {
-          for (j = 0; j < 16; j++, k++) {
-            dst1_16[byte] =
-                (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
-
-            // move to next pixel
-            byte++;
-          }
-
-          byte += stride - 16;
-        }
-        if (num_planes > 1) {
-          dst1 = cpi->alt_ref_buffer.u_buffer;
-          dst2 = cpi->alt_ref_buffer.v_buffer;
-          dst1_16 = CONVERT_TO_SHORTPTR(dst1);
-          dst2_16 = CONVERT_TO_SHORTPTR(dst2);
-          stride = cpi->alt_ref_buffer.uv_stride;
-          byte = mb_uv_offset;
-          for (i = 0, k = 256; i < mb_uv_height; i++) {
-            for (j = 0; j < mb_uv_width; j++, k++) {
-              int m = k + 256;
-              // U
-              dst1_16[byte] =
-                  (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
-              // V
-              dst2_16[byte] =
-                  (uint16_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);
-              // move to next pixel
-              byte++;
-            }
-            byte += stride - mb_uv_width;
-          }
-        }
-      } else {
-        dst1 = cpi->alt_ref_buffer.y_buffer;
-        stride = cpi->alt_ref_buffer.y_stride;
-        byte = mb_y_offset;
-        for (i = 0, k = 0; i < 16; i++) {
-          for (j = 0; j < 16; j++, k++) {
-            dst1[byte] =
-                (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
-
-            // move to next pixel
-            byte++;
-          }
-          byte += stride - 16;
-        }
-        if (num_planes > 1) {
-          dst1 = cpi->alt_ref_buffer.u_buffer;
-          dst2 = cpi->alt_ref_buffer.v_buffer;
-          stride = cpi->alt_ref_buffer.uv_stride;
-          byte = mb_uv_offset;
-          for (i = 0, k = 256; i < mb_uv_height; i++) {
-            for (j = 0; j < mb_uv_width; j++, k++) {
-              int m = k + 256;
-              // U
-              dst1[byte] =
-                  (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
-              // V
-              dst2[byte] =
-                  (uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);
-              // move to next pixel
-              byte++;
-            }
-            byte += stride - mb_uv_width;
-          }
-        }
-      }
-      mb_y_offset += 16;
-      mb_uv_offset += mb_uv_width;
-    }
-    mb_y_offset += 16 * (f->y_stride - mb_cols);
-    mb_uv_offset += mb_uv_height * f->uv_stride - mb_uv_width * mb_cols;
-  }
-
-  // Restore input state
-  for (i = 0; i < num_planes; i++) mbd->plane[i].pre[0].buf = input_buffer[i];
-}
-
-// Apply buffer limits and context specific adjustments to arnr filter.
-static void adjust_arnr_filter(AV1_COMP *cpi, int distance, int group_boost,
-                               int *arnr_frames, int *arnr_strength) {
-  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-  const int frames_after_arf =
-      av1_lookahead_depth(cpi->lookahead) - distance - 1;
-  int frames_fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1;
-  int frames_bwd;
-  int q, frames, strength;
-
-  // Define the forward and backwards filter limits for this arnr group.
-  if (frames_fwd > frames_after_arf) frames_fwd = frames_after_arf;
-  if (frames_fwd > distance) frames_fwd = distance;
-
-  frames_bwd = frames_fwd;
-
-  // For even length filter there is one more frame backward
-  // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
-  if (frames_bwd < distance) frames_bwd += (oxcf->arnr_max_frames + 1) & 0x1;
-
-  // Set the baseline active filter size.
-  frames = frames_bwd + 1 + frames_fwd;
-
-  // Adjust the strength based on active max q.
-  if (cpi->common.current_video_frame > 1)
-    q = ((int)av1_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME],
-                                      cpi->common.seq_params.bit_depth));
-  else
-    q = ((int)av1_convert_qindex_to_q(cpi->rc.avg_frame_qindex[KEY_FRAME],
-                                      cpi->common.seq_params.bit_depth));
-  if (q > 16) {
-    strength = oxcf->arnr_strength;
-  } else {
-    strength = oxcf->arnr_strength - ((16 - q) / 2);
-    if (strength < 0) strength = 0;
-  }
-
-  // Adjust number of frames in filter and strength based on gf boost level.
-  if (frames > group_boost / 150) {
-    frames = group_boost / 150;
-    frames += !(frames & 1);
-  }
-
-  if (strength > group_boost / 300) {
-    strength = group_boost / 300;
-  }
-
-  *arnr_frames = frames;
-  *arnr_strength = strength;
-}
-
-void av1_temporal_filter(AV1_COMP *cpi, int distance) {
-  RATE_CONTROL *const rc = &cpi->rc;
-  int frame;
-  int frames_to_blur;
-  int start_frame;
-  int strength;
-  int frames_to_blur_backward;
-  int frames_to_blur_forward;
-  struct scale_factors sf;
-  YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
-  const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
-
-  // Apply context specific adjustments to the arnr filter parameters.
-  adjust_arnr_filter(cpi, distance, rc->gfu_boost, &frames_to_blur, &strength);
-  // TODO(weitinglin): Currently, we enforce the filtering strength on
-  //                   extra ARFs' to be zeros. We should investigate in which
-  //                   case it is more beneficial to use non-zero strength
-  //                   filtering.
-  if (gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) {
-    strength = 0;
-    frames_to_blur = 1;
-  }
-
-  int which_arf = gf_group->arf_update_idx[gf_group->index];
-
-  // Set the temporal filtering status for the corresponding OVERLAY frame
-  if (strength == 0 && frames_to_blur == 1)
-    cpi->is_arf_filter_off[which_arf] = 1;
-  else
-    cpi->is_arf_filter_off[which_arf] = 0;
-  cpi->common.showable_frame = cpi->is_arf_filter_off[which_arf];
-
-  frames_to_blur_backward = (frames_to_blur / 2);
-  frames_to_blur_forward = ((frames_to_blur - 1) / 2);
-  start_frame = distance + frames_to_blur_forward;
-
-  // Setup frame pointers, NULL indicates frame not included in filter.
-  for (frame = 0; frame < frames_to_blur; ++frame) {
-    const int which_buffer = start_frame - frame;
-    struct lookahead_entry *buf =
-        av1_lookahead_peek(cpi->lookahead, which_buffer);
-    frames[frames_to_blur - 1 - frame] = &buf->img;
-  }
-
-  if (frames_to_blur > 0) {
-    // Setup scaling factors. Scaling on each of the arnr frames is not
-    // supported.
-    // ARF is produced at the native frame size and resized when coded.
-    av1_setup_scale_factors_for_frame(
-        &sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
-        frames[0]->y_crop_width, frames[0]->y_crop_height);
-  }
-
-  temporal_filter_iterate_c(cpi, frames, frames_to_blur,
-                            frames_to_blur_backward, strength, &sf);
-}
diff --git a/third_party/aom/av1/encoder/temporal_filter.h b/third_party/aom/av1/encoder/temporal_filter.h
deleted file mode 100644
index 2ddc68b2c..000000000
--- a/third_party/aom/av1/encoder/temporal_filter.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_TEMPORAL_FILTER_H_
-#define AOM_AV1_ENCODER_TEMPORAL_FILTER_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_temporal_filter(AV1_COMP *cpi, int distance);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_TEMPORAL_FILTER_H_
diff --git a/third_party/aom/av1/encoder/tokenize.c b/third_party/aom/av1/encoder/tokenize.c
deleted file mode 100644
index 16a6a9a35..000000000
--- a/third_party/aom/av1/encoder/tokenize.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "aom_mem/aom_mem.h"
-
-#include "av1/common/entropy.h"
-#include "av1/common/pred_common.h"
-#include "av1/common/scan.h"
-#include "av1/common/seg_common.h"
-
-#include "av1/encoder/cost.h"
-#include "av1/encoder/encoder.h"
-#include "av1/encoder/encodetxb.h"
-#include "av1/encoder/rdopt.h"
-#include "av1/encoder/tokenize.h"
-
-static int cost_and_tokenize_map(Av1ColorMapParam *param, TOKENEXTRA **t,
-                                 int plane, int calc_rate, int allow_update_cdf,
-                                 FRAME_COUNTS *counts) {
-  const uint8_t *const color_map = param->color_map;
-  MapCdf map_cdf = param->map_cdf;
-  ColorCost color_cost = param->color_cost;
-  const int plane_block_width = param->plane_width;
-  const int rows = param->rows;
-  const int cols = param->cols;
-  const int n = param->n_colors;
-  const int palette_size_idx = n - PALETTE_MIN_SIZE;
-  int this_rate = 0;
-  uint8_t color_order[PALETTE_MAX_SIZE];
-
-  (void)plane;
-  (void)counts;
-
-  for (int k = 1; k < rows + cols - 1; ++k) {
-    for (int j = AOMMIN(k, cols - 1); j >= AOMMAX(0, k - rows + 1); --j) {
-      int i = k - j;
-      int color_new_idx;
-      const int color_ctx = av1_get_palette_color_index_context(
-          color_map, plane_block_width, i, j, n, color_order, &color_new_idx);
-      assert(color_new_idx >= 0 && color_new_idx < n);
-      if (calc_rate) {
-        this_rate += (*color_cost)[palette_size_idx][color_ctx][color_new_idx];
-      } else {
-        (*t)->token = color_new_idx;
-        (*t)->color_map_cdf = map_cdf[palette_size_idx][color_ctx];
-        ++(*t);
-        if (allow_update_cdf)
-          update_cdf(map_cdf[palette_size_idx][color_ctx], color_new_idx, n);
-#if CONFIG_ENTROPY_STATS
-        if (plane) {
-          ++counts->palette_uv_color_index[palette_size_idx][color_ctx]
-                                          [color_new_idx];
-        } else {
-          ++counts->palette_y_color_index[palette_size_idx][color_ctx]
-                                         [color_new_idx];
-        }
-#endif
-      }
-    }
-  }
-  if (calc_rate) return this_rate;
-  return 0;
-}
-
-static void get_palette_params(const MACROBLOCK *const x, int plane,
-                               BLOCK_SIZE bsize, Av1ColorMapParam *params) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-  params->color_map = xd->plane[plane].color_index_map;
-  params->map_cdf = plane ? xd->tile_ctx->palette_uv_color_index_cdf
-                          : xd->tile_ctx->palette_y_color_index_cdf;
-  params->color_cost =
-      plane ? &x->palette_uv_color_cost : &x->palette_y_color_cost;
-  params->n_colors = pmi->palette_size[plane];
-  av1_get_block_dimensions(bsize, plane, xd, &params->plane_width, NULL,
-                           &params->rows, &params->cols);
-}
-
-static void get_color_map_params(const MACROBLOCK *const x, int plane,
-                                 BLOCK_SIZE bsize, TX_SIZE tx_size,
-                                 COLOR_MAP_TYPE type,
-                                 Av1ColorMapParam *params) {
-  (void)tx_size;
-  memset(params, 0, sizeof(*params));
-  switch (type) {
-    case PALETTE_MAP: get_palette_params(x, plane, bsize, params); break;
-    default: assert(0 && "Invalid color map type"); return;
-  }
-}
-
-int av1_cost_color_map(const MACROBLOCK *const x, int plane, BLOCK_SIZE bsize,
-                       TX_SIZE tx_size, COLOR_MAP_TYPE type) {
-  assert(plane == 0 || plane == 1);
-  Av1ColorMapParam color_map_params;
-  get_color_map_params(x, plane, bsize, tx_size, type, &color_map_params);
-  return cost_and_tokenize_map(&color_map_params, NULL, plane, 1, 0, NULL);
-}
-
-void av1_tokenize_color_map(const MACROBLOCK *const x, int plane,
-                            TOKENEXTRA **t, BLOCK_SIZE bsize, TX_SIZE tx_size,
-                            COLOR_MAP_TYPE type, int allow_update_cdf,
-                            FRAME_COUNTS *counts) {
-  assert(plane == 0 || plane == 1);
-  Av1ColorMapParam color_map_params;
-  get_color_map_params(x, plane, bsize, tx_size, type, &color_map_params);
-  // The first color index does not use context or entropy.
-  (*t)->token = color_map_params.color_map[0];
-  (*t)->color_map_cdf = NULL;
-  ++(*t);
-  cost_and_tokenize_map(&color_map_params, t, plane, 0, allow_update_cdf,
-                        counts);
-}
-
-void tokenize_vartx(ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
-                    TX_SIZE tx_size, BLOCK_SIZE plane_bsize, int blk_row,
-                    int blk_col, int block, int plane, void *arg) {
-  MACROBLOCK *const x = &td->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
-  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
-
-  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
-
-  const TX_SIZE plane_tx_size =
-      plane ? av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x,
-                                    pd->subsampling_y)
-            : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row,
-                                                         blk_col)];
-
-  if (tx_size == plane_tx_size || plane) {
-    plane_bsize = get_plane_block_size(mbmi->sb_type, pd->subsampling_x,
-                                       pd->subsampling_y);
-    if (!dry_run) {
-      av1_update_and_record_txb_context(plane, block, blk_row, blk_col,
-                                        plane_bsize, tx_size, arg);
-    } else if (dry_run == DRY_RUN_NORMAL) {
-      av1_update_txb_context_b(plane, block, blk_row, blk_col, plane_bsize,
-                               tx_size, arg);
-    } else {
-      printf("DRY_RUN_COSTCOEFFS is not supported yet\n");
-      assert(0);
-    }
-  } else {
-    // Half the block size in transform block unit.
-    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
-    const int bsw = tx_size_wide_unit[sub_txs];
-    const int bsh = tx_size_high_unit[sub_txs];
-    const int step = bsw * bsh;
-
-    assert(bsw > 0 && bsh > 0);
-
-    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
-      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
-        const int offsetr = blk_row + row;
-        const int offsetc = blk_col + col;
-
-        if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
-
-        tokenize_vartx(td, t, dry_run, sub_txs, plane_bsize, offsetr, offsetc,
-                       block, plane, arg);
-        block += step;
-      }
-    }
-  }
-}
-
-void av1_tokenize_sb_vartx(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
-                           RUN_TYPE dry_run, int mi_row, int mi_col,
-                           BLOCK_SIZE bsize, int *rate,
-                           uint8_t allow_update_cdf) {
-  const AV1_COMMON *const cm = &cpi->common;
-  const int num_planes = av1_num_planes(cm);
-  MACROBLOCK *const x = &td->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = xd->mi[0];
-  (void)t;
-  struct tokenize_b_args arg = { cpi, td, t, 0, allow_update_cdf };
-  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
-
-  if (mbmi->skip) {
-    av1_reset_skip_context(xd, mi_row, mi_col, bsize, num_planes);
-    return;
-  }
-
-  for (int plane = 0; plane < num_planes; ++plane) {
-    if (!is_chroma_reference(mi_row, mi_col, bsize,
-                             xd->plane[plane].subsampling_x,
-                             xd->plane[plane].subsampling_y)) {
-      continue;
-    }
-    const struct macroblockd_plane *const pd = &xd->plane[plane];
-    const BLOCK_SIZE bsizec =
-        scale_chroma_bsize(bsize, pd->subsampling_x, pd->subsampling_y);
-    const BLOCK_SIZE plane_bsize =
-        get_plane_block_size(bsizec, pd->subsampling_x, pd->subsampling_y);
-    const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
-    const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
-    const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, plane);
-    const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
-    int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0];
-    int bh = block_size_high[txb_size] >> tx_size_high_log2[0];
-    int idx, idy;
-    int block = 0;
-    int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
-
-    const BLOCK_SIZE max_unit_bsize =
-        get_plane_block_size(BLOCK_64X64, pd->subsampling_x, pd->subsampling_y);
-    int mu_blocks_wide =
-        block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
-    int mu_blocks_high =
-        block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
-
-    mu_blocks_wide = AOMMIN(mi_width, mu_blocks_wide);
-    mu_blocks_high = AOMMIN(mi_height, mu_blocks_high);
-
-    for (idy = 0; idy < mi_height; idy += mu_blocks_high) {
-      for (idx = 0; idx < mi_width; idx += mu_blocks_wide) {
-        int blk_row, blk_col;
-        const int unit_height = AOMMIN(mu_blocks_high + idy, mi_height);
-        const int unit_width = AOMMIN(mu_blocks_wide + idx, mi_width);
-        for (blk_row = idy; blk_row < unit_height; blk_row += bh) {
-          for (blk_col = idx; blk_col < unit_width; blk_col += bw) {
-            tokenize_vartx(td, t, dry_run, max_tx_size, plane_bsize, blk_row,
-                           blk_col, block, plane, &arg);
-            block += step;
-          }
-        }
-      }
-    }
-  }
-  if (rate) *rate += arg.this_rate;
-}
diff --git a/third_party/aom/av1/encoder/tokenize.h b/third_party/aom/av1/encoder/tokenize.h
deleted file mode 100644
index 63b505f36..000000000
--- a/third_party/aom/av1/encoder/tokenize.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_TOKENIZE_H_
-#define AOM_AV1_ENCODER_TOKENIZE_H_
-
-#include "av1/common/entropy.h"
-#include "av1/encoder/block.h"
-#include "aom_dsp/bitwriter.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct {
-  aom_cdf_prob *color_map_cdf;
-  // TODO(yaowu: use packed enum type if appropriate)
-  uint8_t token;
-} TOKENEXTRA;
-
-struct AV1_COMP;
-struct ThreadData;
-struct FRAME_COUNTS;
-
-struct tokenize_b_args {
-  const struct AV1_COMP *cpi;
-  struct ThreadData *td;
-  TOKENEXTRA **tp;
-  int this_rate;
-  uint8_t allow_update_cdf;
-};
-
-typedef enum {
-  OUTPUT_ENABLED = 0,
-  DRY_RUN_NORMAL,
-  DRY_RUN_COSTCOEFFS,
-} RUN_TYPE;
-
-// Note in all the tokenize functions rate if non NULL is incremented
-// with the coefficient token cost only if dry_run = DRY_RUN_COSTCOEFS,
-// otherwise rate is not incremented.
-void av1_tokenize_sb_vartx(const struct AV1_COMP *cpi, struct ThreadData *td,
-                           TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row,
-                           int mi_col, BLOCK_SIZE bsize, int *rate,
-                           uint8_t allow_update_cdf);
-
-int av1_cost_color_map(const MACROBLOCK *const x, int plane, BLOCK_SIZE bsize,
-                       TX_SIZE tx_size, COLOR_MAP_TYPE type);
-
-void av1_tokenize_color_map(const MACROBLOCK *const x, int plane,
-                            TOKENEXTRA **t, BLOCK_SIZE bsize, TX_SIZE tx_size,
-                            COLOR_MAP_TYPE type, int allow_update_cdf,
-                            struct FRAME_COUNTS *counts);
-
-static INLINE int av1_get_tx_eob(const struct segmentation *seg, int segment_id,
-                                 TX_SIZE tx_size) {
-  const int eob_max = av1_get_max_eob(tx_size);
-  return segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_TOKENIZE_H_
diff --git a/third_party/aom/av1/encoder/tx_prune_model_weights.h b/third_party/aom/av1/encoder/tx_prune_model_weights.h
deleted file mode 100644
index 405bc9e6e..000000000
--- a/third_party/aom/av1/encoder/tx_prune_model_weights.h
+++ /dev/null
@@ -1,1944 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_TX_PRUNE_MODEL_WEIGHTS_H_
-#define AOM_AV1_ENCODER_TX_PRUNE_MODEL_WEIGHTS_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "av1/encoder/ml.h"
-
-// Tx type model for 4x4 block.
-static const float av1_tx_type_nn_weights_4x4_hor_layer0[32] = {
-  -1.64947f, -1.54497f, -1.62832f, -0.17774f, -2.89498f, -0.72498f, 0.72036f,
-  0.17996f,  1.20000f,  -0.27654f, 0.77396f,  1.21684f,  -1.75909f, -0.51272f,
-  -1.25923f, 0.35005f,  -0.04257f, -0.23389f, -0.41841f, -0.08229f, 0.09503f,
-  2.73144f,  -0.16875f, -0.23482f, 0.02194f,  -0.26427f, 0.28049f,  0.21260f,
-  1.35792f,  0.27733f,  0.88660f,  -0.68304f,
-};
-
-static const float av1_tx_type_nn_bias_4x4_hor_layer0[8] = {
-  1.38742f, 0.59540f,  -1.37622f, 1.92114f,
-  0.00000f, -0.38998f, -0.32726f, -0.15650f,
-};
-
-static const float av1_tx_type_nn_weights_4x4_hor_layer1[32] = {
-  1.65254f,  1.00915f,  -0.89318f, -2.05142f, -0.23235f, 0.96781f,  -0.37145f,
-  -0.21056f, 1.13891f,  0.38675f,  0.87739f,  -1.42697f, 0.48015f,  0.61883f,
-  -0.03979f, 0.11487f,  0.48042f,  0.45200f,  -0.23242f, 0.75166f,  0.55458f,
-  0.39452f,  -0.35285f, 1.59120f,  -1.49221f, -0.48349f, -0.64692f, 1.49297f,
-  -0.26782f, -0.65416f, -0.10648f, 0.05568f,
-};
-
-static const float av1_tx_type_nn_bias_4x4_hor_layer1[4] = {
-  4.07177f,
-  3.26961f,
-  0.58083f,
-  1.21199f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_4x4_hor = {
-  4,  // num_inputs
-  4,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      8,
-  },  // num_hidden_nodes
-  { av1_tx_type_nn_weights_4x4_hor_layer0,
-    av1_tx_type_nn_weights_4x4_hor_layer1 },
-  { av1_tx_type_nn_bias_4x4_hor_layer0, av1_tx_type_nn_bias_4x4_hor_layer1 }
-};
-
-static const float av1_tx_type_nn_weights_4x4_ver_layer0[32] = {
-  -0.02032f, 2.61610f,  0.02098f,  -0.30217f, 0.12637f,  0.11017f,  -3.01996f,
-  0.35144f,  1.93776f,  -0.20463f, 1.64102f,  -1.41986f, -3.66717f, -0.51655f,
-  0.43910f,  0.37778f,  -1.02634f, 0.85337f,  -0.69753f, 1.00206f,  2.11784f,
-  1.89427f,  1.92919f,  0.43201f,  -1.67358f, -1.67035f, -1.54623f, 0.16714f,
-  -0.06589f, -0.28142f, -0.33118f, 1.72227f,
-};
-
-static const float av1_tx_type_nn_bias_4x4_ver_layer0[8] = {
-  -0.33685f, 0.22025f,  0.28140f, 0.56138f,
-  0.93489f,  -1.77048f, 1.34989f, -0.93747f,
-};
-
-static const float av1_tx_type_nn_weights_4x4_ver_layer1[32] = {
-  -1.39506f, -1.06271f, -1.10886f, -1.69719f, 0.19699f,  -2.39850f, -1.26457f,
-  0.75328f,  -1.26005f, -0.82738f, -0.12015f, -1.02702f, 1.40828f,  -2.37739f,
-  -0.65639f, -0.71992f, -0.90453f, -1.12510f, -2.41362f, -1.16061f, -1.85577f,
-  -0.99165f, -1.91366f, 0.16785f,  0.34776f,  0.58154f,  -0.18217f, -0.29257f,
-  -0.86315f, -0.53336f, 0.30320f,  -1.32331f,
-};
-
-static const float av1_tx_type_nn_bias_4x4_ver_layer1[4] = {
-  -1.31519f,
-  -3.26321f,
-  1.71794f,
-  -1.90778f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_4x4_ver = {
-  4,  // num_inputs
-  4,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      8,
-  },  // num_hidden_nodes
-  { av1_tx_type_nn_weights_4x4_ver_layer0,
-    av1_tx_type_nn_weights_4x4_ver_layer1 },
-  { av1_tx_type_nn_bias_4x4_ver_layer0, av1_tx_type_nn_bias_4x4_ver_layer1 }
-};
-/******************************************************************************/
-
-// Tx type model for 4x8 block.
-static const float av1_tx_type_nn_weights_4x8_hor_layer0[32] = {
-  0.00218f,  -0.41880f, -0.61215f, -0.92588f, 0.54291f,  -0.10898f, 0.70691f,
-  0.46819f,  -1.61598f, -0.08834f, -0.96839f, 1.18489f,  -0.45171f, -0.65445f,
-  -0.32179f, -0.10399f, 1.04379f,  0.91895f,  0.85589f,  0.08267f,  1.35388f,
-  -2.03096f, 0.08168f,  -0.06372f, -0.26732f, -0.48262f, -0.08682f, 2.44071f,
-  -1.35896f, -1.17121f, 1.68866f,  0.10357f,
-};
-
-static const float av1_tx_type_nn_bias_4x8_hor_layer0[8] = {
-  2.93391f,  0.66831f, -0.21419f, 0.00000f,
-  -0.72878f, 0.15127f, -1.46755f, 0.16658f,
-};
-
-static const float av1_tx_type_nn_weights_4x8_hor_layer1[32] = {
-  -1.52077f, -1.06243f, 0.35319f,  -0.49207f, 0.54524f,  0.44271f, 1.37117f,
-  -0.38957f, -1.28889f, -0.57133f, 0.04658f,  0.62278f,  0.37984f, 0.33247f,
-  1.65547f,  -0.56806f, -1.38645f, -0.76258f, 0.67926f,  0.08783f, -0.01443f,
-  0.34950f,  1.45812f,  -0.51332f, -1.41331f, -0.16453f, 0.05755f, 0.31405f,
-  -0.50191f, 0.18219f,  1.83664f,  -0.75276f,
-};
-
-static const float av1_tx_type_nn_bias_4x8_hor_layer1[4] = {
-  -1.17455f,
-  -2.26089f,
-  -1.79863f,
-  -2.26333f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_4x8_hor = {
-  4,  // num_inputs
-  4,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      8,
-  },  // num_hidden_nodes
-  { av1_tx_type_nn_weights_4x8_hor_layer0,
-    av1_tx_type_nn_weights_4x8_hor_layer1 },
-  { av1_tx_type_nn_bias_4x8_hor_layer0, av1_tx_type_nn_bias_4x8_hor_layer1 }
-};
-
-static const float av1_tx_type_nn_weights_4x8_ver_layer0[128] = {
-  -0.00952f, -0.98858f, -0.93181f, 1.39594f,  0.96559f,  0.18162f,  -0.76064f,
-  -0.06066f, 0.07907f,  -0.09365f, -0.21313f, -0.02187f, -2.61707f, -2.68702f,
-  -0.10982f, 0.18559f,  1.17049f,  1.11387f,  1.12697f,  1.05804f,  1.12764f,
-  1.06318f,  1.12052f,  0.17406f,  1.83157f,  0.19362f,  0.46910f,  0.39608f,
-  0.33342f,  0.40083f,  0.27645f,  1.06864f,  -4.06645f, -0.38775f, -0.11070f,
-  0.03781f,  -0.09141f, 0.06185f,  -0.04852f, 0.20163f,  0.16784f,  0.16641f,
-  -0.50941f, -0.61087f, 2.07008f,  -0.82381f, -0.85558f, 0.05528f,  -0.10535f,
-  -2.81150f, 0.67038f,  0.43643f,  0.49062f,  -0.04465f, 0.90438f,  0.00977f,
-  0.46272f,  1.59751f,  0.95234f,  0.35086f,  0.85624f,  0.73149f,  1.67779f,
-  -2.21511f, -1.24746f, -1.09014f, -0.92441f, -1.22591f, -1.06961f, -0.95897f,
-  -1.24956f, 0.73797f,  1.23275f,  -0.60064f, -0.07851f, 0.14397f,  0.22110f,
-  -0.04422f, 0.14350f,  0.75926f,  0.35032f,  0.48104f,  2.81408f,  0.34662f,
-  0.42090f,  0.35521f,  -1.36804f, -0.14974f, -0.47696f, -0.07892f, 0.36910f,
-  0.32299f,  0.23916f,  0.06032f,  -0.17844f, -0.17558f, -1.42746f, -0.55828f,
-  -1.00418f, -0.64823f, -0.73654f, -0.85197f, -1.50989f, 1.69385f,  -0.04973f,
-  -0.09273f, 1.04249f,  0.79235f,  1.13229f,  0.99617f,  0.03851f,  0.56334f,
-  0.90795f,  1.08296f,  0.58519f,  1.74765f,  0.63971f,  1.35951f,  0.07803f,
-  -0.05127f, 0.26514f,  -0.84629f, -0.66343f, -2.10630f, 0.11017f,  2.18528f,
-  -0.21958f, 0.05970f,
-};
-
-static const float av1_tx_type_nn_bias_4x8_ver_layer0[16] = {
-  0.04205f, 0.22260f, -1.03870f, -1.19568f, 0.44283f,  0.01143f,
-  0.00235f, 4.26772f, 0.44364f,  -0.33199f, -0.39076f, -0.35129f,
-  0.08288f, 0.18195f, -0.79890f, 0.10047f,
-};
-
-static const float av1_tx_type_nn_weights_4x8_ver_layer1[64] = {
-  -0.38193f, -0.12095f, 1.57802f,  0.34932f,  -0.47333f, -0.12304f, -0.01736f,
-  -2.52445f, 0.18983f,  -0.64707f, -0.60889f, -0.53750f, 0.91666f,  -0.62823f,
-  -0.13377f, -0.43594f, -0.38618f, -0.01328f, 0.97457f,  1.48589f,  -1.03238f,
-  -0.33459f, -0.35108f, -2.42417f, 0.60229f,  0.06824f,  -0.75495f, 0.26902f,
-  0.65311f,  -0.23887f, -0.44604f, -0.55800f, -0.33842f, 0.04259f,  -0.59589f,
-  0.49738f,  -0.62301f, -0.30896f, -0.29602f, -2.57052f, 2.00943f,  -0.66490f,
-  -0.76312f, 0.28256f,  1.06311f,  -0.38364f, -0.63508f, -0.57609f, -0.88765f,
-  -1.04403f, -0.46531f, 0.34084f,  -1.20498f, -0.68352f, -0.72251f, -2.63242f,
-  -0.68736f, -0.37904f, -1.32371f, 0.47288f,  1.51904f,  0.78372f,  -1.01830f,
-  -1.01848f,
-};
-
-static const float av1_tx_type_nn_bias_4x8_ver_layer1[4] = {
-  -1.45955f,
-  -2.08949f,
-  -1.24813f,
-  -1.55368f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_4x8_ver = {
-  8,  // num_inputs
-  4,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      16,
-  },  // num_hidden_nodes
-  { av1_tx_type_nn_weights_4x8_ver_layer0,
-    av1_tx_type_nn_weights_4x8_ver_layer1 },
-  { av1_tx_type_nn_bias_4x8_ver_layer0, av1_tx_type_nn_bias_4x8_ver_layer1 }
-};
-/******************************************************************************/
-
-// Tx type model for 8x4 block.
-static const float av1_tx_type_nn_weights_8x4_hor_layer0[128] = {
-  -0.22492f, 0.13341f,  -4.03243f, -0.64015f, 0.02783f,  0.60466f,  -0.13335f,
-  0.16828f,  0.12336f,  0.52904f,  1.18455f,  -0.32425f, 0.13052f,  0.93810f,
-  -3.71165f, 0.02990f,  -4.63558f, 0.05666f,  0.03524f,  -0.07449f, -0.44006f,
-  -0.33215f, -0.33713f, 0.08097f,  0.60873f,  0.29582f,  0.21696f,  -0.78729f,
-  -0.16757f, -0.26567f, -0.00720f, -1.11226f, 1.58189f,  1.58463f,  1.48536f,
-  1.54374f,  1.60069f,  1.46125f,  1.53932f,  0.05974f,  -1.82192f, 0.47043f,
-  0.38090f,  0.20833f,  -0.05637f, 0.05183f,  0.01323f,  -0.25662f, 0.78634f,
-  -0.55069f, -0.02975f, -1.29294f, -0.77192f, -2.34299f, -1.28074f, 0.77894f,
-  -1.69740f, -1.66032f, -1.44323f, -1.55063f, -1.50845f, -1.23690f, -1.80663f,
-  0.75079f,  2.32551f,  0.05878f,  0.80438f,  0.88584f,  0.69153f,  0.89060f,
-  0.73660f,  0.87259f,  -0.00745f, -1.30044f, -0.59430f, 2.07270f,  1.03307f,
-  -0.84697f, -1.19393f, 0.17549f,  -0.24978f, -3.67234f, 0.20781f,  -0.53946f,
-  -0.05068f, 0.88274f,  1.30371f,  0.10288f,  0.07585f,  0.12259f,  -0.30815f,
-  0.25437f,  -2.82096f, -2.69482f, 0.02370f,  0.12500f,  -0.21019f, -0.49220f,
-  0.03638f,  -0.29795f, 0.28645f,  -0.48432f, -0.38584f, -0.32148f, -0.47197f,
-  0.32437f,  0.32528f,  -0.19437f, 0.30383f,  -0.31879f, 0.26359f,  -0.12164f,
-  -0.43647f, -0.08288f, -0.33438f, -0.63608f, -0.46647f, -0.46574f, 0.47806f,
-  -0.49012f, -1.51234f, -1.13502f, -1.20470f, -1.02913f, -1.09182f, -0.93921f,
-  -1.85523f, 0.92532f,
-};
-
-static const float av1_tx_type_nn_bias_8x4_hor_layer0[16] = {
-  0.36631f,  0.02901f,  0.64305f,  1.53074f, -1.40229f, 0.03852f,
-  -0.05043f, 0.89632f,  -1.23312f, 0.07036f, 0.17070f,  0.56250f,
-  -0.28958f, -0.32869f, -0.01704f, 0.68171f,
-};
-
-static const float av1_tx_type_nn_weights_8x4_hor_layer1[64] = {
-  -0.49441f, -0.31960f, -0.84946f, -0.85800f, -2.37767f, 0.81373f,  -0.73172f,
-  -0.69337f, 0.88807f,  -0.49242f, -0.44717f, -0.11436f, 0.09978f,  0.15393f,
-  0.17083f,  1.44850f,  -0.20582f, -0.04906f, 0.42990f,  -0.61939f, -1.09692f,
-  -1.14885f, -1.36879f, -1.30828f, -0.59558f, -0.30903f, -0.08906f, 0.06953f,
-  0.15383f,  -0.04193f, -0.54858f, 1.82676f,  -0.22411f, 0.05264f,  -0.45848f,
-  -0.72985f, 0.87553f,  0.04116f,  -1.29774f, -2.63018f, 1.09089f,  -0.36048f,
-  -0.16725f, 0.11627f,  0.49918f,  0.07539f,  0.00763f,  0.73706f,  0.87800f,
-  0.57049f,  0.60969f,  1.02779f,  1.53339f,  -0.35915f, 0.06410f,  1.44582f,
-  0.09698f,  0.71888f,  0.60594f,  0.84103f,  -0.50440f, -0.38825f, 0.15626f,
-  -1.10654f,
-};
-
-static const float av1_tx_type_nn_bias_8x4_hor_layer1[4] = {
-  -0.92861f,
-  -1.45151f,
-  -1.33588f,
-  -4.33853f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_8x4_hor = {
-  8,  // num_inputs
-  4,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      16,
-  },  // num_hidden_nodes
-  { av1_tx_type_nn_weights_8x4_hor_layer0,
-    av1_tx_type_nn_weights_8x4_hor_layer1 },
-  { av1_tx_type_nn_bias_8x4_hor_layer0, av1_tx_type_nn_bias_8x4_hor_layer1 }
-};
-
-static const float av1_tx_type_nn_weights_8x4_ver_layer0[32] = {
-  -1.10946f, 1.86574f,  -1.59343f, 0.27018f, -1.70676f, -0.73982f, -0.19021f,
-  -1.94208f, -2.29759f, -1.44402f, 0.28700f, -1.18340f, -1.50158f, -0.44175f,
-  -1.36831f, 1.00374f,  2.59312f,  0.50291f, -0.71042f, -0.12238f, -0.15901f,
-  -0.22807f, -0.67376f, -0.30215f, 0.54407f, -0.45538f, 1.18262f,  2.28687f,
-  1.66212f,  1.70826f,  1.55182f,  0.12230f,
-};
-
-static const float av1_tx_type_nn_bias_8x4_ver_layer0[8] = {
-  0.10943f,  2.09789f, 2.16578f, 0.15766f,
-  -0.42461f, 0.00000f, 1.22090f, -1.28717f,
-};
-
-static const float av1_tx_type_nn_weights_8x4_ver_layer1[32] = {
-  1.20426f,  -1.23237f, 2.41053f, -0.72488f, 1.25249f,  0.18018f,  -0.09586f,
-  2.17901f,  0.15364f,  1.21535f, -0.38263f, -0.74309f, 0.50551f,  -0.54208f,
-  0.59139f,  1.16095f,  0.55919f, -0.60183f, 1.18949f,  1.60787f,  0.54002f,
-  -0.10712f, -0.16153f, 0.16207f, -0.32338f, 2.68712f,  -2.83483f, -0.27086f,
-  -1.15005f, -0.39311f, 1.51236f, -1.68973f,
-};
-
-static const float av1_tx_type_nn_bias_8x4_ver_layer1[4] = {
-  1.81013f,
-  1.10517f,
-  2.90059f,
-  0.95391f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_8x4_ver = {
-  4,  // num_inputs
-  4,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      8,
-  },  // num_hidden_nodes
-  { av1_tx_type_nn_weights_8x4_ver_layer0,
-    av1_tx_type_nn_weights_8x4_ver_layer1 },
-  { av1_tx_type_nn_bias_8x4_ver_layer0, av1_tx_type_nn_bias_8x4_ver_layer1 }
-};
-/******************************************************************************/
-
-// Tx type model for 8x8 block.
-static const float av1_tx_type_nn_weights_8x8_hor_layer0[128] = {
-  -0.85529f, 0.37619f,  0.12754f,  0.08622f,  0.45278f,  0.54929f,  1.60651f,
-  -0.62654f, -0.54929f, -0.10131f, -0.17569f, 0.13948f,  0.31695f,  -0.05616f,
-  0.20483f,  -0.36448f, 2.27203f,  -0.33087f, 0.47679f,  0.86888f,  0.39370f,
-  0.46239f,  0.01113f,  1.50327f,  -1.48226f, -1.69621f, -1.49777f, -1.38885f,
-  -1.37753f, -1.22681f, -1.70576f, 0.51329f,  -1.65662f, 1.74197f,  -0.13579f,
-  -0.13133f, -0.58396f, -0.55510f, -1.10709f, -2.34975f, 0.22445f,  -0.56491f,
-  -0.83432f, 0.13492f,  1.32147f,  2.85285f,  0.13819f,  0.03792f,  -1.30792f,
-  0.04155f,  -0.70644f, -0.43430f, -0.16212f, -0.86945f, -1.16976f, 1.68339f,
-  0.29540f,  0.01137f,  -0.25335f, -0.16856f, 0.12028f,  0.05207f,  0.39357f,
-  -0.01545f, -0.21980f, -1.94091f, -1.01315f, -0.68270f, -0.40590f, -0.67111f,
-  2.08283f,  0.19291f,  -4.81426f, -0.65044f, -0.24598f, 0.06371f,  -0.10272f,
-  -0.14502f, -0.06821f, 0.45202f,  0.21091f,  -0.80864f, 0.39255f,  1.79189f,
-  1.80453f,  1.10484f,  1.17608f,  0.96901f,  -0.35871f, -0.94311f, 0.63147f,
-  2.95157f,  0.45917f,  -0.42849f, -0.55643f, -0.06097f, 3.49299f,  -0.50972f,
-  0.11075f,  -0.08405f, -0.09274f, -0.22694f, -0.42426f, 0.48632f,  -1.61074f,
-  1.82998f,  0.37623f,  -1.20330f, -0.01142f, -1.33307f, -0.27492f, -2.23621f,
-  1.38846f,  1.42085f,  1.42568f,  1.36152f,  1.46910f,  1.27473f,  1.34752f,
-  0.12753f,  -1.08197f, -1.08280f, -0.79489f, -1.12338f, -1.06795f, -0.87857f,
-  -0.99892f, 1.09823f,
-};
-
-static const float av1_tx_type_nn_bias_8x8_hor_layer0[16] = {
-  -0.49232f, -0.29685f, -1.44020f, 1.10940f,  1.16452f, -0.34862f,
-  -0.38761f, -0.36243f, 0.21776f,  0.28234f,  2.34269f, -0.04104f,
-  -0.26319f, 2.65579f,  -1.30137f, -0.01487f,
-};
-
-static const float av1_tx_type_nn_weights_8x8_hor_layer1[64] = {
-  -0.38058f, -0.41295f, -1.26884f, -0.75560f, -1.57450f, 0.56072f,  -1.42322f,
-  -0.29106f, 0.07228f,  0.04391f,  1.61388f,  -0.03055f, 0.81637f,  2.06045f,
-  0.27119f,  -0.48328f, -0.45528f, -0.60534f, -1.61209f, -0.78157f, -1.65034f,
-  0.60958f,  -1.30523f, 0.25143f,  0.11398f,  0.37860f,  1.54829f,  0.02309f,
-  0.67288f,  2.11447f,  0.44845f,  -0.70406f, -0.67897f, -0.38759f, -1.30383f,
-  -1.22646f, -1.54571f, 0.60552f,  -1.52565f, 0.11469f,  0.17344f,  0.08622f,
-  1.57906f,  -0.00909f, 0.81634f,  2.04909f,  1.26466f,  -1.45741f, -0.75229f,
-  0.06200f,  -1.05835f, -0.66257f, -1.73766f, 0.99923f,  -1.87082f, 0.14580f,
-  0.49525f,  0.46839f,  1.32203f,  0.33923f,  0.97001f,  2.38584f,  1.58811f,
-  0.06161f,
-};
-
-static const float av1_tx_type_nn_bias_8x8_hor_layer1[4] = {
-  1.70385f,
-  1.82373f,
-  1.78496f,
-  1.80826f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_8x8_hor = {
-  8,  // num_inputs
-  4,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      16,
-  },  // num_hidden_nodes
-  { av1_tx_type_nn_weights_8x8_hor_layer0,
-    av1_tx_type_nn_weights_8x8_hor_layer1 },
-  { av1_tx_type_nn_bias_8x8_hor_layer0, av1_tx_type_nn_bias_8x8_hor_layer1 }
-};
-
-static const float av1_tx_type_nn_weights_8x8_ver_layer0[128] = {
-  -0.67016f, -1.72366f, -1.86576f, -1.50962f, -1.70419f, -1.73964f, -1.84615f,
-  2.09681f,  -0.05081f, -0.61030f, 2.02541f,  0.60222f,  0.99936f,  2.02114f,
-  -0.53893f, -0.23757f, 0.73566f,  0.25443f,  0.00132f,  -0.74036f, -0.75351f,
-  -0.76964f, -1.71007f, -0.15770f, 1.60982f,  2.17638f,  0.90681f,  0.64973f,
-  0.85914f,  0.58786f,  -1.46228f, 0.05187f,  1.18804f,  0.30850f,  0.29512f,
-  0.40526f,  0.37635f,  0.32311f,  0.37471f,  1.12346f,  3.41856f,  -0.36653f,
-  0.42537f,  -0.19240f, 0.00155f,  0.30826f,  -0.02116f, -0.53435f, -0.34829f,
-  -0.52466f, -0.11521f, -0.29163f, -2.05689f, -2.87372f, -0.62626f, 0.09585f,
-  -0.75257f, 0.10057f,  1.43474f,  0.89450f,  0.75900f,  1.11147f,  1.00558f,
-  0.25886f,  2.22095f,  -0.17926f, 0.57161f,  0.39546f,  0.47846f,  0.40452f,
-  0.54298f,  0.45814f,  -3.62788f, -3.02374f, 0.03716f,  -0.13937f, -0.09415f,
-  -0.12463f, 0.05682f,  0.03672f,  1.20746f,  1.25003f,  1.27071f,  1.31883f,
-  1.27473f,  1.34943f,  1.23158f,  0.09039f,  0.19388f,  0.63420f,  2.79612f,
-  0.93803f,  -0.11323f, -0.02027f, 0.41286f,  -0.05979f, -3.80705f, -0.52451f,
-  -0.77098f, -0.68132f, -0.65559f, -0.60975f, -1.26165f, 0.25582f,  0.05346f,
-  0.61403f,  0.32140f,  -2.39831f, -1.42355f, 1.30541f,  1.02361f,  0.12930f,
-  -1.61469f, -0.77036f, -0.59144f, 1.27769f,  1.52068f,  0.82137f,  1.83159f,
-  -0.66626f, -0.69806f, -1.00564f, -0.85995f, -0.90889f, -0.84412f, -0.85712f,
-  -1.29848f, 0.39308f,
-};
-
-static const float av1_tx_type_nn_bias_8x8_ver_layer0[16] = {
-  -0.14868f, -0.48343f, 3.94416f,  -0.78037f, -1.33789f, -0.60611f,
-  0.51793f,  0.44030f,  -0.71563f, 0.22561f,  -1.19083f, -0.46149f,
-  0.83015f,  0.06024f,  1.17180f,  0.65122f,
-};
-
-static const float av1_tx_type_nn_weights_8x8_ver_layer1[64] = {
-  -1.42711f, -0.21683f, 2.12061f,  0.20489f,  -0.50228f, -0.24770f, 0.23391f,
-  1.03470f,  -0.44847f, -0.63225f, -0.21583f, -0.06467f, -0.21892f, -0.07786f,
-  1.43322f,  0.00280f,  -1.53057f, -0.18912f, 1.95333f,  0.31151f,  -2.07601f,
-  0.06776f,  0.25529f,  0.94800f,  -1.11453f, -0.20594f, -0.13281f, 0.01485f,
-  0.17650f,  -0.07955f, 1.43734f,  -0.23193f, -2.06463f, -0.21238f, 2.13707f,
-  0.30351f,  0.27594f,  -0.36245f, 0.19539f,  0.91045f,  -0.24068f, -0.37616f,
-  0.88792f,  0.02947f,  -0.16903f, -0.04932f, 1.51293f,  -0.95967f, -1.62903f,
-  0.05326f,  2.30703f,  0.64445f,  -1.09464f, -0.16623f, 1.00240f,  0.07548f,
-  -0.50406f, 0.63854f,  1.02340f,  0.49833f,  0.13671f,  0.26722f,  2.09516f,
-  -0.41305f,
-};
-
-static const float av1_tx_type_nn_bias_8x8_ver_layer1[4] = {
-  2.14067f,
-  2.76699f,
-  2.04233f,
-  1.34803f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_8x8_ver = {
-  8,  // num_inputs
-  4,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      16,
-  },  // num_hidden_nodes
-  { av1_tx_type_nn_weights_8x8_ver_layer0,
-    av1_tx_type_nn_weights_8x8_ver_layer1 },
-  { av1_tx_type_nn_bias_8x8_ver_layer0, av1_tx_type_nn_bias_8x8_ver_layer1 }
-};
-/******************************************************************************/
-
-// Tx type model for 8x16 block.
-static const float av1_tx_type_nn_weights_8x16_hor_layer0[128] = {
-  -1.61872f, -1.58520f, -1.41236f, -1.53255f, -1.59794f, -1.25769f, -1.90043f,
-  0.73431f,  1.10135f,  0.47054f,  0.43230f,  -0.43009f, -0.09135f, -0.07289f,
-  -0.38785f, 1.23775f,  -0.35312f, 0.73789f,  0.88864f,  0.75957f,  0.62579f,
-  0.46974f,  0.21851f,  1.63821f,  -2.27289f, -0.68522f, -0.69814f, -0.84368f,
-  -0.91320f, -0.63055f, -1.03296f, 0.55778f,  -0.00071f, 1.27539f,  1.60068f,
-  1.40975f,  0.97372f,  0.92843f,  1.90853f,  0.12626f,  1.71953f,  1.41978f,
-  -0.12234f, -1.27058f, 0.76207f,  0.02495f,  -0.67038f, -0.05255f, 1.72923f,
-  1.47630f,  1.47058f,  1.47614f,  1.49354f,  1.66131f,  1.50801f,  0.17145f,
-  -2.30947f, -2.10850f, -1.25636f, -0.24900f, 0.72602f,  1.26572f,  0.97865f,
-  -0.65466f, 1.31129f,  0.26916f,  0.12139f,  -0.12761f, -0.39143f, -0.28134f,
-  0.06584f,  2.24418f,  0.22516f,  0.05011f,  -0.01671f, -0.29476f, -0.40326f,
-  0.21138f,  -0.11573f, -0.31154f, -0.36828f, 0.03694f,  -0.07172f, -0.63419f,
-  -3.14351f, -1.23125f, 0.65311f,  -0.11406f, 1.97287f,  -0.10422f, 0.83896f,
-  0.85033f,  0.49724f,  0.80482f,  0.51454f,  1.06447f,  0.76693f,  0.72599f,
-  -0.78573f, -0.53950f, 0.40894f,  0.00086f,  0.10784f,  -0.70498f, 1.16395f,
-  1.14597f,  1.13496f,  1.12177f,  1.02100f,  -1.37574f, -2.97144f, 0.33899f,
-  0.42013f,  0.86327f,  2.31983f,  2.04008f,  0.95503f,  0.15081f,  0.11530f,
-  -0.02574f, -4.77119f, 0.13257f,  -0.01704f, -0.23087f, -0.00825f, 0.07029f,
-  -0.28136f, 0.42556f,
-};
-
-static const float av1_tx_type_nn_bias_8x16_hor_layer0[16] = {
-  0.93617f,  -0.24000f, -1.26821f, 0.78780f,  0.13690f, -0.21948f,
-  -1.45162f, 0.44584f,  -1.92582f, -0.23169f, 0.56004f, -1.19937f,
-  1.81560f,  -1.02643f, -0.81690f, 0.08302f,
-};
-
-static const float av1_tx_type_nn_weights_8x16_hor_layer1[64] = {
-  0.06696f,  -0.11538f, -1.42029f, 0.32965f,  0.81046f,  0.01146f,  1.20945f,
-  -0.16899f, 0.53224f,  -0.40232f, 0.01786f,  -0.73242f, 1.29750f,  1.95185f,
-  0.70143f,  1.43287f,  0.76220f,  0.79937f,  -1.79011f, -1.15178f, 0.42526f,
-  -0.67519f, 0.77267f,  -0.30697f, 2.46004f,  -0.49828f, 0.02875f,  1.09972f,
-  1.47662f,  0.61719f,  0.61417f,  -0.12363f, 2.53048f,  0.00418f,  -1.38964f,
-  0.88117f,  0.39239f,  -0.19347f, -2.58600f, -0.33715f, 1.09323f,  -0.32127f,
-  0.02456f,  -0.19125f, 1.12728f,  0.66502f,  0.34296f,  1.14897f,  0.29967f,
-  1.19209f,  0.22108f,  -0.11975f, 1.49776f,  -1.34624f, -2.58478f, -1.34632f,
-  1.53207f,  0.45634f,  -1.48476f, 0.17489f,  0.71790f,  -2.12086f, -1.21778f,
-  -1.31243f,
-};
-
-static const float av1_tx_type_nn_bias_8x16_hor_layer1[4] = {
-  0.83359f,
-  1.06875f,
-  1.77645f,
-  1.49570f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_8x16_hor = {
-  8,  // num_inputs
-  4,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      16,
-  },  // num_hidden_nodes
-  { av1_tx_type_nn_weights_8x16_hor_layer0,
-    av1_tx_type_nn_weights_8x16_hor_layer1 },
-  { av1_tx_type_nn_bias_8x16_hor_layer0, av1_tx_type_nn_bias_8x16_hor_layer1 }
-};
-
-static const float av1_tx_type_nn_weights_8x16_ver_layer0[128] = {
-  0.32858f,  -1.28887f, 0.25632f,  -0.05262f, 2.69203f,  -0.07004f, 1.37337f,
-  -0.05725f, -0.05659f, 0.05592f,  0.01039f,  -0.29343f, 1.58628f,  -0.30003f,
-  -3.43118f, 0.00272f,  1.70928f,  -0.76348f, 0.05889f,  -0.03263f, -0.07724f,
-  0.03523f,  -0.19890f, 1.18005f,  -0.03605f, -0.20530f, -4.00733f, 0.10210f,
-  -0.05368f, -0.17650f, -0.15317f, 0.06499f,  0.56705f,  1.04341f,  0.62890f,
-  0.73451f,  -0.22199f, 0.86659f,  0.78443f,  -0.61664f, -0.50606f, 0.30247f,
-  0.14455f,  0.39276f,  0.49203f,  0.65019f,  0.12269f,  1.64080f,  1.68289f,
-  1.42694f,  1.60825f,  1.58501f,  1.47252f,  1.62589f,  1.48218f,  0.17726f,
-  -0.04884f, 0.35376f,  -0.04796f, 0.32589f,  0.35087f,  0.35258f,  -0.46103f,
-  -0.31176f, -0.05203f, 0.07247f,  -0.26756f, 0.22019f,  0.03412f,  0.33773f,
-  0.29811f,  -0.11140f, 0.12831f,  -0.44673f, -0.09858f, 0.07889f,  0.15137f,
-  0.00347f,  -0.23394f, 0.08886f,  -0.31201f, -0.79912f, -0.51092f, 0.14123f,
-  -1.09599f, -4.26020f, -0.68675f, -0.02842f, -1.54538f, -1.28977f, -1.30558f,
-  -1.21074f, -1.37142f, -1.14743f, -1.85397f, 0.82985f,  -0.30681f, 0.04494f,
-  -0.24023f, -4.18053f, -0.16096f, -0.55492f, -0.27882f, 0.05829f,  -0.41224f,
-  -2.52088f, -0.56162f, -1.04547f, -1.70685f, -0.28842f, -1.43673f, -0.01468f,
-  -3.20585f, -0.69120f, -0.43931f, -0.46270f, -0.65885f, -0.55884f, -0.75138f,
-  0.36381f,  -5.70858f, -0.14548f, -0.15745f, -0.11812f, -0.07605f, -0.07693f,
-  -0.12236f, 0.16075f,
-};
-
-static const float av1_tx_type_nn_bias_8x16_ver_layer0[16] = {
-  -0.35385f, 0.30491f,  -0.90011f, 0.42941f,  1.20928f, -0.88331f,
-  -1.48818f, -0.34785f, -0.32668f, -0.22695f, 0.89188f, 0.65521f,
-  0.57598f,  0.99819f,  0.75175f,  0.17044f,
-};
-
-static const float av1_tx_type_nn_weights_8x16_ver_layer1[64] = {
-  -0.62913f, -0.34304f, 0.42963f,  -0.17440f, -1.44092f, 0.69142f,  -1.36067f,
-  0.52211f,  0.44658f,  -0.26501f, -0.41657f, 0.34428f,  -0.34390f, -0.58567f,
-  -0.84097f, -1.96311f, -0.37215f, -0.22250f, -1.23811f, -0.07247f, -0.81731f,
-  0.58755f,  -1.30559f, 0.39551f,  0.41743f,  -0.09940f, -0.33230f, 0.14458f,
-  -0.25139f, -0.54517f, 0.13469f,  -0.38157f, -0.39109f, -0.18205f, 0.06834f,
-  -0.08395f, -0.92187f, 0.56724f,  1.44381f,  0.53226f,  -0.22356f, 0.12285f,
-  -0.29418f, -1.86749f, -0.22372f, -0.60204f, -0.87746f, -1.16936f, 0.56884f,
-  0.62641f,  -0.11823f, 1.00395f,  1.64794f,  -0.64535f, 2.29322f,  -0.23397f,
-  0.17251f,  -0.35927f, 0.65631f,  -0.26812f, 0.80128f,  0.85748f,  0.47404f,
-  2.20547f,
-};
-
-static const float av1_tx_type_nn_bias_8x16_ver_layer1[4] = {
-  -0.44080f,
-  -1.67455f,
-  -1.46332f,
-  -6.13206f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_8x16_ver = {
-  8,  // num_inputs
-  4,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      16,
-  },  // num_hidden_nodes
-  { av1_tx_type_nn_weights_8x16_ver_layer0,
-    av1_tx_type_nn_weights_8x16_ver_layer1 },
-  { av1_tx_type_nn_bias_8x16_ver_layer0, av1_tx_type_nn_bias_8x16_ver_layer1 }
-};
-/******************************************************************************/
-
-// Tx type model for 16x8 block.
-static const float av1_tx_type_nn_weights_16x8_hor_layer0[128] = {
-  0.02600f,  0.09786f,  -1.05107f, -0.35594f, -0.15658f, 2.99828f,  -0.07106f,
-  -0.10101f, -0.14412f, -0.83790f, -0.19434f, 2.28368f,  1.91727f,  -0.00956f,
-  -0.90640f, 0.09174f,  1.58895f,  1.38945f,  1.49431f,  1.51381f,  1.44803f,
-  1.53544f,  1.44694f,  0.17753f,  1.69735f,  -0.78652f, 0.31092f,  -0.23736f,
-  0.02231f,  -0.09884f, -0.00493f, 1.21189f,  -1.94382f, -0.34629f, -0.58309f,
-  0.72291f,  -0.30056f, 0.90660f,  -0.57495f, 3.07809f,  0.73644f,  1.43050f,
-  1.34356f,  -0.66554f, 0.50102f,  -0.64305f, 0.42044f,  -1.66165f, -0.05733f,
-  -2.51402f, -1.01067f, -0.33390f, -0.32986f, -0.92431f, 1.86281f,  -0.07290f,
-  -0.26290f, -0.68941f, 1.81156f,  0.66125f,  -2.09974f, 0.17032f,  -0.67461f,
-  -0.00876f, -1.50154f, 1.17153f,  1.00377f,  0.33022f,  0.74689f,  0.42878f,
-  0.61725f,  -0.83967f, 0.09467f,  -0.39892f, 0.33863f,  0.10656f,  -0.09249f,
-  -0.39757f, 0.48481f,  -0.35162f, 1.47014f,  1.67827f,  -1.84051f, 0.16291f,
-  -0.50135f, -2.29911f, -0.42217f, -0.13358f, 1.45899f,  -0.14743f, -0.02763f,
-  -0.28003f, -0.01364f, 0.21014f,  -0.29026f, -0.20198f, 1.38782f,  0.56731f,
-  0.27489f,  0.43227f,  0.41326f,  0.42721f,  0.87720f,  -1.90067f, -5.04951f,
-  -0.17638f, -0.58119f, -0.08954f, -0.13692f, -0.12325f, -0.38548f, 0.66462f,
-  -1.42377f, -1.21917f, -1.38193f, -1.36539f, -1.39378f, -1.19629f, -1.59812f,
-  0.28689f,  0.32394f,  0.52128f,  0.01013f,  -0.28948f, -0.26293f, -0.44331f,
-  -0.36570f, -0.50757f,
-};
-
-static const float av1_tx_type_nn_bias_16x8_hor_layer0[16] = {
-  -0.08696f, -0.22110f, -1.43604f, -1.00451f, -1.51029f, 0.63736f,
-  0.45260f,  0.16229f,  4.01393f,  -0.21748f, 0.36411f,  -0.08764f,
-  -0.12329f, 0.08986f,  1.08117f,  -0.00220f,
-};
-
-static const float av1_tx_type_nn_weights_16x8_hor_layer1[64] = {
-  0.55824f,  -0.14648f, 0.81947f,  -0.45867f, -1.86078f, -0.17291f, 0.34849f,
-  0.15153f,  1.75625f,  -0.25760f, 0.72015f,  -0.30059f, -0.57975f, 0.07609f,
-  -0.02036f, 0.07912f,  0.57080f,  -0.13792f, 0.74184f,  -0.87669f, -1.87572f,
-  -0.27270f, 0.39751f,  0.19652f,  2.03514f,  -0.32944f, 0.76251f,  0.04399f,
-  -0.63175f, 0.37420f,  0.08309f,  0.04466f,  0.60255f,  -0.12820f, 1.66065f,
-  -0.59496f, -1.94794f, -0.14847f, 0.39424f,  0.16273f,  1.80587f,  0.41197f,
-  0.74691f,  -0.21217f, -0.63173f, 0.09510f,  -0.35538f, -0.04407f, 0.92847f,
-  0.20141f,  1.68680f,  -0.56528f, -2.26960f, 0.12978f,  0.73748f,  0.42438f,
-  2.00673f,  -0.40189f, 0.95423f,  0.23234f,  -0.80953f, 0.65814f,  0.49444f,
-  -0.23347f,
-};
-
-static const float av1_tx_type_nn_bias_16x8_hor_layer1[4] = {
-  3.57175f,
-  2.42612f,
-  3.31259f,
-  2.08287f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_16x8_hor = {
-  8,  // num_inputs
-  4,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      16,
-  },  // num_hidden_nodes
-  { av1_tx_type_nn_weights_16x8_hor_layer0,
-    av1_tx_type_nn_weights_16x8_hor_layer1 },
-  { av1_tx_type_nn_bias_16x8_hor_layer0, av1_tx_type_nn_bias_16x8_hor_layer1 }
-};
-
-static const float av1_tx_type_nn_weights_16x8_ver_layer0[128] = {
-  0.46633f,  1.55328f,  -0.11230f, -0.29571f, 0.18814f,  -1.52430f, -2.34660f,
-  0.08644f,  -1.97718f, -1.29140f, -1.12262f, -1.12985f, -1.25911f, -0.96506f,
-  -1.57129f, 0.96021f,  1.34192f,  1.28623f,  1.21655f,  1.28758f,  1.25482f,
-  1.30195f,  1.19190f,  0.09310f,  0.52072f,  0.91487f,  1.24100f,  1.61236f,
-  1.72166f,  2.20750f,  1.62379f,  -1.43936f, 0.50665f,  0.40213f,  0.66502f,
-  -1.66699f, -3.07618f, 0.05877f,  0.60987f,  -0.09995f, -0.10916f, 0.48049f,
-  0.23812f,  0.39847f,  -0.21682f, -0.63455f, 0.33453f,  -0.67939f, -4.14355f,
-  -0.62756f, -0.22502f, -0.17215f, 0.01062f,  0.27049f,  -0.10748f, 0.30945f,
-  2.72445f,  -0.89181f, -0.06800f, 0.20595f,  -0.73385f, 0.04071f,  -1.30294f,
-  1.83507f,  0.92570f,  0.69609f,  0.76285f,  0.69892f,  0.76409f,  0.63104f,
-  0.73397f,  1.09575f,  -0.20129f, -0.24022f, -0.24599f, -0.59107f, -0.88755f,
-  -0.68987f, -0.75495f, -1.31002f, -1.30237f, -0.94093f, -2.15678f, -1.49303f,
-  -1.17498f, -1.39952f, -0.91270f, -0.05587f, 1.02381f,  -0.75580f, -0.65263f,
-  -0.78996f, -0.71075f, -0.71018f, -0.70350f, -1.26196f, 2.34208f,  -0.53611f,
-  0.19752f,  -0.16842f, -0.24828f, 0.21857f,  0.08222f,  -2.55894f, -1.75702f,
-  0.11394f,  1.03083f,  0.79972f,  -1.54112f, -1.82341f, -0.57597f, -0.02077f,
-  -0.39616f, -0.00995f, -0.12809f, 0.01188f,  -0.25117f, 0.09202f,  0.09336f,
-  -0.05614f, -0.30039f, 0.25834f,  1.19944f,  1.22533f,  0.92330f,  0.75967f,
-  -0.81945f, -0.41647f,
-};
-
-static const float av1_tx_type_nn_bias_16x8_ver_layer0[16] = {
-  0.17841f,  0.67315f,  -1.24450f, 3.13859f,  0.16203f, -0.14992f,
-  0.29553f,  -1.15567f, -0.71421f, 1.15977f,  1.14585f, 3.02460f,
-  -0.04510f, 0.48000f,  -0.09354f, -0.42422f,
-};
-
-static const float av1_tx_type_nn_weights_16x8_ver_layer1[64] = {
-  0.29912f,  -0.10009f, -1.11478f, 1.76812f,  -0.27719f, 0.52148f,  0.17622f,
-  -1.17116f, 0.73397f,  -0.69279f, -0.11080f, 1.53751f,  -1.42003f, 0.14731f,
-  0.13592f,  -0.04883f, 0.39186f,  -0.13655f, -0.43994f, 1.82759f,  -0.25601f,
-  -0.15018f, 0.51920f,  -1.56070f, 0.31683f,  -0.79367f, -0.02904f, 1.28637f,
-  -1.15203f, 0.26627f,  0.42828f,  -0.24258f, 0.38647f,  -0.83352f, 0.32553f,
-  2.09522f,  -0.26822f, -0.42191f, 0.32825f,  -1.30748f, 1.50551f,  -0.52669f,
-  0.20045f,  1.69318f,  -1.47839f, 0.30802f,  -0.07290f, -0.28106f, 0.68192f,
-  -0.15522f, 1.12579f,  2.21921f,  0.09720f,  -0.50265f, 0.83165f,  -1.31721f,
-  0.72422f,  -1.24952f, 0.61653f,  2.04117f,  -1.42406f, 0.52568f,  -0.46180f,
-  -0.00873f,
-};
-
-static const float av1_tx_type_nn_bias_16x8_ver_layer1[4] = {
-  3.34981f,
-  3.74710f,
-  1.38339f,
-  0.45176f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_16x8_ver = {
-  8,  // num_inputs
-  4,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      16,
-  },  // num_hidden_nodes
-  { av1_tx_type_nn_weights_16x8_ver_layer0,
-    av1_tx_type_nn_weights_16x8_ver_layer1 },
-  { av1_tx_type_nn_bias_16x8_ver_layer0, av1_tx_type_nn_bias_16x8_ver_layer1 }
-};
-/******************************************************************************/
-
-// Tx type model for 16x16 block.
-static const float av1_tx_type_nn_weights_16x16_layer0[128] = {
-  1.26592f,  1.36313f,  1.30956f,  1.29926f,  1.48816f,  1.68851f,  1.32000f,
-  0.13321f,  -0.22477f, -0.88906f, -0.19622f, 1.69605f,  1.22180f,  -1.57771f,
-  -1.15765f, 0.05710f,  -1.13355f, -0.85486f, -0.99971f, -0.91571f, -1.06031f,
-  -0.77952f, -1.15723f, 1.17809f,  1.35602f,  -0.05243f, -0.37596f, 0.26108f,
-  0.17611f,  -0.10323f, 0.77279f,  -0.48911f, -0.79308f, 0.55112f,  0.43918f,
-  0.27872f,  0.28714f,  0.45830f,  1.05689f,  0.03705f,  -2.49975f, -0.01940f,
-  0.05709f,  0.07942f,  -0.13290f, -0.10359f, 0.00143f,  0.37303f,  0.96470f,
-  0.53293f,  1.14459f,  0.89185f,  0.43378f,  0.47764f,  0.90924f,  0.15279f,
-  -0.15361f, 0.02949f,  0.42240f,  0.68143f,  0.89588f,  0.73754f,  0.10974f,
-  1.57755f,  -0.39870f, -0.32914f, 0.35638f,  0.34991f,  -0.00003f, -0.23373f,
-  0.29630f,  -0.76699f, -0.01356f, 0.04234f,  0.84253f,  1.92078f,  0.93160f,
-  0.71993f,  0.71604f,  0.76455f,  -1.59782f, 0.32332f,  1.11628f,  0.33062f,
-  -0.03728f, -0.05710f, 0.80447f,  -0.14719f, 1.34658f,  -0.05718f, 0.64015f,
-  0.21926f,  0.41653f,  0.12720f,  0.54092f,  1.39411f,  1.81819f,  -0.24513f,
-  0.00955f,  0.38011f,  -0.57787f, -0.41759f, 0.68834f,  -0.31783f, -0.40607f,
-  -0.10107f, -0.79374f, 0.75599f,  -0.16282f, -0.14490f, -0.20783f, -0.55019f,
-  -0.13793f, -0.22293f, 0.18305f,  0.12445f,  0.56830f,  0.24567f,  0.09278f,
-  0.70803f,  0.35803f,  -1.52676f, -0.89624f, 0.77665f,  0.19877f,  0.77175f,
-  0.50355f,  0.08592f,
-};
-
-static const float av1_tx_type_nn_bias_16x16_layer0[16] = {
-  -1.31834f, 0.14346f,  -0.10062f, 0.84489f,  0.95617f,  -0.06720f,
-  -0.68502f, -0.91442f, -0.31932f, 0.25276f,  -0.15138f, -1.57661f,
-  -0.14062f, -0.42120f, 0.94573f,  -0.09287f,
-};
-
-static const float av1_tx_type_nn_weights_16x16_layer1[64] = {
-  -1.80333f, -1.06353f, 0.55139f,  0.74644f,  0.13747f, -0.93018f, -0.10286f,
-  0.67133f,  0.24460f,  1.44583f,  0.02173f,  0.26037f, -0.73687f, 0.19566f,
-  0.61846f,  -0.58601f, -1.03196f, -0.74415f, 0.30041f, -0.41967f, 1.08740f,
-  0.96224f,  -0.59139f, 0.03813f,  0.05403f,  1.33427f, -0.54375f, -1.92181f,
-  0.54704f,  0.13608f,  0.22151f,  -0.38076f, 1.18390f, -0.77508f, -1.84283f,
-  1.00894f,  0.62318f,  -0.15296f, 1.27600f,  0.22822f, 0.12751f,  0.93910f,
-  -0.28502f, 0.53912f,  -0.96889f, 0.10182f,  0.81508f, -0.43028f, 2.67386f,
-  0.52204f,  0.49820f,  -0.41711f, 1.05038f,  1.12192f, 0.74349f,  -0.75417f,
-  -0.03718f, -0.35769f, 0.89651f,  0.63236f,  0.54215f, -0.07894f, 0.48274f,
-  1.08829f,
-};
-
-static const float av1_tx_type_nn_bias_16x16_layer1[4] = {
-  0.81986f,
-  1.26865f,
-  0.11118f,
-  2.48404f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_16x16 = {
-  8,  // num_inputs
-  4,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      16,
-  },  // num_hidden_nodes
-  {
-      av1_tx_type_nn_weights_16x16_layer0,
-      av1_tx_type_nn_weights_16x16_layer1,
-  },
-  {
-      av1_tx_type_nn_bias_16x16_layer0,
-      av1_tx_type_nn_bias_16x16_layer1,
-  },
-};
-/******************************************************************************/
-
-// Tx type model for 4x16 block.
-static const float av1_tx_type_nn_weights_4x16_hor_layer0[32] = {
-  0.36539f,  0.25667f,  0.01491f,  -0.21959f, 2.55105f,  0.17615f, 1.79884f,
-  1.65936f,  -0.44363f, 0.00706f,  -0.68004f, -0.64360f, 1.75760f, 1.91906f,
-  1.47682f,  0.09650f,  -3.59244f, -0.35004f, 0.93295f,  0.25806f, -0.08154f,
-  0.79332f,  0.79535f,  1.09467f,  1.57855f,  -0.51359f, 0.90553f, -1.67744f,
-  -1.74563f, -0.88830f, -1.77603f, 2.15935f,
-};
-
-static const float av1_tx_type_nn_bias_4x16_hor_layer0[8] = {
-  -0.36435f, -2.22731f, -0.00837f, -1.34546f,
-  0.62806f,  -0.20675f, 4.91940f,  -0.56079f,
-};
-
-static const float av1_tx_type_nn_weights_4x16_hor_layer1[32] = {
-  -0.57191f, -1.46418f, 0.67331f,  -1.15027f, 0.46288f,  0.81251f,  2.51768f,
-  -0.27147f, 0.00761f,  -2.15214f, -0.69650f, -0.50808f, 0.92832f,  0.45668f,
-  2.34201f,  -0.52941f, 0.51008f,  -1.55496f, -0.01371f, -0.12356f, 0.66624f,
-  0.88043f,  2.64862f,  -1.28024f, -0.17578f, -1.80034f, -0.32217f, 0.89519f,
-  1.28413f,  -0.30326f, 2.45329f,  -0.83335f,
-};
-
-static const float av1_tx_type_nn_bias_4x16_hor_layer1[4] = {
-  2.33198f,
-  3.36245f,
-  1.62603f,
-  2.91056f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_4x16_hor = {
-  4,  // num_inputs
-  4,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      8,
-  },  // num_hidden_nodes
-  { av1_tx_type_nn_weights_4x16_hor_layer0,
-    av1_tx_type_nn_weights_4x16_hor_layer1 },
-  { av1_tx_type_nn_bias_4x16_hor_layer0, av1_tx_type_nn_bias_4x16_hor_layer1 }
-};
-
-static const float av1_tx_type_nn_weights_4x16_ver_layer0[128] = {
-  1.61392f,  1.41239f,  1.47646f,  1.47325f,  1.46110f,  1.49208f,  1.49414f,
-  0.12835f,  -0.76986f, 0.07087f,  -0.24572f, -0.93168f, 3.07935f,  -0.18183f,
-  -0.09831f, -0.07703f, -0.03222f, -0.25473f, -0.06090f, 2.93713f,  -0.38711f,
-  -0.12884f, -0.18329f, -0.06262f, -0.00327f, -0.02930f, -0.01641f, -0.00622f,
-  -0.03305f, -4.07069f, -2.76643f, 0.04413f,  -1.03176f, -0.19217f, -0.44980f,
-  -2.48615f, -2.58112f, -0.87695f, 0.16187f,  -0.04891f, -0.06854f, 1.08104f,
-  0.75245f,  1.49302f,  0.63363f,  1.45715f,  0.92574f,  1.72029f,  0.33326f,
-  3.86646f,  0.04422f,  0.41019f,  0.36212f,  0.56600f,  -1.01552f, 0.05128f,
-  0.40454f,  -1.05100f, -0.47461f, -1.33168f, -0.46145f, -1.36870f, -0.88838f,
-  -1.05358f, -0.18537f, -0.34357f, -0.03698f, 0.68905f,  0.41010f,  0.31223f,
-  -0.43382f, -0.74715f, 2.03366f,  -0.30419f, 0.45747f,  0.09526f,  0.31678f,
-  0.22915f,  0.21832f,  1.26385f,  -0.06814f, -0.71417f, -1.18947f, 0.03762f,
-  0.10936f,  2.97396f,  -0.42638f, -0.03123f, -5.49756f, -0.17029f, -0.11323f,
-  0.05173f,  -0.44274f, -0.15738f, 0.11311f,  0.43872f,  0.16837f,  -0.52849f,
-  2.90050f,  -0.54735f, -0.29591f, 1.24030f,  0.21696f,  -0.04443f, -1.60877f,
-  -1.36365f, -1.27432f, -1.52060f, -1.34397f, -1.13371f, -1.87554f, 0.80123f,
-  0.42820f,  -0.14157f, -2.73963f, -0.68040f, -0.35236f, 0.14490f,  2.23477f,
-  0.01370f,  -0.20426f, -1.51411f, -0.72293f, 0.64516f,  0.97638f,  0.32616f,
-  -0.27975f, -0.01149f,
-};
-
-static const float av1_tx_type_nn_bias_4x16_ver_layer0[16] = {
-  -1.37863f, -0.05763f, -0.07041f, 0.15306f,  0.96026f,  -1.42105f,
-  -0.55822f, 1.04845f,  -0.17662f, -1.25345f, -0.11927f, 0.49845f,
-  -0.32530f, 0.73483f,  0.08322f,  -0.23890f,
-};
-
-static const float av1_tx_type_nn_weights_4x16_ver_layer1[64] = {
-  0.27194f,  0.50607f,  0.49229f,  -0.48192f, 0.15667f,  -1.38891f, 0.38102f,
-  -0.58825f, -0.07337f, -0.52909f, 0.36975f,  0.28710f,  0.34992f,  -0.73630f,
-  0.30386f,  -0.58822f, 0.36127f,  0.57950f,  0.55878f,  -0.42796f, 0.19967f,
-  -1.45517f, 0.42529f,  -0.54630f, -0.38169f, -0.84899f, 0.41622f,  0.46935f,
-  0.39077f,  -0.75448f, 0.31698f,  -0.76187f, 0.97765f,  0.57052f,  0.55825f,
-  -0.54273f, 0.20466f,  -1.46347f, 0.41813f,  -0.55019f, -0.19948f, -0.57982f,
-  0.41206f,  0.32373f,  0.38537f,  -1.11657f, 0.32887f,  -0.76911f, 1.12259f,
-  0.72163f,  0.82603f,  0.37786f,  0.34976f,  -1.86642f, 0.59961f,  -0.16329f,
-  -0.36631f, -0.56814f, 0.60410f,  0.53158f,  0.56389f,  -0.70508f, 0.51009f,
-  -0.56513f,
-};
-
-static const float av1_tx_type_nn_bias_4x16_ver_layer1[4] = {
-  4.60896f,
-  4.53551f,
-  4.53124f,
-  4.27435f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_4x16_ver = {
-  8,  // num_inputs
-  4,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      16,
-  },  // num_hidden_nodes
-  { av1_tx_type_nn_weights_4x16_ver_layer0,
-    av1_tx_type_nn_weights_4x16_ver_layer1 },
-  { av1_tx_type_nn_bias_4x16_ver_layer0, av1_tx_type_nn_bias_4x16_ver_layer1 }
-};
-/******************************************************************************/
-
-// Tx type model for 16x4 block.
-static const float av1_tx_type_nn_weights_16x4_hor_layer0[128] = {
-  1.45347f,  -0.15743f, 0.44236f,  0.25808f,  0.33944f,  0.38678f,  0.24428f,
-  1.67287f,  0.09539f,  -0.42940f, -0.31507f, -0.00154f, -2.98755f, -2.27744f,
-  -0.49183f, 0.09333f,  -0.99026f, -0.22157f, 0.53701f,  0.60447f,  0.15686f,
-  -0.04646f, 0.26341f,  2.12361f,  0.27090f,  -1.14716f, -0.64146f, -0.91604f,
-  -0.75335f, -0.60056f, -1.25084f, 1.68473f,  -3.24075f, -4.03867f, -2.07877f,
-  -0.02347f, 0.00333f,  -0.01259f, -0.00465f, 0.02526f,  0.36286f,  -0.10324f,
-  2.12780f,  -0.74584f, -1.05052f, 1.78467f,  -0.55065f, -0.03326f, 2.46781f,
-  1.18349f,  0.96015f,  1.01696f,  1.10584f,  1.07263f,  1.11531f,  -1.06413f,
-  0.32389f,  -1.87360f, -0.14435f, 1.77926f,  1.09966f,  -0.12680f, -0.61386f,
-  -0.09724f, -0.33095f, 1.12122f,  1.00791f,  1.52416f,  1.35004f,  1.32657f,
-  0.60950f,  -1.13538f, -0.38654f, 0.06473f,  2.10669f,  0.27734f,  -0.38359f,
-  -1.91455f, -1.22676f, 0.05786f,  0.97432f,  2.19967f,  0.50457f,  0.78976f,
-  0.95183f,  -0.32414f, 0.49437f,  -0.04506f, 0.18993f,  -0.07971f, 0.23889f,
-  -0.09872f, -0.66036f, 0.05377f,  2.69638f,  -0.08259f, -0.69210f, -1.08296f,
-  -1.96504f, -2.31947f, -0.80161f, -0.80456f, -1.35556f, -0.05323f, -4.42658f,
-  -0.30732f, -0.12043f, 0.11126f,  0.10771f,  -0.14956f, -0.02218f, 0.41016f,
-  1.16599f,  1.14629f,  1.12881f,  1.18676f,  1.24677f,  1.28695f,  1.11270f,
-  0.08233f,  1.75440f,  0.49228f,  -0.34858f, -0.17032f, 0.29288f,  0.47175f,
-  0.19055f,  -1.56413f,
-};
-
-static const float av1_tx_type_nn_bias_16x4_hor_layer0[16] = {
-  -1.71227f, 0.47291f, -0.97536f, -0.66216f, 0.11729f,  -0.21451f,
-  2.75281f,  0.04318f, 2.03965f,  0.14618f,  -0.70483f, -0.24517f,
-  1.14048f,  0.33308f, -1.10886f, 0.41184f,
-};
-
-static const float av1_tx_type_nn_weights_16x4_hor_layer1[64] = {
-  -1.17079f, 0.19096f,  -1.05753f, -0.30803f, -1.21680f, -0.67255f, 1.60115f,
-  0.05972f,  1.44759f,  -0.04068f, -0.26331f, 0.31400f,  0.96923f,  0.33443f,
-  -0.77215f, -0.91316f, -1.78928f, 0.21483f,  -1.24008f, -0.46190f, -0.12127f,
-  -0.62144f, 1.37593f,  0.08373f,  1.56215f,  0.00279f,  -0.14556f, 0.38710f,
-  0.96228f,  0.66433f,  -0.51798f, -0.80738f, -0.18539f, 0.19377f,  -1.03090f,
-  -1.51044f, -0.59485f, -0.62589f, 1.90742f,  0.09078f,  1.49113f,  0.00205f,
-  -0.15918f, 0.40827f,  1.08553f,  0.43431f,  0.33519f,  -1.12669f, -1.10274f,
-  0.80004f,  -1.83599f, -0.53134f, 2.00515f,  -0.32670f, 1.37124f,  0.51136f,
-  1.62563f,  0.24787f,  0.31757f,  0.81751f,  1.57262f,  0.83214f,  1.04661f,
-  -0.43819f,
-};
-
-static const float av1_tx_type_nn_bias_16x4_hor_layer1[4] = {
-  2.32575f,
-  2.75703f,
-  1.12304f,
-  2.15567f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_16x4_hor = {
-  8,  // num_inputs
-  4,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      16,
-  },  // num_hidden_nodes
-  { av1_tx_type_nn_weights_16x4_hor_layer0,
-    av1_tx_type_nn_weights_16x4_hor_layer1 },
-  { av1_tx_type_nn_bias_16x4_hor_layer0, av1_tx_type_nn_bias_16x4_hor_layer1 }
-};
-
-static const float av1_tx_type_nn_weights_16x4_ver_layer0[32] = {
-  0.26047f,  0.99930f,  1.16484f,  -0.28196f, -2.67483f, -0.21456f, -0.16854f,
-  0.46375f,  1.47951f,  1.13735f,  1.12356f,  0.27385f,  0.50978f,  2.09967f,
-  -1.47386f, 0.01950f,  -0.06362f, 0.26014f,  1.04544f,  -0.03099f, 0.07478f,
-  -0.39701f, 0.05545f,  2.73633f,  -0.56305f, -0.02208f, -0.44517f, -0.00897f,
-  -0.17967f, -0.96622f, 0.42635f,  -1.04784f,
-};
-
-static const float av1_tx_type_nn_bias_16x4_ver_layer0[8] = {
-  -0.52088f, 0.52844f,  -1.03655f, -0.30974f,
-  2.59952f,  -1.93604f, 0.00000f,  2.51787f,
-};
-
-static const float av1_tx_type_nn_weights_16x4_ver_layer1[32] = {
-  0.10916f,  -0.21219f, -0.51340f, 0.69161f,  1.45988f,  -1.36942f, -0.40899f,
-  1.05136f,  -0.08486f, 0.10008f,  -0.55304f, 0.88012f,  1.61177f,  -1.64507f,
-  0.63428f,  1.15130f,  -0.17287f, -0.18592f, -0.01143f, 0.88293f,  1.73326f,
-  -1.63624f, 0.09359f,  1.18393f,  0.26531f,  0.22378f,  0.15170f,  1.06965f,
-  1.26814f,  -1.93873f, -0.00768f, 1.58309f,
-};
-
-static const float av1_tx_type_nn_bias_16x4_ver_layer1[4] = {
-  2.34713f,
-  1.68667f,
-  1.25488f,
-  1.69812f,
-};
-
-static const NN_CONFIG av1_tx_type_nnconfig_16x4_ver = {
-  4,  // num_inputs
-  4,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      8,
-  },  // num_hidden_nodes
-  { av1_tx_type_nn_weights_16x4_ver_layer0,
-    av1_tx_type_nn_weights_16x4_ver_layer1 },
-  { av1_tx_type_nn_bias_16x4_ver_layer0, av1_tx_type_nn_bias_16x4_ver_layer1 }
-};
-/******************************************************************************/
-
-// Map tx_size to its corresponding neural net model for tx type prediction.
-static const NN_CONFIG *av1_tx_type_nnconfig_map_hor[] = {
-  &av1_tx_type_nnconfig_4x4_hor,   // 4x4 transform
-  &av1_tx_type_nnconfig_8x8_hor,   // 8x8 transform
-  &av1_tx_type_nnconfig_16x16,     // 16x16 transform
-  NULL,                            // 32x32 transform
-  NULL,                            // 64x64 transform
-  &av1_tx_type_nnconfig_4x8_hor,   // 4x8 transform
-  &av1_tx_type_nnconfig_8x4_hor,   // 8x4 transform
-  &av1_tx_type_nnconfig_8x16_hor,  // 8x16 transform
-  &av1_tx_type_nnconfig_16x8_hor,  // 16x8 transform
-  NULL,                            // 16x32 transform
-  NULL,                            // 32x16 transform
-  NULL,                            // 32x64 transform
-  NULL,                            // 64x32 transform
-  &av1_tx_type_nnconfig_4x16_hor,  // 4x16 transform
-  &av1_tx_type_nnconfig_16x4_hor,  // 16x4 transform
-  NULL,                            // 8x32 transform
-  NULL,                            // 32x8 transform
-  NULL,                            // 16x64 transform
-  NULL,                            // 64x16 transform
-};
-
-static const NN_CONFIG *av1_tx_type_nnconfig_map_ver[] = {
-  &av1_tx_type_nnconfig_4x4_ver,   // 4x4 transform
-  &av1_tx_type_nnconfig_8x8_ver,   // 8x8 transform
-  &av1_tx_type_nnconfig_16x16,     // 16x16 transform
-  NULL,                            // 32x32 transform
-  NULL,                            // 64x64 transform
-  &av1_tx_type_nnconfig_4x8_ver,   // 4x8 transform
-  &av1_tx_type_nnconfig_8x4_ver,   // 8x4 transform
-  &av1_tx_type_nnconfig_8x16_ver,  // 8x16 transform
-  &av1_tx_type_nnconfig_16x8_ver,  // 16x8 transform
-  NULL,                            // 16x32 transform
-  NULL,                            // 32x16 transform
-  NULL,                            // 32x64 transform
-  NULL,                            // 64x32 transform
-  &av1_tx_type_nnconfig_4x16_ver,  // 4x16 transform
-  &av1_tx_type_nnconfig_16x4_ver,  // 16x4 transform
-  NULL,                            // 8x32 transform
-  NULL,                            // 32x8 transform
-  NULL,                            // 16x64 transform
-  NULL,                            // 64x16 transform
-};
-
-// Tx split model for 4x8 block.
-static const float av1_tx_split_nn_weights_4x8_layer0[8 * 16] = {
-  0.068650f,  -0.732073f, -0.040361f, 0.322550f,  -0.021123f, 0.212518f,
-  -0.350546f, 0.435987f,  -0.111756f, -0.401568f, 0.069548f,  -0.313000f,
-  0.073918f,  -0.373805f, -0.775810f, -0.124753f, 0.181094f,  -0.602641f,
-  -0.026219f, -0.350112f, 0.020599f,  -0.311752f, -0.476482f, -0.669465f,
-  -0.310921f, 0.348869f,  -0.115984f, 0.154250f,  0.200485f,  -0.016689f,
-  0.020392f,  0.413810f,  0.634064f,  -0.627530f, 0.399178f,  -0.012284f,
-  0.472030f,  0.091087f,  -0.706100f, -0.447944f, -0.274226f, 0.445656f,
-  0.309339f,  0.505522f,  0.038496f,  -0.152809f, 0.408684f,  -0.068151f,
-  0.271612f,  0.353233f,  -0.150365f, 0.075212f,  -0.035096f, 0.346615f,
-  0.124382f,  0.477072f,  0.216288f,  0.070548f,  -0.106362f, 0.681613f,
-  -0.145502f, -0.218631f, -0.099248f, -0.001983f, -0.196819f, -0.969045f,
-  0.063009f,  -0.123053f, 0.104875f,  -0.137581f, -0.282933f, -0.003624f,
-  -0.315659f, -0.333523f, -0.503000f, -0.100063f, -0.536711f, -0.059978f,
-  -0.670248f, -0.353762f, 0.181109f,  0.289715f,  -0.071206f, 0.261141f,
-  0.052796f,  -0.114554f, -0.139214f, -0.261380f, 0.075984f,  -0.647925f,
-  -0.099528f, -0.677814f, 0.015712f,  -0.389385f, -0.095622f, -0.165117f,
-  -0.109454f, -0.175240f, -0.393914f, 0.212330f,  0.037822f,  0.248280f,
-  0.180197f,  0.110493f,  -0.525727f, -0.092329f, -0.524029f, -0.407364f,
-  -0.542373f, -0.435626f, -0.912194f, 0.062794f,  0.160433f,  0.741485f,
-  -0.103659f, -0.119327f, -0.055275f, 0.334358f,  0.014713f,  0.046327f,
-  0.831114f,  -0.576682f, 0.354369f,  -0.082088f, 0.452331f,  0.039730f,
-  -0.792429f, -0.385862f,
-};
-
-static const float av1_tx_split_nn_bias_4x8_layer0[16] = {
-  0.238621f,  2.186830f,  1.383035f,  -0.867139f, 1.257119f, -0.351571f,
-  -0.240650f, -0.971692f, 2.744843f,  1.116991f,  0.139062f, -0.165332f,
-  0.262171f,  -1.598153f, -1.427340f, -1.602306f,
-};
-
-static const float av1_tx_split_nn_weights_4x8_layer1[16] = {
-  -0.367134f, 1.373058f, -0.897039f, -0.326819f, -0.734030f, -0.290413f,
-  -0.501249f, 0.505321f, -0.537692f, -0.767893f, 0.268697f,  0.278987f,
-  0.085082f,  0.614986f, 0.847904f,  0.637578f,
-};
-
-static const float av1_tx_split_nn_bias_4x8_layer1[1] = {
-  0.20586078f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_4x8 = {
-  8,  // num_inputs
-  1,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      16,
-  },  // num_hidden_nodes
-  {
-      av1_tx_split_nn_weights_4x8_layer0,
-      av1_tx_split_nn_weights_4x8_layer1,
-  },
-  {
-      av1_tx_split_nn_bias_4x8_layer0,
-      av1_tx_split_nn_bias_4x8_layer1,
-  },
-};
-/******************************************************************************/
-
-// Tx split model for 8x8 block.
-static const float av1_tx_split_nn_weights_8x8_layer0[144] = {
-  0.177983f,  -0.938386f, -0.074460f, -0.221843f, -0.073182f, -0.295155f,
-  -0.098202f, -0.279510f, 0.001054f,  -0.119319f, -1.835282f, -0.581507f,
-  -1.222222f, -1.049006f, -0.807508f, -0.454252f, -0.774879f, -0.180607f,
-  -0.886976f, -0.231971f, -0.824677f, -0.351872f, -1.323819f, 0.235378f,
-  0.015331f,  -0.341818f, 0.145549f,  -0.348362f, 0.147647f,  -0.323400f,
-  0.047558f,  -0.553025f, -0.295485f, -0.330368f, -0.530605f, -0.407516f,
-  0.447740f,  0.782381f,  -0.179164f, -0.584675f, -0.052645f, 0.038656f,
-  -0.096783f, 0.038342f,  -0.170762f, -0.405844f, -0.552665f, -0.509866f,
-  0.757204f,  -1.296465f, 0.631015f,  0.009265f,  0.646192f,  0.044523f,
-  0.653161f,  0.033820f,  0.849639f,  -0.068555f, -1.036085f, -0.511652f,
-  0.104693f,  -1.458690f, 0.286051f,  -0.089800f, 0.381564f,  -0.302640f,
-  0.304465f,  -0.268706f, 0.432603f,  -0.117914f, -2.070031f, -0.565696f,
-  -0.073027f, -1.783570f, -0.318144f, -0.320990f, -0.343966f, -0.140996f,
-  -0.322977f, -0.232147f, -0.373210f, -0.158266f, -1.922305f, -0.634373f,
-  0.101894f,  -0.221847f, 0.018412f,  -0.423887f, -0.266684f, -0.444930f,
-  -0.196237f, 0.106638f,  -0.065834f, -0.538401f, -0.280772f, -0.620348f,
-  1.089957f,  -0.799928f, 0.504112f,  -0.165763f, 0.578741f,  -0.172653f,
-  0.547316f,  -0.143484f, 0.717220f,  -0.297190f, -1.237854f, -0.074819f,
-  -0.977304f, -0.484092f, -0.646427f, -0.451443f, -0.612126f, -0.224475f,
-  -0.731608f, -0.257077f, -0.665857f, -0.346742f, -1.216372f, 0.227267f,
-  0.231249f,  -1.693073f, -0.035899f, 0.380845f,  -0.058476f, 0.409405f,
-  -0.066679f, 0.406731f,  -0.068501f, 0.396748f,  0.639462f,  0.150834f,
-  -0.418659f, -1.421931f, 0.101889f,  0.083573f,  0.129746f,  0.134460f,
-  0.081185f,  0.127420f,  0.083664f,  0.051096f,  1.361688f,  0.386093f,
-};
-
-static const float av1_tx_split_nn_bias_8x8_layer0[12] = {
-  4.280443f, 2.218902f, -0.256953f, 3.161431f,  2.082548f, 2.506052f,
-  2.563224f, 1.421976f, -1.627813f, -1.436085f, 2.297265f, 1.500469f,
-};
-
-static const float av1_tx_split_nn_weights_8x8_layer1[12] = {
-  1.178833f,  -0.428527f, -0.078737f, 0.381434f, -0.466895f, -0.901745f,
-  -0.766968f, -0.356663f, 0.450146f,  0.509370f, -0.356604f, -0.443506f,
-};
-
-static const float av1_tx_split_nn_bias_8x8_layer1[1] = {
-  -0.156294f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_8x8 = {
-  12,  // num_inputs
-  1,   // num_outputs
-  1,   // num_hidden_layers
-  {
-      12,
-  },  // num_hidden_nodes
-  {
-      av1_tx_split_nn_weights_8x8_layer0,
-      av1_tx_split_nn_weights_8x8_layer1,
-  },
-  {
-      av1_tx_split_nn_bias_8x8_layer0,
-      av1_tx_split_nn_bias_8x8_layer1,
-  },
-};
-/******************************************************************************/
-
-// Tx split model for 8x16 block.
-static const float av1_tx_split_nn_weights_8x16_layer0[8 * 64] = {
-  0.374660f,  0.218905f,  -0.139779f, 0.212141f,  0.056517f,  0.051114f,
-  0.042860f,  -0.273258f, -0.340809f, 0.138983f,  -0.216996f, -0.241519f,
-  -0.123244f, 0.078577f,  -0.472273f, -0.194201f, 0.125056f,  0.239761f,
-  -0.332782f, 0.174782f,  -0.211400f, -0.129795f, 0.062195f,  0.113176f,
-  -0.008869f, 0.140764f,  0.059833f,  0.163826f,  0.359293f,  -0.109797f,
-  -0.022091f, -0.059536f, -0.188226f, 0.179709f,  0.031386f,  0.164790f,
-  0.214364f,  0.198555f,  0.152262f,  -0.242980f, 0.319367f,  -0.136902f,
-  0.046524f,  -0.043591f, 0.342178f,  -0.011757f, -0.014286f, 0.072871f,
-  -0.278314f, -0.345303f, -0.252103f, -0.107154f, -0.235101f, -0.106739f,
-  -0.120865f, -0.160042f, 0.240028f,  0.112902f,  -0.141587f, -0.703012f,
-  -0.136591f, 0.318993f,  -0.154417f, -0.054668f, 0.192870f,  0.176166f,
-  -0.029965f, 0.266942f,  -0.178384f, 0.038680f,  0.134403f,  -0.002426f,
-  0.534825f,  -0.070923f, 0.413281f,  0.418148f,  0.093729f,  0.016454f,
-  0.305358f,  -0.040512f, 0.069904f,  -0.227588f, -0.362220f, -0.031604f,
-  -0.394901f, 0.071506f,  -0.342833f, -0.142550f, -0.164005f, 0.182600f,
-  0.213062f,  0.076805f,  0.278758f,  0.125613f,  -0.035552f, 0.040971f,
-  0.182785f,  -0.227961f, -0.105413f, -0.074949f, -0.084629f, -0.254767f,
-  0.114657f,  0.047121f,  0.195902f,  0.264759f,  0.017799f,  0.210230f,
-  0.150749f,  -0.142142f, 0.182494f,  -0.142415f, -0.259782f, -0.114830f,
-  -0.198826f, 0.000061f,  -0.375668f, -0.276656f, -0.373202f, 0.210298f,
-  0.422680f,  0.066960f,  0.351106f,  -0.209034f, 0.367195f,  -0.110274f,
-  0.115573f,  -0.066642f, -0.389673f, -0.260447f, 0.056949f,  -0.180425f,
-  0.069922f,  -0.153506f, -0.097053f, -0.111757f, 0.094069f,  0.144837f,
-  -0.052984f, -0.506681f, -0.034474f, 0.279057f,  -0.105025f, 0.006656f,
-  -0.125017f, -0.114096f, 0.103153f,  -0.117402f, -0.359472f, 0.072534f,
-  0.110291f,  0.003088f,  -0.456897f, 0.038331f,  -0.322298f, 0.113942f,
-  -0.119916f, -0.194392f, 0.093167f,  0.193459f,  0.074671f,  0.033602f,
-  0.004440f,  -0.179578f, -0.036637f, -0.216172f, -0.296530f, -0.318992f,
-  0.319160f,  -0.066218f, 0.291246f,  0.181292f,  0.089914f,  0.025273f,
-  0.303128f,  0.019063f,  0.078545f,  -0.396919f, 0.014065f,  -0.122121f,
-  0.037107f,  -0.151886f, -0.299392f, -0.172207f, -0.124571f, -0.232553f,
-  0.102970f,  -0.225040f, 0.061059f,  -0.258188f, -0.469871f, -0.099607f,
-  -0.061524f, -0.213700f, 0.070237f,  -0.289134f, -0.238225f, 0.256403f,
-  -0.119344f, 0.067782f,  -0.398983f, -0.123975f, -0.200205f, -0.047038f,
-  0.026569f,  0.031037f,  0.094302f,  -0.101239f, 0.433307f,  -0.303612f,
-  0.088537f,  -0.164436f, 0.202471f,  -0.048592f, -0.251904f, 0.122577f,
-  -0.309874f, -0.263405f, -0.292503f, 0.216589f,  0.035378f,  0.136599f,
-  -0.145844f, -0.018211f, 0.174084f,  -0.449941f, -0.001428f, 0.064134f,
-  0.039652f,  0.111083f,  -0.246076f, -0.204733f, 0.056559f,  -0.000123f,
-  0.104049f,  0.138512f,  -0.128309f, 0.087855f,  0.232784f,  0.247138f,
-  0.162766f,  0.154829f,  0.313605f,  -0.164115f, -0.050844f, 0.156549f,
-  0.185279f,  -0.238962f, -0.308281f, -0.179592f, -0.193262f, 0.201670f,
-  -0.203399f, -0.096831f, -0.127867f, 0.310674f,  -0.008181f, 0.004078f,
-  -0.211038f, -0.193480f, -0.185639f, -0.150202f, -0.204858f, -0.240758f,
-  0.114268f,  -0.032535f, -0.052403f, -0.234333f, -0.064072f, -0.208444f,
-  -0.352853f, -0.224001f, -0.156330f, 0.215436f,  0.171846f,  0.291849f,
-  0.108832f,  0.046991f,  -0.127801f, 0.032485f,  0.141493f,  0.123319f,
-  -0.057250f, 0.315346f,  -0.061317f, -0.465086f, -0.130179f, -0.217841f,
-  -0.239089f, -0.073251f, -0.327718f, 0.054905f,  -0.283169f, -0.028900f,
-  0.071450f,  0.270072f,  0.248891f,  0.088052f,  0.253319f,  0.122808f,
-  0.175490f,  -0.147805f, 0.089169f,  -0.045457f, -0.330788f, 0.099791f,
-  -0.137376f, -0.195977f, -0.350942f, -0.284930f, -0.559037f, 0.030504f,
-  0.162554f,  -0.199100f, -0.050453f, -0.131320f, -0.077863f, -0.066253f,
-  -0.379723f, -0.424047f, -0.081182f, -0.252261f, -0.102815f, 0.058240f,
-  -0.182036f, 0.176772f,  -0.070823f, 0.216054f,  -0.211533f, -0.232992f,
-  0.279346f,  0.117984f,  0.236674f,  0.126625f,  -0.046220f, 0.044919f,
-  0.278492f,  0.083944f,  0.180512f,  0.217994f,  0.401170f,  -0.064417f,
-  0.011636f,  -0.139597f, -0.050020f, -0.268438f, -0.032803f, 0.024908f,
-  -0.085713f, -0.012984f, -0.055192f, -0.338657f, 0.045826f,  -0.312849f,
-  -0.023393f, -0.168800f, -0.030886f, -0.131816f, -0.253542f, -0.104812f,
-  -0.354389f, 0.169464f,  0.094151f,  -0.217122f, -0.456397f, 0.211478f,
-  0.219232f,  -0.155519f, -0.353700f, -0.264759f, -0.034709f, 0.034409f,
-  -0.148639f, -0.132850f, -0.216791f, -0.118492f, 0.173721f,  -0.144181f,
-  0.335028f,  0.176439f,  0.105980f,  0.169390f,  0.155615f,  -0.040618f,
-  -0.176029f, 0.155569f,  -0.184833f, -0.171099f, -0.178663f, -0.032051f,
-  -0.434334f, 0.092238f,  -0.263103f, 0.061804f,  -0.172957f, 0.005962f,
-  -0.100176f, 0.125898f,  0.048092f,  -0.088141f, 0.247196f,  -0.221601f,
-  -0.114474f, -0.124410f, -0.156393f, -0.181782f, -0.083562f, 0.034937f,
-  0.403401f,  -0.046200f, 0.322259f,  0.219678f,  0.109850f,  0.051837f,
-  0.196861f,  -0.019118f, 0.248818f,  -0.137567f, 0.127862f,  0.052293f,
-  0.298726f,  0.275788f,  0.015344f,  0.058714f,  0.283691f,  -0.053794f,
-  -0.123270f, -0.227761f, -0.141744f, -0.268515f, -0.007189f, -0.242117f,
-  -0.252396f, -0.069017f, 0.034803f,  -0.003388f, -0.262577f, 0.062115f,
-  -0.298393f, 0.215415f,  -0.153615f, 0.289902f,  0.085886f,  -0.504290f,
-  0.077178f,  0.150861f,  -0.228848f, -0.261020f, 0.198204f,  0.162113f,
-  0.346418f,  -0.286950f, 0.354756f,  -0.226419f, 0.024720f,  0.208037f,
-  0.107286f,  -0.110849f, 0.104415f,  -0.207725f, 0.063932f,  -0.037748f,
-  -0.167037f, -0.068282f, 0.320815f,  -0.051884f, 0.099989f,  -0.078388f,
-  0.127071f,  0.046675f,  -0.336571f, -0.273080f, 0.264694f,  -0.007352f,
-  -0.093828f, 0.094773f,  -0.144434f, 0.091795f,  -0.031615f, 0.056914f,
-  0.064673f,  -0.136669f, 0.344734f,  0.225926f,  0.283451f,  -0.068354f,
-  0.030572f,  0.180784f,  -0.378047f, -0.092962f, -0.083291f, 0.038970f,
-  0.052094f,  -0.017932f, 0.216302f,  -0.184396f, 0.079888f,  0.210406f,
-  -0.020627f, 0.244744f,  0.336972f,  -0.182914f, -0.220976f, -0.304225f,
-  -0.330974f, -0.370868f, -0.084935f, -0.136489f, -0.210082f, -0.188088f,
-  -0.408768f, 0.184693f,
-};
-
-static const float av1_tx_split_nn_bias_8x16_layer0[64] = {
-  -0.274107f, 0.445751f,  0.234359f,  0.291593f,  0.163298f,  0.183707f,
-  -0.548839f, -0.190779f, -0.163346f, -0.669028f, 0.399209f,  -0.354974f,
-  0.000000f,  -0.254630f, 0.220149f,  0.371104f,  0.789759f,  0.270300f,
-  0.195126f,  -0.206958f, 0.917708f,  -0.256232f, 1.131933f,  1.178944f,
-  0.461270f,  0.246169f,  -0.818614f, -0.111986f, 0.759355f,  0.154889f,
-  0.470299f,  -1.025250f, 0.678678f,  0.959346f,  -0.164105f, 0.544079f,
-  -0.448733f, 0.649221f,  -0.536672f, 0.962758f,  -0.256427f, 0.808664f,
-  -0.118694f, 0.684873f,  -0.015635f, -0.046469f, 0.075481f,  0.412647f,
-  0.454456f,  -0.107169f, 0.775235f,  -0.261629f, -1.194849f, 0.010093f,
-  -0.231289f, 0.658286f,  -0.769320f, 0.564545f,  0.482962f,  -0.131378f,
-  -0.255844f, -0.078400f, 0.476752f,  0.643001f,
-};
-
-static const float av1_tx_split_nn_weights_8x16_layer1[64] = {
-  -0.145065f, -0.145101f, 0.174786f,  0.196692f,  0.102025f,  -0.087735f,
-  0.386353f,  -0.660539f, -0.183940f, 0.490045f,  -0.276404f, -0.145669f,
-  0.209846f,  -0.085574f, -0.156821f, -0.377450f, -0.950010f, 0.450709f,
-  -0.108545f, -0.261181f, 1.435606f,  -0.176621f, -1.158548f, 2.035680f,
-  0.218069f,  -0.138629f, 0.305958f,  -0.277194f, -0.602468f, 0.203873f,
-  0.120720f,  0.216095f,  -0.434502f, -0.579746f, -0.239450f, 0.755529f,
-  0.545643f,  0.232091f,  0.330169f,  0.988136f,  -0.070465f, -0.345584f,
-  -0.162455f, -0.617064f, 0.123881f,  -0.201098f, 0.222756f,  0.112932f,
-  0.048647f,  -0.147890f, 0.394584f,  -0.262148f, 0.280564f,  -0.195432f,
-  -0.047515f, 1.133410f,  0.255415f,  -0.299032f, -0.397807f, -0.153246f,
-  -0.256734f, 0.177370f,  0.213522f,  -0.530158f,
-};
-
-static const float av1_tx_split_nn_bias_8x16_layer1[1] = {
-  0.14910713f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_8x16 = {
-  8,  // num_inputs
-  1,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      64,
-  },  // num_hidden_nodes
-  {
-      av1_tx_split_nn_weights_8x16_layer0,
-      av1_tx_split_nn_weights_8x16_layer1,
-  },
-  {
-      av1_tx_split_nn_bias_8x16_layer0,
-      av1_tx_split_nn_bias_8x16_layer1,
-  },
-};
-/******************************************************************************/
-
-// Tx split model for 16x16 block.
-static const float av1_tx_split_nn_weights_16x16_layer0[12 * 24] = {
-  -0.177215f, -0.297166f, 0.299924f,  0.207878f,  0.216871f,  0.173264f,
-  0.295464f,  0.048395f,  0.154731f,  0.305880f,  0.056787f,  -0.166617f,
-  0.115653f,  -0.529477f, -0.073995f, -0.211746f, -0.018169f, 0.000788f,
-  -0.024940f, -0.007055f, 0.001392f,  0.021678f,  -1.594600f, -0.099593f,
-  0.332930f,  0.103574f,  0.158249f,  0.182601f,  0.332665f,  0.226207f,
-  -0.139566f, 0.185531f,  0.099074f,  -0.185654f, -0.203121f, -0.285678f,
-  -0.313453f, -0.294452f, -0.143707f, -0.031265f, -0.453030f, -0.061874f,
-  -0.066150f, -0.099058f, -0.458879f, 0.127544f,  0.338314f,  -0.161350f,
-  0.030091f,  -0.075528f, 0.004320f,  0.353690f,  -0.013480f, -0.420402f,
-  -0.004659f, -0.329401f, -0.001745f, 0.227384f,  -0.055183f, 0.121405f,
-  0.160340f,  0.143603f,  -0.221813f, 0.079107f,  -0.657639f, -0.084348f,
-  -0.303414f, 0.046774f,  -0.367679f, 0.060005f,  0.168645f,  0.084421f,
-  -0.133625f, 0.301375f,  0.079412f,  -0.419303f, 0.017235f,  0.068637f,
-  0.018384f,  -0.428325f, -0.019753f, 0.149444f,  -0.474836f, -0.287162f,
-  0.198083f,  0.028292f,  -0.299092f, -0.005849f, -0.256245f, 0.233277f,
-  -0.217561f, -0.264003f, 0.269411f,  0.207032f,  -0.339411f, -0.198431f,
-  -0.028521f, 0.158076f,  0.177116f,  0.345702f,  -0.145132f, 0.064623f,
-  -0.090867f, 0.288816f,  -0.263198f, -0.071028f, -0.044546f, 0.380017f,
-  -0.014100f, -0.271192f, -0.318559f, 0.129015f,  -0.050314f, -0.093355f,
-  -0.578498f, 0.099090f,  -0.133080f, -0.029975f, -0.059828f, -0.157765f,
-  -0.321153f, -0.343671f, -0.242959f, 0.128304f,  0.017170f,  0.072787f,
-  -0.475838f, -0.003806f, -0.068615f, 0.150556f,  -0.159903f, -0.416513f,
-  0.218794f,  -0.290456f, -0.084569f, -0.170014f, -0.044414f, -0.153069f,
-  -0.077329f, -0.089747f, -0.096526f, 0.537952f,  0.134725f,  -0.006469f,
-  -0.323335f, -0.168183f, -0.107163f, -0.139954f, 0.011286f,  -0.021712f,
-  -0.513992f, 0.259135f,  -0.319808f, 0.077811f,  0.104613f,  0.370571f,
-  0.185244f,  0.065530f,  -0.091098f, -0.573741f, 0.111934f,  0.437417f,
-  -0.123691f, 0.220641f,  -0.024783f, -0.149460f, -0.354185f, -0.134127f,
-  0.038015f,  -0.380596f, 0.250980f,  0.142208f,  0.135170f,  -0.131129f,
-  -0.357556f, -0.530945f, 0.159672f,  -0.147025f, -0.377829f, -0.504508f,
-  -0.492870f, 0.020753f,  0.142818f,  0.025172f,  0.086140f,  0.091283f,
-  0.087491f,  -0.186415f, 0.177785f,  -0.195121f, -1.191148f, -0.477102f,
-  0.023371f,  0.227004f,  -0.023502f, -0.242913f, -0.074398f, -0.153480f,
-  0.162900f,  0.415509f,  -0.162565f, -0.131709f, -0.258852f, -0.252027f,
-  -0.080845f, -0.330274f, 0.021874f,  0.232398f,  0.069277f,  0.220567f,
-  -0.024237f, -0.366771f, 0.081673f,  -0.429906f, -0.302170f, 0.061045f,
-  0.352777f,  -0.230376f, 0.408153f,  0.064758f,  0.142051f,  0.007219f,
-  0.622878f,  0.212577f,  0.036489f,  0.081150f,  -0.284767f, 0.107763f,
-  -0.529786f, -0.072190f, -0.300421f, -0.287959f, -0.568900f, 0.011547f,
-  -0.131696f, -0.356854f, -0.587962f, -0.026598f, 0.405829f,  0.057565f,
-  0.414265f,  -0.159155f, 0.221456f,  0.146314f,  0.265776f,  -0.006516f,
-  0.473978f,  -0.186431f, 0.288672f,  -0.060437f, 0.083380f,  -0.205641f,
-  0.360016f,  0.222041f,  0.420011f,  0.024579f,  0.377546f,  0.250380f,
-  -0.069900f, 0.296743f,  0.073532f,  -0.243225f, -0.374987f, -0.387288f,
-  -0.237255f, -0.287013f, 0.417831f,  -0.252988f, -0.257652f, -0.066775f,
-  -0.253926f, 0.057841f,  0.346133f,  -0.157797f, -0.406028f, -0.286893f,
-  0.274507f,  -0.452561f, 0.143381f,  -0.097755f, 0.021242f,  0.034561f,
-  0.044115f,  0.004065f,  0.066729f,  0.043558f,  0.102991f,  -0.477574f,
-};
-
-static const float av1_tx_split_nn_bias_16x16_layer0[24] = {
-  -0.479033f, 1.467402f,  -0.366291f, 0.372511f,  0.715322f,  -0.605500f,
-  0.176848f,  0.032318f,  0.237429f,  -0.046047f, 0.452082f,  0.451805f,
-  -0.822845f, 0.636762f,  -0.057350f, 1.163978f,  0.728287f,  0.603654f,
-  -0.245519f, -0.893569f, -1.428185f, 0.808870f,  -0.076159f, 1.231976f,
-};
-
-static const float av1_tx_split_nn_weights_16x16_layer1[24] = {
-  -0.176161f, 1.670188f, -0.180755f, -0.321326f, 0.249728f,  -0.170504f,
-  -0.538432f, 0.033893f, 0.149842f,  0.404140f,  -0.377812f, 0.338838f,
-  -0.176091f, 0.249844f, -0.362533f, 1.412460f,  0.196862f,  0.278194f,
-  -0.140444f, 0.297746f, 0.172533f,  0.116470f,  -0.151656f, -0.603250f,
-};
-
-static const float av1_tx_split_nn_bias_16x16_layer1[1] = {
-  0.184803f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_16x16 = {
-  12,  // num_inputs
-  1,   // num_outputs
-  1,   // num_hidden_layers
-  {
-      24,
-  },  // num_hidden_nodes
-  {
-      av1_tx_split_nn_weights_16x16_layer0,
-      av1_tx_split_nn_weights_16x16_layer1,
-  },
-  {
-      av1_tx_split_nn_bias_16x16_layer0,
-      av1_tx_split_nn_bias_16x16_layer1,
-  },
-};
-/******************************************************************************/
-
-// Tx split model for 32x32 block.
-static const float av1_tx_split_nn_weights_32x32_layer0[12 * 32] = {
-  -0.439303f, 0.004813f,  -0.365052f, -0.116868f, -0.356716f, -0.196537f,
-  -0.196770f, -0.076096f, 0.357004f,  -0.044909f, -0.112910f, -0.129081f,
-  0.156725f,  -0.386346f, 0.038971f,  0.160696f,  0.204923f,  -0.384333f,
-  -0.319546f, 0.028179f,  -0.250524f, -0.289669f, -0.284138f, -0.258963f,
-  -0.180854f, -0.000807f, -0.029620f, -0.353134f, 0.212408f,  0.141414f,
-  0.303016f,  0.098066f,  0.482455f,  0.036069f,  -0.166279f, 0.210119f,
-  -0.086337f, -0.023550f, -0.250796f, -0.183945f, -0.393856f, 0.170608f,
-  -0.306403f, 0.026318f,  -0.277296f, 0.092684f,  -0.033584f, -0.018371f,
-  -0.025043f, -0.257659f, -0.139163f, -0.206949f, -0.190105f, 0.028053f,
-  0.361851f,  -0.364726f, -0.096771f, -0.184166f, -0.433228f, -0.182191f,
-  -0.097051f, 0.259172f,  0.016432f,  0.259358f,  0.145059f,  0.037196f,
-  0.091581f,  -0.219644f, 0.140384f,  -0.446837f, -0.234531f, 0.149508f,
-  -0.083429f, 0.186189f,  -0.099890f, -0.111277f, 0.495214f,  0.085053f,
-  -0.266613f, -0.051366f, 0.148593f,  0.111875f,  0.077787f,  -0.371653f,
-  -0.146157f, -0.229235f, 0.076203f,  0.488975f,  0.096771f,  -0.009483f,
-  0.192985f,  0.246273f,  -0.192671f, -0.557890f, -0.292650f, -0.088907f,
-  -0.106892f, -0.329659f, 0.012105f,  -0.359326f, 0.170723f,  -0.004357f,
-  0.171593f,  -0.478768f, -0.236016f, -0.035077f, 0.133731f,  0.137962f,
-  -0.397926f, -0.155164f, -0.276709f, -0.186602f, -0.258301f, 0.036965f,
-  -0.649359f, 0.127605f,  0.097930f,  0.182775f,  -0.313324f, 0.053349f,
-  0.204203f,  -0.222948f, -0.059008f, -0.049759f, -0.056848f, 0.087497f,
-  -0.039987f, -0.055042f, -0.041623f, -0.078424f, -0.317291f, -0.191398f,
-  0.632147f,  0.221825f,  0.268394f,  -0.096357f, 0.442545f,  -0.007117f,
-  -0.036125f, 0.000525f,  0.088092f,  -0.203653f, 0.086925f,  0.439141f,
-  0.329889f,  -0.370050f, -0.194306f, -0.207430f, 0.132779f,  -0.217614f,
-  -0.039444f, -0.053019f, -0.260725f, -0.116563f, -0.271048f, 0.283737f,
-  -0.007300f, 0.062257f,  -0.347865f, -0.296767f, -0.359123f, 0.230459f,
-  -0.189117f, -0.087622f, -0.561091f, 0.184182f,  -0.044980f, 0.012643f,
-  0.241672f,  0.050272f,  -0.204851f, -0.159285f, -0.064081f, -0.118666f,
-  -0.269471f, 0.231668f,  0.135749f,  -0.131162f, 0.062760f,  0.100949f,
-  0.074967f,  -0.056918f, 0.251707f,  0.034098f,  0.341290f,  -0.105027f,
-  0.313246f,  -0.092679f, -0.014632f, -0.390967f, 0.136881f,  -0.241554f,
-  0.097674f,  0.110832f,  -0.390245f, 0.017654f,  -0.506222f, 0.065252f,
-  0.244834f,  -0.171352f, -0.331702f, 0.111043f,  0.125217f,  -0.058116f,
-  -0.382595f, -0.052545f, 0.114261f,  -0.493617f, 0.243984f,  -0.171053f,
-  0.165009f,  -0.063020f, 0.096502f,  0.341339f,  -0.013443f, 0.056372f,
-  0.339284f,  0.398376f,  0.389409f,  0.257252f,  0.517368f,  0.078856f,
-  0.087716f,  -0.171092f, 0.227461f,  0.125307f,  -0.054423f, -0.143161f,
-  0.224041f,  -0.086477f, -0.092548f, 0.072392f,  -0.061608f, 0.258347f,
-  0.147033f,  -0.478244f, -0.204869f, 0.038552f,  -0.144563f, 0.224087f,
-  -0.296705f, 0.153889f,  -0.064624f, 0.085265f,  -0.103826f, 0.127971f,
-  0.019965f,  0.111937f,  -0.074187f, -0.029518f, -0.127305f, -0.012210f,
-  0.042714f,  0.070052f,  -0.202360f, 0.348144f,  -0.132097f, -0.209585f,
-  -0.248286f, -0.065774f, -0.089482f, -0.133226f, 0.325430f,  -0.013468f,
-  -0.406090f, -0.144936f, 0.208620f,  0.343445f,  -0.059639f, 0.114857f,
-  -0.069431f, -0.218725f, 0.190575f,  -0.368101f, 0.030030f,  0.062815f,
-  -0.239369f, -0.537852f, 0.022487f,  0.023038f,  0.190788f,  0.040123f,
-  -0.004304f, 0.060749f,  -0.108929f, 0.136796f,  -0.542875f, -0.227074f,
-  -0.182244f, 0.082559f,  0.019149f,  0.178854f,  0.120284f,  0.009070f,
-  0.068268f,  -0.544822f, 0.120536f,  0.354028f,  -0.119890f, -0.122055f,
-  -0.405335f, 0.122341f,  -0.304412f, 0.062405f,  -0.302568f, -0.276505f,
-  -0.120915f, -0.221841f, 0.282007f,  -0.253971f, 0.059517f,  -0.144976f,
-  0.149391f,  -0.047355f, -0.167742f, -0.392333f, -0.041132f, 0.342135f,
-  0.017485f,  0.021038f,  -0.023728f, -0.192181f, -0.103996f, 0.092873f,
-  -0.114365f, -0.397732f, -0.065421f, 0.053084f,  0.035201f,  0.053019f,
-  -0.105377f, -0.039500f, 0.131904f,  -0.123911f, -0.390328f, -0.125198f,
-  -0.000126f, 0.014864f,  -0.220187f, 0.084056f,  -0.492155f, -0.164979f,
-  0.133592f,  0.121519f,  -0.240813f, 0.186680f,  0.118673f,  0.235006f,
-  -0.239894f, -0.185759f, -0.336992f, 0.209620f,  -0.298845f, 0.127803f,
-  -0.083992f, 0.194340f,  -0.245378f, 0.212308f,  0.142512f,  -0.163324f,
-  0.383495f,  0.291065f,  0.286620f,  -0.239957f, 0.225127f,  -0.174424f,
-  0.297231f,  -0.045434f, 0.156444f,  -0.184273f, -0.204567f, 0.202551f,
-  0.370019f,  -0.073910f, 0.344897f,  0.063100f,  0.338547f,  -0.099145f,
-  0.391863f,  -0.214244f, -0.241734f, -0.281851f, -0.035133f, -0.153157f,
-};
-
-static const float av1_tx_split_nn_bias_32x32_layer0[32] = {
-  0.143343f,  -0.021982f, -0.314939f, 0.170867f,  -0.081248f, 0.125758f,
-  -0.355762f, 0.279798f,  1.027712f,  -0.434660f, 1.072005f,  0.668893f,
-  -0.031216f, -0.528650f, 0.328349f,  0.543645f,  -0.188810f, 0.221110f,
-  -1.638637f, 0.058045f,  -1.731105f, -0.444284f, 0.513693f,  0.890025f,
-  0.160288f,  0.393312f,  0.332856f,  -0.080767f, 0.299822f,  0.235876f,
-  0.254942f,  -0.017796f,
-};
-
-static const float av1_tx_split_nn_weights_32x32_layer1[32] = {
-  -0.090326f, -0.267553f, -0.026071f, 0.100912f,  0.279137f,  0.079064f,
-  -0.074885f, 0.053804f,  0.736810f,  -0.031693f, -0.970514f, 0.174069f,
-  0.095940f,  -0.065047f, 0.052911f,  0.176728f,  -0.058274f, 0.148364f,
-  -0.162210f, 0.093875f,  -0.367663f, 0.020876f,  0.137280f,  -1.099116f,
-  0.146854f,  0.075590f,  0.228534f,  0.141993f,  0.072143f,  0.101421f,
-  -0.068547f, -0.154148f,
-};
-
-static const float av1_tx_split_nn_bias_32x32_layer1[1] = {
-  0.316622f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_32x32 = {
-  12,  // num_inputs
-  1,   // num_outputs
-  1,   // num_hidden_layers
-  {
-      32,
-  },  // num_hidden_nodes
-  {
-      av1_tx_split_nn_weights_32x32_layer0,
-      av1_tx_split_nn_weights_32x32_layer1,
-  },
-  {
-      av1_tx_split_nn_bias_32x32_layer0,
-      av1_tx_split_nn_bias_32x32_layer1,
-  },
-};
-/******************************************************************************/
-
-// Tx split model for 64x64 block.
-static const float av1_tx_split_nn_weights_64x64_layer0[12 * 32] = {
-  -0.006828f, 0.149944f,  -0.017614f, -0.044599f, -0.024517f, 0.507698f,
-  0.001039f,  0.037164f,  0.015091f,  -0.306620f, -0.162047f, -0.369440f,
-  0.396310f,  0.087121f,  0.208609f,  -0.083068f, 0.493774f,  0.217682f,
-  0.377393f,  0.172879f,  0.397422f,  0.078919f,  0.741350f,  0.064169f,
-  -0.099989f, -0.192983f, -0.278230f, -0.310048f, -0.439965f, -0.226698f,
-  -0.436596f, -0.007551f, -0.396721f, 0.153570f,  -0.190838f, -0.071869f,
-  0.048799f,  -0.301301f, -0.005015f, 0.500480f,  -0.030622f, -0.559095f,
-  -0.032634f, -0.054160f, -0.056979f, -0.456545f, 0.306536f,  -0.411323f,
-  -0.005366f, -0.069496f, 0.019990f,  0.327931f,  -0.002516f, 0.393190f,
-  0.001759f,  0.035093f,  -0.030302f, -0.528984f, 0.174781f,  0.241462f,
-  -0.415427f, -0.164502f, 0.143065f,  -0.122595f, 0.082049f,  -0.143346f,
-  0.055642f,  -0.124701f, 0.004050f,  -0.216235f, -2.681730f, 0.101658f,
-  0.381239f,  0.465936f,  0.331154f,  0.301708f,  -0.360171f, 0.054886f,
-  -0.118658f, 0.287921f,  0.277859f,  0.203784f,  0.247809f,  0.656924f,
-  -0.354628f, 0.315081f,  0.105108f,  -0.510179f, 0.059267f,  0.061386f,
-  0.076423f,  0.347119f,  0.100134f,  0.028402f,  -0.118621f, -0.238689f,
-  0.080141f,  -0.138863f, 0.009009f,  -0.100526f, -0.138875f, 0.066992f,
-  0.005949f,  0.564336f,  0.046994f,  0.004655f,  0.366047f,  0.014695f,
-  -0.146928f, -0.024665f, -0.440357f, -0.109395f, 0.527231f,  -0.020925f,
-  -0.227236f, -0.068141f, 0.282009f,  0.040192f,  -0.267100f, 0.229228f,
-  0.133861f,  0.338706f,  -0.030178f, -0.040919f, -0.026343f, -0.330338f,
-  -0.066931f, -0.110580f, -0.072056f, 0.599457f,  -0.020738f, 0.169200f,
-  0.836240f,  -0.157548f, 0.386273f,  0.002404f,  0.329410f,  -0.007020f,
-  0.351705f,  -0.041259f, 0.388861f,  0.003899f,  0.582627f,  0.023572f,
-  0.409912f,  -0.158472f, 0.536383f,  0.525093f,  0.604247f,  0.439159f,
-  0.692832f,  0.046272f,  0.590367f,  -0.082166f, 0.262357f,  0.478671f,
-  0.031935f,  0.042675f,  0.120002f,  0.398616f,  -0.078967f, 0.227986f,
-  -0.044679f, 0.151061f,  -0.085564f, 0.220205f,  -0.265606f, -0.203623f,
-  0.204719f,  -0.125922f, 0.038544f,  -0.269379f, 0.025866f,  0.109967f,
-  0.019064f,  -0.237297f, -0.309746f, -0.329118f, -0.278368f, -0.063859f,
-  0.278496f,  0.018620f,  0.209971f,  0.296250f,  0.142850f,  0.288689f,
-  0.137084f,  0.130517f,  0.128171f,  -0.155396f, -0.008449f, -0.099845f,
-  0.173455f,  -0.059909f, -0.147318f, 0.102851f,  -0.251389f, -0.001448f,
-  0.103907f,  0.297273f,  -0.027846f, 0.028260f,  -0.382601f, 0.346695f,
-  -0.601641f, 0.162366f,  -0.477495f, -0.042731f, -0.387871f, -0.051791f,
-  -0.401498f, -0.048446f, -0.456270f, -0.062287f, 0.493919f,  0.003008f,
-  0.099917f,  -0.358525f, -0.094903f, -0.022811f, -0.062259f, 0.019455f,
-  -0.050644f, 0.020041f,  -0.132912f, -0.061578f, -3.083691f, -0.014961f,
-  -0.129115f, -0.710559f, 0.157213f,  -0.844037f, -0.121991f, -0.943386f,
-  -0.231269f, -0.003462f, 0.331478f,  -0.132703f, -1.285993f, -0.120957f,
-  -0.373755f, -0.322609f, 0.309059f,  -0.131523f, -0.118334f, -0.063805f,
-  -0.104251f, 0.012166f,  -0.094699f, -0.283753f, 0.128168f,  -0.526929f,
-  -0.050331f, 0.186153f,  0.005913f,  -0.221236f, 0.036363f,  0.160909f,
-  -0.001342f, -0.382749f, 0.037820f,  0.281689f,  -0.024275f, 0.028854f,
-  0.318291f,  0.318526f,  0.035778f,  0.034031f,  0.189663f,  -0.293367f,
-  0.082022f,  0.127923f,  0.078866f,  -0.081361f, -0.268117f, 0.246675f,
-  0.248605f,  -0.215479f, -0.073084f, 0.496140f,  -0.067327f, 0.396237f,
-  -0.120739f, 0.033752f,  -0.044120f, -0.218941f, -0.028078f, 0.195132f,
-  -0.040400f, 0.281604f,  -0.100471f, 0.415207f,  -0.258503f, -0.429749f,
-  0.150569f,  -0.010859f, 0.136448f,  0.026589f,  0.148466f,  0.110764f,
-  0.380967f,  0.009177f,  0.103075f,  0.116417f,  0.226273f,  -0.327746f,
-  0.169346f,  0.284553f,  -0.094986f, 0.312745f,  -0.147840f, 0.025062f,
-  -0.494482f, 0.112388f,  -0.213962f, 0.107050f,  -0.433371f, -0.096276f,
-  -0.244835f, -0.003518f, -0.459148f, -0.145080f, 0.017150f,  0.042846f,
-  -0.237479f, 0.104746f,  0.158677f,  0.358937f,  0.099921f,  0.277109f,
-  0.012410f,  -0.062897f, 0.116130f,  0.255309f,  0.341628f,  0.145002f,
-  -0.429344f, -0.016433f, -0.068985f, 0.285194f,  -0.286719f, -0.018298f,
-  -0.179369f, -0.194655f, -0.165380f, 0.026071f,  -0.428268f, -0.379929f,
-  -0.727543f, 0.179610f,  -0.963979f, -0.042026f, -0.616202f, 0.133401f,
-  -0.784966f, 0.061205f,  -0.713357f, 0.129795f,  0.120512f,  -0.339545f,
-  0.353557f,  0.114906f,  -0.329813f, -0.209987f, 0.085410f,  0.214313f,
-  -0.122082f, 0.335770f,  -0.020937f, 0.202456f,  0.289023f,  -0.421186f,
-  0.337905f,  0.407663f,  0.132771f,  0.071734f,  0.213914f,  0.128595f,
-  0.302659f,  -0.209501f, 0.217756f,  0.253079f,  -0.089505f, -0.205614f,
-};
-
-static const float av1_tx_split_nn_bias_64x64_layer0[32] = {
-  0.296914f,  -1.826816f, 0.346130f,  0.969520f,  -0.528154f, 1.175862f,
-  -0.075985f, -0.097323f, -0.233059f, 0.004846f,  0.401279f,  -2.272435f,
-  0.086257f,  0.414162f,  -0.194786f, -0.233887f, -0.113215f, -2.453546f,
-  0.861214f,  0.298361f,  0.267397f,  -0.158557f, -0.119911f, -0.098134f,
-  -0.339263f, 0.385871f,  -0.678123f, 0.263218f,  0.251611f,  -1.155773f,
-  -0.365437f, 0.229255f,
-};
-
-static const float av1_tx_split_nn_weights_64x64_layer1[32] = {
-  0.502104f,  -0.708023f, 0.419648f,  1.583418f,  0.419355f,  -1.462981f,
-  -0.439623f, 0.405691f,  0.823257f,  0.061654f,  0.750875f,  0.775031f,
-  -0.387909f, 0.447385f,  0.284690f,  0.353262f,  -0.224347f, 0.832864f,
-  -1.708491f, -1.042447f, -0.272829f, 0.540640f,  0.310509f,  0.723745f,
-  0.245592f,  -0.218417f, -0.597987f, -0.362301f, 0.702217f,  -0.692614f,
-  0.207812f,  0.513560f,
-};
-
-static const float av1_tx_split_nn_bias_64x64_layer1[1] = { -0.2307045f };
-
-static const NN_CONFIG av1_tx_split_nnconfig_64x64 = {
-  12,  // num_inputs
-  1,   // num_outputs
-  1,   // num_hidden_layers
-  {
-      32,
-  },  // num_hidden_nodes
-  {
-      av1_tx_split_nn_weights_64x64_layer0,
-      av1_tx_split_nn_weights_64x64_layer1,
-  },
-  {
-      av1_tx_split_nn_bias_64x64_layer0,
-      av1_tx_split_nn_bias_64x64_layer1,
-  },
-};
-/******************************************************************************/
-
-// Tx split model for 4x16 block.
-static const float av1_tx_split_nn_weights_4x16_layer0[8 * 16] = {
-  -1.344184f, -1.454625f, -0.703110f, -0.140570f, -0.841536f, -0.068131f,
-  -2.128968f, -0.655518f, 0.432180f,  0.879752f,  -0.222211f, 0.061615f,
-  -0.230969f, 0.569496f,  1.424188f,  0.598063f,  -0.436005f, -0.737606f,
-  -0.137875f, -0.085730f, -0.076512f, -0.583101f, -0.937377f, -0.203556f,
-  -0.215797f, -0.015361f, -0.124098f, -0.411917f, 0.340441f,  -0.331752f,
-  -0.472607f, -0.097714f, -0.930572f, -1.354713f, -0.550724f, 0.176212f,
-  -0.636060f, 0.183271f,  -0.610212f, 0.345895f,  -1.100906f, -1.605713f,
-  0.111888f,  -0.140937f, 0.063013f,  -0.013315f, -0.273472f, -0.255870f,
-  1.200328f,  0.274002f,  1.005776f,  0.322392f,  1.222373f,  0.158227f,
-  0.408810f,  0.145022f,  0.139842f,  -1.249412f, 0.286672f,  -0.635699f,
-  0.312562f,  -0.495606f, -1.117034f, -0.085107f, -0.097484f, -0.341521f,
-  -0.132199f, -0.863055f, 0.217579f,  -1.161425f, -0.302087f, -1.357271f,
-  -0.520724f, -1.211069f, -1.048729f, -0.333087f, -1.171527f, -0.280824f,
-  -2.057684f, -0.228755f, 0.606278f,  0.101198f,  -0.314847f, -1.303255f,
-  -0.294964f, 1.301923f,  0.041712f,  0.077593f,  -1.152746f, 0.495315f,
-  -0.751566f, 0.230249f,  -0.840661f, 0.100731f,  1.346269f,  0.649898f,
-  -1.432258f, -0.456710f, -1.018123f, -0.348559f, -1.225226f, -0.170717f,
-  -0.354072f, 0.068292f,  -0.234168f, 0.277503f,  0.179134f,  0.907420f,
-  0.354626f,  -0.627210f, 0.905779f,  0.512612f,  0.161190f,  -0.843177f,
-  0.014953f,  -0.354983f, 0.011116f,  -0.429598f, -1.017138f, -0.211432f,
-  0.941840f,  -0.281747f, 0.957776f,  -0.541914f, 1.041880f,  -0.433580f,
-  -1.416451f, -0.166467f,
-};
-
-static const float av1_tx_split_nn_bias_4x16_layer0[16] = {
-  3.086118f,  -3.235095f, 4.830956f,  -0.165706f, 0.955031f,  4.055783f,
-  -0.311489f, 4.660205f,  -0.576277f, -0.248111f, -0.790519f, -1.686412f,
-  -1.191704f, -3.800073f, 4.121552f,  -1.399397f,
-};
-
-static const float av1_tx_split_nn_weights_4x16_layer1[16] = {
-  -0.758677f, 0.388776f,  0.439906f,  0.011390f, -0.084319f, -0.667969f,
-  -0.467316f, -0.875491f, -0.160668f, 0.805292f, 0.114393f,  -0.549682f,
-  0.462109f,  0.343315f,  1.092593f,  0.483152f,
-};
-
-static const float av1_tx_split_nn_bias_4x16_layer1[1] = {
-  0.8205083f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_4x16 = {
-  8,  // num_inputs
-  1,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      16,
-  },  // num_hidden_nodes
-  {
-      av1_tx_split_nn_weights_4x16_layer0,
-      av1_tx_split_nn_weights_4x16_layer1,
-  },
-  {
-      av1_tx_split_nn_bias_4x16_layer0,
-      av1_tx_split_nn_bias_4x16_layer1,
-  },
-};
-/******************************************************************************/
-
-// Tx split model for 16x32 block.
-static const float av1_tx_split_nn_weights_16x32_layer0[8 * 32] = {
-  0.180713f,  0.033211f,  0.607561f,  0.138642f,  0.637204f,  -0.000940f,
-  0.012630f,  0.358109f,  0.022238f,  0.190418f,  0.079088f,  0.065925f,
-  0.038242f,  0.162380f,  -0.122728f, 0.379382f,  -0.303283f, -0.327550f,
-  0.029120f,  -0.284553f, 0.269588f,  -0.309805f, -0.241036f, -0.161103f,
-  -0.304887f, 0.239843f,  -0.149146f, 0.311234f,  -0.073640f, -0.132718f,
-  0.178901f,  0.474712f,  0.020280f,  0.063685f,  -0.609170f, -0.013658f,
-  -0.338074f, 0.250429f,  0.082978f,  -0.186315f, -0.788959f, 0.039859f,
-  -0.426461f, -0.001524f, -0.447211f, 0.378102f,  0.315617f,  0.017428f,
-  0.745494f,  -0.219024f, 0.512836f,  0.200522f,  0.680449f,  0.313686f,
-  -0.412569f, -0.132927f, 0.631120f,  0.042735f,  0.336153f,  0.044772f,
-  0.432606f,  0.175681f,  -0.634411f, -0.073509f, -0.040643f, -0.559260f,
-  -0.104034f, -0.570495f, -0.247365f, 0.063256f,  -0.582021f, -0.492585f,
-  -0.194955f, -0.207934f, -0.506627f, 0.021743f,  -0.416518f, 0.320876f,
-  0.115889f,  0.149399f,  -0.229376f, 0.095505f,  0.115191f,  -0.471921f,
-  0.113068f,  0.343684f,  -0.036831f, 0.021240f,  0.295112f,  0.031166f,
-  0.448201f,  -0.132241f, 0.164032f,  0.355572f,  0.072154f,  0.017335f,
-  -0.046113f, 0.178719f,  -0.026881f, -0.242590f, 0.055073f,  -0.012958f,
-  0.077904f,  0.351356f,  0.107655f,  0.260568f,  -0.080052f, -0.197553f,
-  0.085763f,  0.263416f,  -0.327741f, 0.158855f,  0.056899f,  -0.162121f,
-  0.339518f,  -0.571204f, 0.264966f,  -0.252214f, -0.202560f, -0.134213f,
-  -0.330188f, 0.009470f,  -0.468376f, -0.065240f, -0.307957f, 0.116479f,
-  -0.222238f, -0.458716f, 0.186493f,  -0.391415f, 0.118649f,  -0.104653f,
-  -0.259958f, -0.332081f, -0.403785f, -0.050147f, -0.573511f, 0.177117f,
-  -0.598358f, 0.164947f,  -0.119694f, -0.058520f, 0.203829f,  -0.267404f,
-  -0.048202f, -0.600006f, 0.181594f,  -0.731805f, 0.146417f,  -0.687148f,
-  -1.210525f, -0.450101f, -0.620635f, 0.208825f,  -0.611357f, 0.112202f,
-  -0.309468f, -0.323545f, 0.357770f,  0.308061f,  0.553199f,  0.049012f,
-  0.530093f,  -0.208597f, 0.607882f,  -0.058120f, -0.527634f, 0.018136f,
-  0.060753f,  0.118894f,  0.175649f,  0.014731f,  0.428318f,  -0.106465f,
-  -0.119077f, 0.080179f,  0.524997f,  0.368286f,  0.528286f,  0.213659f,
-  0.639286f,  0.195079f,  -0.049815f, -0.092008f, -0.302958f, 0.298149f,
-  -0.173870f, -0.145205f, -0.233589f, -0.303368f, 0.141275f,  0.325622f,
-  -0.115293f, 0.155188f,  0.047225f,  0.231050f,  -0.167447f, 0.349754f,
-  0.295544f,  -0.319466f, 0.095144f,  0.174612f,  -0.194652f, 0.305915f,
-  -0.239008f, -0.037453f, 0.280696f,  0.125850f,  0.749196f,  -0.101919f,
-  0.791808f,  -0.236811f, 0.064157f,  0.032865f,  -0.225911f, 0.350384f,
-  0.723183f,  -0.103992f, 0.483085f,  -0.123992f, 0.602138f,  0.023895f,
-  -0.692601f, -0.118387f, 0.162527f,  0.145178f,  -0.184702f, -0.017753f,
-  -0.159436f, 0.124105f,  -0.131067f, 0.310275f,  0.151499f,  0.138924f,
-  0.537459f,  0.263212f,  0.615896f,  0.281255f,  0.021293f,  -0.473459f,
-  0.210145f,  -0.056682f, 0.063658f,  0.377254f,  -0.314410f, -0.183487f,
-  0.300384f,  0.328471f,  0.164694f,  -0.159272f, -0.160942f, -0.502861f,
-  -0.129147f, 0.045916f,  -0.606865f, -0.101378f,
-};
-
-static const float av1_tx_split_nn_bias_16x32_layer0[32] = {
-  0.051664f,  -0.212487f, -0.077596f, -0.818467f, 0.638475f,  -0.759937f,
-  0.157198f,  0.989640f,  1.586035f,  0.431144f,  0.041605f,  0.543085f,
-  0.498379f,  0.320504f,  0.134233f,  0.670979f,  -0.105562f, -1.574879f,
-  1.261812f,  -0.287530f, -1.610592f, 0.730899f,  -0.894240f, -0.657790f,
-  0.270806f,  -0.181708f, 0.298578f,  0.817240f,  -0.221508f, -0.201771f,
-  -0.294389f, 1.456413f,
-};
-
-static const float av1_tx_split_nn_weights_16x32_layer1[32] = {
-  1.208914f,  0.324728f,  0.383352f,  -0.874321f, 0.172565f,  -0.580927f,
-  -0.432927f, 0.433698f,  -0.801935f, 0.672028f,  0.563493f,  0.260077f,
-  -0.200557f, -0.121638f, 0.530735f,  -0.525196f, 0.281799f,  0.624204f,
-  -0.662775f, -0.230887f, 0.980989f,  0.223437f,  -0.790591f, 0.600724f,
-  -0.273445f, 0.427635f,  -0.501641f, -0.878390f, 0.234731f,  -0.172550f,
-  0.418904f,  1.792187f,
-};
-
-static const float av1_tx_split_nn_bias_16x32_layer1[1] = {
-  -0.29233751f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_16x32 = {
-  8,  // num_inputs
-  1,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      32,
-  },  // num_hidden_nodes
-  {
-      av1_tx_split_nn_weights_16x32_layer0,
-      av1_tx_split_nn_weights_16x32_layer1,
-  },
-  {
-      av1_tx_split_nn_bias_16x32_layer0,
-      av1_tx_split_nn_bias_16x32_layer1,
-  },
-};
-/******************************************************************************/
-
-// Tx split model for 32x64 block.
-static const float av1_tx_split_nn_weights_32x64_layer0[8 * 32] = {
-  0.031614f,  -0.110926f, 0.052418f,  -0.702506f, 0.045708f,  0.238329f,
-  -0.021806f, -0.208128f, 0.509745f,  -0.293891f, 0.277788f,  0.113937f,
-  0.741576f,  0.062848f,  0.351878f,  0.212532f,  0.385842f,  0.081517f,
-  0.398502f,  -0.015156f, 0.242616f,  0.214619f,  -0.182678f, -0.170546f,
-  0.110605f,  -0.236749f, -0.023831f, -0.285243f, 0.147156f,  -0.257639f,
-  0.341355f,  -0.571641f, -0.721797f, 0.139588f,  -0.518494f, -0.206526f,
-  -0.570560f, -0.184295f, 0.110271f,  0.210292f,  -0.109132f, -0.001080f,
-  0.129251f,  -0.204230f, -0.396312f, -0.183024f, 0.421243f,  -0.013154f,
-  0.222627f,  0.169826f,  0.226037f,  0.218153f,  -0.343528f, 0.274906f,
-  -0.156632f, 0.250261f,  -0.484020f, 0.019909f,  -0.349575f, -0.286643f,
-  -0.507396f, 0.202446f,  -0.154110f, -0.292644f, 0.122666f,  0.306963f,
-  0.424895f,  0.005579f,  0.494094f,  -0.079551f, 0.473740f,  0.352414f,
-  -0.356917f, 0.264331f,  -0.554487f, 0.119978f,  0.012291f,  -0.141641f,
-  -0.254714f, -0.213723f, -0.116701f, -0.011267f, 0.190025f,  -0.118501f,
-  0.305151f,  -0.316782f, -0.220801f, -0.308420f, -0.324285f, 0.421329f,
-  -0.177066f, -0.055114f, 0.229698f,  -0.199523f, 0.054278f,  0.365020f,
-  -0.060586f, -0.300618f, 0.157563f,  -0.064338f, -0.005711f, -0.176991f,
-  -0.424502f, -0.111914f, 0.092608f,  0.126621f,  0.078547f,  0.148008f,
-  0.024221f,  0.124599f,  0.001343f,  0.059402f,  0.453753f,  0.047102f,
-  0.242544f,  0.055735f,  -0.067451f, -0.170061f, -0.170469f, -0.232173f,
-  0.214908f,  0.248889f,  0.544348f,  -0.084566f, 0.402478f,  0.298031f,
-  0.099038f,  -0.238019f, -0.475085f, -0.070042f, -0.754955f, -0.049095f,
-  -0.783801f, -0.099857f, -0.582008f, -0.055194f, -0.103655f, 0.143689f,
-  0.100219f,  0.293934f,  0.099271f,  -0.036320f, 0.356626f,  -0.261445f,
-  0.879544f,  0.000878f,  0.532920f,  -0.093918f, 0.508867f,  -0.040215f,
-  -0.789042f, -0.145380f, -0.090040f, -0.066636f, 0.015212f,  0.352989f,
-  -0.058831f, -0.164588f, 0.039890f,  0.122861f,  0.222508f,  0.061217f,
-  0.466487f,  0.022666f,  0.423777f,  -0.002200f, -0.656835f, -0.099760f,
-  -0.520606f, 0.303204f,  -0.563620f, -0.160922f, -0.243203f, 0.313354f,
-  -0.336516f, -0.206764f, -0.236040f, 0.325899f,  -0.418748f, 0.163205f,
-  -0.476242f, -0.121928f, 0.139178f,  -0.157193f, -0.531766f, -0.180202f,
-  -0.485254f, 0.187703f,  -0.440072f, 0.137854f,  0.029139f,  0.109530f,
-  -0.078475f, -0.360618f, -0.334672f, -0.350890f, -0.403976f, 0.180336f,
-  -0.304542f, 0.005123f,  0.413995f,  0.314639f,  0.342648f,  -0.293264f,
-  0.358135f,  -0.180425f, -0.369530f, -0.048413f, 0.498366f,  0.121875f,
-  0.270948f,  -0.187966f, 0.342503f,  0.174420f,  -0.352105f, 0.088080f,
-  0.008277f,  0.020275f,  -0.002381f, 0.504389f,  -0.018832f, -0.366047f,
-  -0.090947f, -0.168150f, 0.016184f,  -0.328914f, 0.089579f,  -0.017349f,
-  0.005844f,  -0.005010f, -1.857514f, -0.282426f, 0.010177f,  -0.214727f,
-  -0.182529f, 0.156943f,  -0.162032f, -0.472654f, 0.069432f,  0.016901f,
-  -0.767905f, 0.137129f,  -0.411463f, 0.049056f,  -0.431657f, -0.037641f,
-  0.785500f,  0.046225f,  0.195831f,  0.245204f,  0.368614f,  0.212261f,
-  0.440626f,  -0.158048f, -0.461031f, -0.146280f,
-};
-
-static const float av1_tx_split_nn_bias_32x64_layer0[32] = {
-  0.490777f,  -1.894238f, 0.621333f,  -0.076756f, 0.286298f, 0.286375f,
-  -0.126431f, -0.350034f, -1.017572f, 0.620125f,  0.408128f, 0.238756f,
-  -0.060728f, 0.210912f,  0.043124f,  0.445649f,  0.907025f, 0.360272f,
-  1.083101f,  -0.068952f, 1.062348f,  0.396354f,  0.280075f, 0.501732f,
-  0.328422f,  0.066241f,  0.474697f,  0.126313f,  0.741206f, 0.314796f,
-  0.552712f,  0.299410f,
-};
-
-static const float av1_tx_split_nn_weights_32x64_layer1[32] = {
-  1.033823f,  0.603439f,  0.304591f,  -0.279940f, -0.780909f, -0.132801f,
-  0.154059f,  0.662014f,  -0.718368f, 0.198733f,  0.039766f,  -0.208516f,
-  -0.104909f, -0.394209f, 0.081617f,  0.365041f,  -0.874960f, -0.063315f,
-  -1.189897f, 0.337225f,  0.410893f,  0.307519f,  0.221323f,  0.233895f,
-  0.469536f,  0.438557f,  0.280144f,  0.422423f,  -1.394513f, 0.781900f,
-  0.352981f,  0.111265f,
-};
-
-static const float av1_tx_split_nn_bias_32x64_layer1[1] = {
-  -0.18160765f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_32x64 = {
-  8,  // num_inputs
-  1,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      32,
-  },  // num_hidden_nodes
-  {
-      av1_tx_split_nn_weights_32x64_layer0,
-      av1_tx_split_nn_weights_32x64_layer1,
-  },
-  {
-      av1_tx_split_nn_bias_32x64_layer0,
-      av1_tx_split_nn_bias_32x64_layer1,
-  },
-};
-/******************************************************************************/
-
-// Tx split model for 8x32 block.
-static const float av1_tx_split_nn_weights_8x32_layer0[8 * 24] = {
-  -0.687846f, 0.121404f,  -0.372905f, 0.126770f,  -0.103298f, -0.101650f,
-  -0.148490f, -0.271740f, 0.682915f,  -0.079765f, 0.634347f,  -0.151503f,
-  0.287692f,  -0.079072f, -0.236948f, 0.065064f,  0.713383f,  0.397123f,
-  0.553621f,  0.368529f,  0.767663f,  -0.046601f, -0.392402f, -0.294822f,
-  -0.292325f, -0.010573f, -0.837945f, 0.050113f,  -0.811360f, 0.199162f,
-  0.150832f,  0.011602f,  0.369694f,  -0.225876f, 0.234113f,  -0.269808f,
-  0.303805f,  -0.190281f, -0.451136f, 0.209755f,  -0.308894f, 0.326956f,
-  0.313591f,  0.089923f,  -0.095754f, 0.390981f,  0.467366f,  0.169670f,
-  0.853322f,  0.054055f,  0.830319f,  -0.121918f, 0.262019f,  -0.093526f,
-  0.385558f,  0.419174f,  0.040198f,  -0.347030f, -0.450492f, -0.106764f,
-  0.487502f,  -0.204188f, 0.430374f,  -0.116388f, 0.236407f,  -0.157376f,
-  0.732294f,  -0.651387f, 0.347446f,  0.342575f,  0.048406f,  0.187657f,
-  0.434899f,  -0.447782f, 0.032728f,  -0.071168f, -0.255327f, 0.104174f,
-  0.095689f,  -0.431743f, 0.725694f,  0.031797f,  0.523171f,  0.061801f,
-  0.469804f,  -0.071068f, -0.059024f, -0.211937f, 0.392134f,  -0.321490f,
-  0.366060f,  -0.427798f, 0.166771f,  0.299652f,  0.044660f,  0.205142f,
-  0.039133f,  -0.051835f, -0.465475f, 0.216976f,  -0.341156f, 0.095358f,
-  0.230807f,  0.201674f,  0.279266f,  -0.713534f, -0.091690f, -0.569708f,
-  -0.119001f, 0.252160f,  -1.544578f, -0.284477f, 0.555348f,  0.226471f,
-  0.347690f,  0.034365f,  0.770835f,  -0.241859f, -0.130241f, 0.292936f,
-  0.396622f,  -0.417916f, 0.492224f,  0.125517f,  0.344824f,  0.232172f,
-  -0.432106f, -0.278745f, 0.035069f,  -0.307247f, -0.120760f, 0.170950f,
-  0.433601f,  0.044286f,  0.141463f,  -0.041382f, 0.529346f,  0.010868f,
-  -0.323674f, 0.185205f,  0.623459f,  0.232842f,  -0.406693f, -0.142944f,
-  0.222988f,  0.343634f,  0.065401f,  0.002621f,  0.805335f,  -0.426926f,
-  0.279181f,  0.131364f,  0.192339f,  -0.402391f, 0.544120f,  -0.060618f,
-  0.467780f,  0.165224f,  -0.373131f, 0.002427f,  0.688064f,  0.322317f,
-  0.259713f,  0.130583f,  0.185032f,  -0.189111f, -0.067821f, 0.010875f,
-  0.644724f,  -0.179291f, 0.463222f,  0.155230f,  0.721384f,  -0.046019f,
-  0.438501f,  0.440027f,  -0.462090f, -0.002039f, -0.468026f, -0.008890f,
-  -0.328530f, 0.370102f,  0.482531f,  0.043471f,  -0.469732f, -0.532663f,
-  0.122081f,  -0.379659f, 0.037219f,  -0.519913f, -0.128975f, -0.404365f,
-};
-
-static const float av1_tx_split_nn_bias_8x32_layer0[24] = {
-  -1.198965f, 0.395204f,  -0.408627f, -0.021654f, -0.658355f, 0.154525f,
-  -0.288354f, 1.207574f,  0.411608f,  0.964678f,  -1.176893f, 1.059006f,
-  -0.472969f, 2.087975f,  1.065536f,  0.595569f,  0.197907f,  -0.349938f,
-  1.013651f,  -0.931093f, -0.973595f, -0.459094f, -1.253062f, 1.624782f,
-};
-
-static const float av1_tx_split_nn_weights_8x32_layer1[24] = {
-  0.815787f,  -0.393465f, -0.483427f, -0.565592f, 0.493494f,  0.430229f,
-  -0.507073f, -0.251379f, -0.353418f, -0.495445f, 0.820029f,  0.649146f,
-  -0.487383f, 1.844503f,  0.480324f,  -0.982705f, -0.501446f, -0.220584f,
-  0.334299f,  0.802238f,  0.805838f,  -0.487848f, 0.300772f,  -1.232857f,
-};
-
-static const float av1_tx_split_nn_bias_8x32_layer1[1] = {
-  0.13435879f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_8x32 = {
-  8,  // num_inputs
-  1,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      24,
-  },  // num_hidden_nodes
-  {
-      av1_tx_split_nn_weights_8x32_layer0,
-      av1_tx_split_nn_weights_8x32_layer1,
-  },
-  {
-      av1_tx_split_nn_bias_8x32_layer0,
-      av1_tx_split_nn_bias_8x32_layer1,
-  },
-};
-/******************************************************************************/
-
-// Tx split model for 16x32 block.
-static const float av1_tx_split_nn_weights_16x64_layer0[8 * 16] = {
-  -0.378223f, -0.124216f, -0.514089f, -0.110117f, -0.585801f, -0.094838f,
-  -0.455385f, -0.220254f, -0.504568f, -0.082351f, -0.476420f, -0.253993f,
-  -0.454709f, -0.059461f, 0.210313f,  -0.155683f, 0.192968f,  -0.127804f,
-  0.471996f,  0.253377f,  0.472625f,  0.485322f,  0.150560f,  0.164868f,
-  -0.475587f, 0.447559f,  -0.455759f, -0.306665f, -0.194866f, -0.283716f,
-  -0.243897f, 0.293020f,  -0.308298f, -0.191904f, -0.468568f, 0.014053f,
-  -0.618848f, 0.096273f,  -0.444586f, 0.347750f,  -0.280643f, -0.062872f,
-  0.118661f,  0.540099f,  0.104141f,  -0.279300f, -0.098721f, -0.173427f,
-  -0.984558f, -0.424559f, -0.411928f, -0.120875f, -0.488999f, -0.050716f,
-  -0.523103f, 0.093620f,  -0.930396f, -0.431997f, -1.163297f, 0.190384f,
-  -0.422581f, -0.005354f, 0.450552f,  0.369210f,  0.562484f,  0.679922f,
-  0.282099f,  -0.039075f, 0.404196f,  0.006371f,  0.069679f,  -0.196160f,
-  -0.213675f, 0.275187f,  -0.104235f, -0.193090f, 0.003116f,  -0.252454f,
-  -0.094591f, 0.210439f,  -0.137070f, 0.145043f,  0.024558f,  0.121718f,
-  0.010138f,  0.301651f,  -0.377990f, 0.444414f,  0.001845f,  -0.095334f,
-  0.550259f,  0.087603f,  0.792492f,  -0.044584f, 0.641706f,  -0.328458f,
-  -0.447791f, 0.135376f,  0.356385f,  0.135748f,  0.310370f,  0.293757f,
-  -0.062000f, -0.056368f, 0.343930f,  0.312039f,  0.370763f,  0.452381f,
-  -0.023630f, -0.185909f, 0.422277f,  -0.006306f, 0.045166f,  0.423359f,
-  -0.157735f, -0.084901f, 0.219527f,  -0.209510f, 0.575057f,  0.249276f,
-  0.069267f,  0.233898f,  -0.229392f, 0.117197f,  -0.038551f, 0.293976f,
-  0.101996f,  0.120878f,
-};
-
-static const float av1_tx_split_nn_bias_16x64_layer0[16] = {
-  1.036995f,  0.160249f,  0.100264f,  0.694881f,  0.694677f,  0.128379f,
-  -0.843405f, -0.405515f, 0.104139f,  0.182980f,  -0.025472f, 0.901067f,
-  -0.299866f, -0.103079f, -0.190352f, -0.048121f,
-};
-
-static const float av1_tx_split_nn_weights_16x64_layer1[16] = {
-  -1.778868f, 0.174690f,  0.211991f, 0.712138f,  0.589352f,  0.466652f,
-  1.029146f,  -0.490044f, 0.483015f, 0.600215f,  -0.577776f, -0.755546f,
-  0.348337f,  -0.205082f, 0.347129f, -0.322277f,
-};
-
-static const float av1_tx_split_nn_bias_16x64_layer1[1] = {
-  0.04230947f,
-};
-
-static const NN_CONFIG av1_tx_split_nnconfig_16x64 = {
-  8,  // num_inputs
-  1,  // num_outputs
-  1,  // num_hidden_layers
-  {
-      16,
-  },  // num_hidden_nodes
-  {
-      av1_tx_split_nn_weights_16x64_layer0,
-      av1_tx_split_nn_weights_16x64_layer1,
-  },
-  {
-      av1_tx_split_nn_bias_16x64_layer0,
-      av1_tx_split_nn_bias_16x64_layer1,
-  },
-};
-/******************************************************************************/
-
-// Map block size to its corresponding neural net model for tx split prediction.
-static const NN_CONFIG *av1_tx_split_nnconfig_map[TX_SIZES_ALL] = {
-  NULL,                          // TX_4X4,
-  &av1_tx_split_nnconfig_8x8,    // TX_8X8,
-  &av1_tx_split_nnconfig_16x16,  // TX_16X16,
-  &av1_tx_split_nnconfig_32x32,  // TX_32X32,
-  &av1_tx_split_nnconfig_64x64,  // TX_64X64,
-  &av1_tx_split_nnconfig_4x8,    // TX_4X8,
-  &av1_tx_split_nnconfig_4x8,    // TX_8X4,
-  &av1_tx_split_nnconfig_8x16,   // TX_8X16,
-  &av1_tx_split_nnconfig_8x16,   // TX_16X8,
-  &av1_tx_split_nnconfig_16x32,  // TX_16X32,
-  &av1_tx_split_nnconfig_16x32,  // TX_32X16,
-  &av1_tx_split_nnconfig_32x64,  // TX_32X64,
-  &av1_tx_split_nnconfig_32x64,  // TX_64X32,
-  &av1_tx_split_nnconfig_4x16,   // TX_4X16,
-  &av1_tx_split_nnconfig_4x16,   // TX_16X4,
-  &av1_tx_split_nnconfig_8x32,   // TX_8X32,
-  &av1_tx_split_nnconfig_8x32,   // TX_32X8,
-  &av1_tx_split_nnconfig_16x64,  // TX_16X64,
-  &av1_tx_split_nnconfig_16x64,  // TX_64X16,
-};
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // AOM_AV1_ENCODER_TX_PRUNE_MODEL_WEIGHTS_H_
diff --git a/third_party/aom/av1/encoder/wedge_utils.c b/third_party/aom/av1/encoder/wedge_utils.c
deleted file mode 100644
index e6edbb6af..000000000
--- a/third_party/aom/av1/encoder/wedge_utils.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "aom/aom_integer.h"
-
-#include "aom_ports/mem.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-
-#include "av1/common/reconinter.h"
-
-#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
-
-/**
- * Computes SSE of a compound predictor constructed from 2 fundamental
- * predictors p0 and p1 using blending with mask.
- *
- * r1:  Residuals of p1.
- *      (source - p1)
- * d:   Difference of p1 and p0.
- *      (p1 - p0)
- * m:   The blending mask
- * N:   Number of pixels
- *
- * 'r1', 'd', and 'm' are contiguous.
- *
- * Computes:
- *  Sum((MAX_MASK_VALUE*r1 + mask*d)**2), which is equivalent to:
- *  Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2),
- *    where r0 is (source - p0), and r1 is (source - p1), which is in turn
- *    is equivalent to:
- *  Sum((source*MAX_MASK_VALUE - (mask*p0 + (MAX_MASK_VALUE-mask)*p1))**2),
- *    which is the SSE of the residuals of the compound predictor scaled up by
- *    MAX_MASK_VALUE**2.
- *
- * Note that we clamp the partial term in the loop to 16 bits signed. This is
- * to facilitate equivalent SIMD implementation. It should have no effect if
- * residuals are within 16 - WEDGE_WEIGHT_BITS (=10) signed, which always
- * holds for 8 bit input, and on real input, it should hold practically always,
- * as residuals are expected to be small.
- */
-uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d,
-                                        const uint8_t *m, int N) {
-  uint64_t csse = 0;
-  int i;
-
-  for (i = 0; i < N; i++) {
-    int32_t t = MAX_MASK_VALUE * r1[i] + m[i] * d[i];
-    t = clamp(t, INT16_MIN, INT16_MAX);
-    csse += t * t;
-  }
-  return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS);
-}
-
-/**
- * Choose the mask sign for a compound predictor.
- *
- * ds:    Difference of the squares of the residuals.
- *        r0**2 - r1**2
- * m:     The blending mask
- * N:     Number of pixels
- * limit: Pre-computed threshold value.
- *        MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
- *
- * 'ds' and 'm' are contiguous.
- *
- * Returns true if the negated mask has lower SSE compared to the positive
- * mask. Computation is based on:
- *  Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2)
- *                                     >
- *                                Sum(((MAX_MASK_VALUE-mask)*r0 + mask*r1)**2)
- *
- *  which can be simplified to:
- *
- *  Sum(mask*(r0**2 - r1**2)) > MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
- *
- *  The right hand side does not depend on the mask, and needs to be passed as
- *  the 'limit' parameter.
- *
- *  After pre-computing (r0**2 - r1**2), which is passed in as 'ds', the left
- *  hand side is simply a scalar product between an int16_t and uint8_t vector.
- *
- *  Note that for efficiency, ds is stored on 16 bits. Real input residuals
- *  being small, this should not cause a noticeable issue.
- */
-int av1_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m, int N,
-                                    int64_t limit) {
-  int64_t acc = 0;
-
-  do {
-    acc += *ds++ * *m++;
-  } while (--N);
-
-  return acc > limit;
-}
-
-/**
- * Compute the element-wise difference of the squares of 2 arrays.
- *
- * d: Difference of the squares of the inputs: a**2 - b**2
- * a: First input array
- * b: Second input array
- * N: Number of elements
- *
- * 'd', 'a', and 'b' are contiguous.
- *
- * The result is saturated to signed 16 bits.
- */
-void av1_wedge_compute_delta_squares_c(int16_t *d, const int16_t *a,
-                                       const int16_t *b, int N) {
-  int i;
-
-  for (i = 0; i < N; i++)
-    d[i] = clamp(a[i] * a[i] - b[i] * b[i], INT16_MIN, INT16_MAX);
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm1d_sse4.c b/third_party/aom/av1/encoder/x86/av1_fwd_txfm1d_sse4.c
deleted file mode 100644
index 07615543c..000000000
--- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm1d_sse4.c
+++ /dev/null
@@ -1,1217 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/encoder/x86/av1_txfm1d_sse4.h"
-
-void av1_fdct32_new_sse4_1(const __m128i *input, __m128i *output,
-                           int8_t cos_bit) {
-  __m128i buf0[32];
-  __m128i buf1[32];
-  const int32_t *cospi;
-  // stage 0
-  // stage 1
-  buf1[0] = _mm_add_epi32(input[0], input[31]);
-  buf1[31] = _mm_sub_epi32(input[0], input[31]);
-  buf1[1] = _mm_add_epi32(input[1], input[30]);
-  buf1[30] = _mm_sub_epi32(input[1], input[30]);
-  buf1[2] = _mm_add_epi32(input[2], input[29]);
-  buf1[29] = _mm_sub_epi32(input[2], input[29]);
-  buf1[3] = _mm_add_epi32(input[3], input[28]);
-  buf1[28] = _mm_sub_epi32(input[3], input[28]);
-  buf1[4] = _mm_add_epi32(input[4], input[27]);
-  buf1[27] = _mm_sub_epi32(input[4], input[27]);
-  buf1[5] = _mm_add_epi32(input[5], input[26]);
-  buf1[26] = _mm_sub_epi32(input[5], input[26]);
-  buf1[6] = _mm_add_epi32(input[6], input[25]);
-  buf1[25] = _mm_sub_epi32(input[6], input[25]);
-  buf1[7] = _mm_add_epi32(input[7], input[24]);
-  buf1[24] = _mm_sub_epi32(input[7], input[24]);
-  buf1[8] = _mm_add_epi32(input[8], input[23]);
-  buf1[23] = _mm_sub_epi32(input[8], input[23]);
-  buf1[9] = _mm_add_epi32(input[9], input[22]);
-  buf1[22] = _mm_sub_epi32(input[9], input[22]);
-  buf1[10] = _mm_add_epi32(input[10], input[21]);
-  buf1[21] = _mm_sub_epi32(input[10], input[21]);
-  buf1[11] = _mm_add_epi32(input[11], input[20]);
-  buf1[20] = _mm_sub_epi32(input[11], input[20]);
-  buf1[12] = _mm_add_epi32(input[12], input[19]);
-  buf1[19] = _mm_sub_epi32(input[12], input[19]);
-  buf1[13] = _mm_add_epi32(input[13], input[18]);
-  buf1[18] = _mm_sub_epi32(input[13], input[18]);
-  buf1[14] = _mm_add_epi32(input[14], input[17]);
-  buf1[17] = _mm_sub_epi32(input[14], input[17]);
-  buf1[15] = _mm_add_epi32(input[15], input[16]);
-  buf1[16] = _mm_sub_epi32(input[15], input[16]);
-
-  // stage 2
-  cospi = cospi_arr(cos_bit);
-  buf0[0] = _mm_add_epi32(buf1[0], buf1[15]);
-  buf0[15] = _mm_sub_epi32(buf1[0], buf1[15]);
-  buf0[1] = _mm_add_epi32(buf1[1], buf1[14]);
-  buf0[14] = _mm_sub_epi32(buf1[1], buf1[14]);
-  buf0[2] = _mm_add_epi32(buf1[2], buf1[13]);
-  buf0[13] = _mm_sub_epi32(buf1[2], buf1[13]);
-  buf0[3] = _mm_add_epi32(buf1[3], buf1[12]);
-  buf0[12] = _mm_sub_epi32(buf1[3], buf1[12]);
-  buf0[4] = _mm_add_epi32(buf1[4], buf1[11]);
-  buf0[11] = _mm_sub_epi32(buf1[4], buf1[11]);
-  buf0[5] = _mm_add_epi32(buf1[5], buf1[10]);
-  buf0[10] = _mm_sub_epi32(buf1[5], buf1[10]);
-  buf0[6] = _mm_add_epi32(buf1[6], buf1[9]);
-  buf0[9] = _mm_sub_epi32(buf1[6], buf1[9]);
-  buf0[7] = _mm_add_epi32(buf1[7], buf1[8]);
-  buf0[8] = _mm_sub_epi32(buf1[7], buf1[8]);
-  buf0[16] = buf1[16];
-  buf0[17] = buf1[17];
-  buf0[18] = buf1[18];
-  buf0[19] = buf1[19];
-  btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[20], buf1[27], buf0[20],
-                      buf0[27], cos_bit);
-  btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[21], buf1[26], buf0[21],
-                      buf0[26], cos_bit);
-  btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[22], buf1[25], buf0[22],
-                      buf0[25], cos_bit);
-  btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[23], buf1[24], buf0[23],
-                      buf0[24], cos_bit);
-  buf0[28] = buf1[28];
-  buf0[29] = buf1[29];
-  buf0[30] = buf1[30];
-  buf0[31] = buf1[31];
-
-  // stage 3
-  cospi = cospi_arr(cos_bit);
-  buf1[0] = _mm_add_epi32(buf0[0], buf0[7]);
-  buf1[7] = _mm_sub_epi32(buf0[0], buf0[7]);
-  buf1[1] = _mm_add_epi32(buf0[1], buf0[6]);
-  buf1[6] = _mm_sub_epi32(buf0[1], buf0[6]);
-  buf1[2] = _mm_add_epi32(buf0[2], buf0[5]);
-  buf1[5] = _mm_sub_epi32(buf0[2], buf0[5]);
-  buf1[3] = _mm_add_epi32(buf0[3], buf0[4]);
-  buf1[4] = _mm_sub_epi32(buf0[3], buf0[4]);
-  buf1[8] = buf0[8];
-  buf1[9] = buf0[9];
-  btf_32_sse4_1_type0(-cospi[32], cospi[32], buf0[10], buf0[13], buf1[10],
-                      buf1[13], cos_bit);
-  btf_32_sse4_1_type0(-cospi[32], cospi[32], buf0[11], buf0[12], buf1[11],
-                      buf1[12], cos_bit);
-  buf1[14] = buf0[14];
-  buf1[15] = buf0[15];
-  buf1[16] = _mm_add_epi32(buf0[16], buf0[23]);
-  buf1[23] = _mm_sub_epi32(buf0[16], buf0[23]);
-  buf1[17] = _mm_add_epi32(buf0[17], buf0[22]);
-  buf1[22] = _mm_sub_epi32(buf0[17], buf0[22]);
-  buf1[18] = _mm_add_epi32(buf0[18], buf0[21]);
-  buf1[21] = _mm_sub_epi32(buf0[18], buf0[21]);
-  buf1[19] = _mm_add_epi32(buf0[19], buf0[20]);
-  buf1[20] = _mm_sub_epi32(buf0[19], buf0[20]);
-  buf1[24] = _mm_sub_epi32(buf0[31], buf0[24]);
-  buf1[31] = _mm_add_epi32(buf0[31], buf0[24]);
-  buf1[25] = _mm_sub_epi32(buf0[30], buf0[25]);
-  buf1[30] = _mm_add_epi32(buf0[30], buf0[25]);
-  buf1[26] = _mm_sub_epi32(buf0[29], buf0[26]);
-  buf1[29] = _mm_add_epi32(buf0[29], buf0[26]);
-  buf1[27] = _mm_sub_epi32(buf0[28], buf0[27]);
-  buf1[28] = _mm_add_epi32(buf0[28], buf0[27]);
-
-  // stage 4
-  cospi = cospi_arr(cos_bit);
-  buf0[0] = _mm_add_epi32(buf1[0], buf1[3]);
-  buf0[3] = _mm_sub_epi32(buf1[0], buf1[3]);
-  buf0[1] = _mm_add_epi32(buf1[1], buf1[2]);
-  buf0[2] = _mm_sub_epi32(buf1[1], buf1[2]);
-  buf0[4] = buf1[4];
-  btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[5], buf1[6], buf0[5], buf0[6],
-                      cos_bit);
-  buf0[7] = buf1[7];
-  buf0[8] = _mm_add_epi32(buf1[8], buf1[11]);
-  buf0[11] = _mm_sub_epi32(buf1[8], buf1[11]);
-  buf0[9] = _mm_add_epi32(buf1[9], buf1[10]);
-  buf0[10] = _mm_sub_epi32(buf1[9], buf1[10]);
-  buf0[12] = _mm_sub_epi32(buf1[15], buf1[12]);
-  buf0[15] = _mm_add_epi32(buf1[15], buf1[12]);
-  buf0[13] = _mm_sub_epi32(buf1[14], buf1[13]);
-  buf0[14] = _mm_add_epi32(buf1[14], buf1[13]);
-  buf0[16] = buf1[16];
-  buf0[17] = buf1[17];
-  btf_32_sse4_1_type0(-cospi[16], cospi[48], buf1[18], buf1[29], buf0[18],
-                      buf0[29], cos_bit);
-  btf_32_sse4_1_type0(-cospi[16], cospi[48], buf1[19], buf1[28], buf0[19],
-                      buf0[28], cos_bit);
-  btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf1[20], buf1[27], buf0[20],
-                      buf0[27], cos_bit);
-  btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf1[21], buf1[26], buf0[21],
-                      buf0[26], cos_bit);
-  buf0[22] = buf1[22];
-  buf0[23] = buf1[23];
-  buf0[24] = buf1[24];
-  buf0[25] = buf1[25];
-  buf0[30] = buf1[30];
-  buf0[31] = buf1[31];
-
-  // stage 5
-  cospi = cospi_arr(cos_bit);
-  btf_32_sse4_1_type0(cospi[32], cospi[32], buf0[0], buf0[1], buf1[0], buf1[1],
-                      cos_bit);
-  btf_32_sse4_1_type1(cospi[48], cospi[16], buf0[2], buf0[3], buf1[2], buf1[3],
-                      cos_bit);
-  buf1[4] = _mm_add_epi32(buf0[4], buf0[5]);
-  buf1[5] = _mm_sub_epi32(buf0[4], buf0[5]);
-  buf1[6] = _mm_sub_epi32(buf0[7], buf0[6]);
-  buf1[7] = _mm_add_epi32(buf0[7], buf0[6]);
-  buf1[8] = buf0[8];
-  btf_32_sse4_1_type0(-cospi[16], cospi[48], buf0[9], buf0[14], buf1[9],
-                      buf1[14], cos_bit);
-  btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf0[10], buf0[13], buf1[10],
-                      buf1[13], cos_bit);
-  buf1[11] = buf0[11];
-  buf1[12] = buf0[12];
-  buf1[15] = buf0[15];
-  buf1[16] = _mm_add_epi32(buf0[16], buf0[19]);
-  buf1[19] = _mm_sub_epi32(buf0[16], buf0[19]);
-  buf1[17] = _mm_add_epi32(buf0[17], buf0[18]);
-  buf1[18] = _mm_sub_epi32(buf0[17], buf0[18]);
-  buf1[20] = _mm_sub_epi32(buf0[23], buf0[20]);
-  buf1[23] = _mm_add_epi32(buf0[23], buf0[20]);
-  buf1[21] = _mm_sub_epi32(buf0[22], buf0[21]);
-  buf1[22] = _mm_add_epi32(buf0[22], buf0[21]);
-  buf1[24] = _mm_add_epi32(buf0[24], buf0[27]);
-  buf1[27] = _mm_sub_epi32(buf0[24], buf0[27]);
-  buf1[25] = _mm_add_epi32(buf0[25], buf0[26]);
-  buf1[26] = _mm_sub_epi32(buf0[25], buf0[26]);
-  buf1[28] = _mm_sub_epi32(buf0[31], buf0[28]);
-  buf1[31] = _mm_add_epi32(buf0[31], buf0[28]);
-  buf1[29] = _mm_sub_epi32(buf0[30], buf0[29]);
-  buf1[30] = _mm_add_epi32(buf0[30], buf0[29]);
-
-  // stage 6
-  cospi = cospi_arr(cos_bit);
-  buf0[0] = buf1[0];
-  buf0[1] = buf1[1];
-  buf0[2] = buf1[2];
-  buf0[3] = buf1[3];
-  btf_32_sse4_1_type1(cospi[56], cospi[8], buf1[4], buf1[7], buf0[4], buf0[7],
-                      cos_bit);
-  btf_32_sse4_1_type1(cospi[24], cospi[40], buf1[5], buf1[6], buf0[5], buf0[6],
-                      cos_bit);
-  buf0[8] = _mm_add_epi32(buf1[8], buf1[9]);
-  buf0[9] = _mm_sub_epi32(buf1[8], buf1[9]);
-  buf0[10] = _mm_sub_epi32(buf1[11], buf1[10]);
-  buf0[11] = _mm_add_epi32(buf1[11], buf1[10]);
-  buf0[12] = _mm_add_epi32(buf1[12], buf1[13]);
-  buf0[13] = _mm_sub_epi32(buf1[12], buf1[13]);
-  buf0[14] = _mm_sub_epi32(buf1[15], buf1[14]);
-  buf0[15] = _mm_add_epi32(buf1[15], buf1[14]);
-  buf0[16] = buf1[16];
-  btf_32_sse4_1_type0(-cospi[8], cospi[56], buf1[17], buf1[30], buf0[17],
-                      buf0[30], cos_bit);
-  btf_32_sse4_1_type0(-cospi[56], -cospi[8], buf1[18], buf1[29], buf0[18],
-                      buf0[29], cos_bit);
-  buf0[19] = buf1[19];
-  buf0[20] = buf1[20];
-  btf_32_sse4_1_type0(-cospi[40], cospi[24], buf1[21], buf1[26], buf0[21],
-                      buf0[26], cos_bit);
-  btf_32_sse4_1_type0(-cospi[24], -cospi[40], buf1[22], buf1[25], buf0[22],
-                      buf0[25], cos_bit);
-  buf0[23] = buf1[23];
-  buf0[24] = buf1[24];
-  buf0[27] = buf1[27];
-  buf0[28] = buf1[28];
-  buf0[31] = buf1[31];
-
-  // stage 7
-  cospi = cospi_arr(cos_bit);
-  buf1[0] = buf0[0];
-  buf1[1] = buf0[1];
-  buf1[2] = buf0[2];
-  buf1[3] = buf0[3];
-  buf1[4] = buf0[4];
-  buf1[5] = buf0[5];
-  buf1[6] = buf0[6];
-  buf1[7] = buf0[7];
-  btf_32_sse4_1_type1(cospi[60], cospi[4], buf0[8], buf0[15], buf1[8], buf1[15],
-                      cos_bit);
-  btf_32_sse4_1_type1(cospi[28], cospi[36], buf0[9], buf0[14], buf1[9],
-                      buf1[14], cos_bit);
-  btf_32_sse4_1_type1(cospi[44], cospi[20], buf0[10], buf0[13], buf1[10],
-                      buf1[13], cos_bit);
-  btf_32_sse4_1_type1(cospi[12], cospi[52], buf0[11], buf0[12], buf1[11],
-                      buf1[12], cos_bit);
-  buf1[16] = _mm_add_epi32(buf0[16], buf0[17]);
-  buf1[17] = _mm_sub_epi32(buf0[16], buf0[17]);
-  buf1[18] = _mm_sub_epi32(buf0[19], buf0[18]);
-  buf1[19] = _mm_add_epi32(buf0[19], buf0[18]);
-  buf1[20] = _mm_add_epi32(buf0[20], buf0[21]);
-  buf1[21] = _mm_sub_epi32(buf0[20], buf0[21]);
-  buf1[22] = _mm_sub_epi32(buf0[23], buf0[22]);
-  buf1[23] = _mm_add_epi32(buf0[23], buf0[22]);
-  buf1[24] = _mm_add_epi32(buf0[24], buf0[25]);
-  buf1[25] = _mm_sub_epi32(buf0[24], buf0[25]);
-  buf1[26] = _mm_sub_epi32(buf0[27], buf0[26]);
-  buf1[27] = _mm_add_epi32(buf0[27], buf0[26]);
-  buf1[28] = _mm_add_epi32(buf0[28], buf0[29]);
-  buf1[29] = _mm_sub_epi32(buf0[28], buf0[29]);
-  buf1[30] = _mm_sub_epi32(buf0[31], buf0[30]);
-  buf1[31] = _mm_add_epi32(buf0[31], buf0[30]);
-
-  // stage 8
-  cospi = cospi_arr(cos_bit);
-  buf0[0] = buf1[0];
-  buf0[1] = buf1[1];
-  buf0[2] = buf1[2];
-  buf0[3] = buf1[3];
-  buf0[4] = buf1[4];
-  buf0[5] = buf1[5];
-  buf0[6] = buf1[6];
-  buf0[7] = buf1[7];
-  buf0[8] = buf1[8];
-  buf0[9] = buf1[9];
-  buf0[10] = buf1[10];
-  buf0[11] = buf1[11];
-  buf0[12] = buf1[12];
-  buf0[13] = buf1[13];
-  buf0[14] = buf1[14];
-  buf0[15] = buf1[15];
-  btf_32_sse4_1_type1(cospi[62], cospi[2], buf1[16], buf1[31], buf0[16],
-                      buf0[31], cos_bit);
-  btf_32_sse4_1_type1(cospi[30], cospi[34], buf1[17], buf1[30], buf0[17],
-                      buf0[30], cos_bit);
-  btf_32_sse4_1_type1(cospi[46], cospi[18], buf1[18], buf1[29], buf0[18],
-                      buf0[29], cos_bit);
-  btf_32_sse4_1_type1(cospi[14], cospi[50], buf1[19], buf1[28], buf0[19],
-                      buf0[28], cos_bit);
-  btf_32_sse4_1_type1(cospi[54], cospi[10], buf1[20], buf1[27], buf0[20],
-                      buf0[27], cos_bit);
-  btf_32_sse4_1_type1(cospi[22], cospi[42], buf1[21], buf1[26], buf0[21],
-                      buf0[26], cos_bit);
-  btf_32_sse4_1_type1(cospi[38], cospi[26], buf1[22], buf1[25], buf0[22],
-                      buf0[25], cos_bit);
-  btf_32_sse4_1_type1(cospi[6], cospi[58], buf1[23], buf1[24], buf0[23],
-                      buf0[24], cos_bit);
-
-  // stage 9
-  output[0] = buf0[0];
-  output[1] = buf0[16];
-  output[2] = buf0[8];
-  output[3] = buf0[24];
-  output[4] = buf0[4];
-  output[5] = buf0[20];
-  output[6] = buf0[12];
-  output[7] = buf0[28];
-  output[8] = buf0[2];
-  output[9] = buf0[18];
-  output[10] = buf0[10];
-  output[11] = buf0[26];
-  output[12] = buf0[6];
-  output[13] = buf0[22];
-  output[14] = buf0[14];
-  output[15] = buf0[30];
-  output[16] = buf0[1];
-  output[17] = buf0[17];
-  output[18] = buf0[9];
-  output[19] = buf0[25];
-  output[20] = buf0[5];
-  output[21] = buf0[21];
-  output[22] = buf0[13];
-  output[23] = buf0[29];
-  output[24] = buf0[3];
-  output[25] = buf0[19];
-  output[26] = buf0[11];
-  output[27] = buf0[27];
-  output[28] = buf0[7];
-  output[29] = buf0[23];
-  output[30] = buf0[15];
-  output[31] = buf0[31];
-}
-
-void av1_fadst4_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range) {
-  const int txfm_size = 4;
-  const int num_per_128 = 4;
-  const int32_t *cospi;
-  __m128i buf0[4];
-  __m128i buf1[4];
-  int col_num = txfm_size / num_per_128;
-  int col;
-  (void)stage_range;
-  for (col = 0; col < col_num; col++) {
-    // stage 0;
-    int32_t stage_idx = 0;
-    int j;
-    for (j = 0; j < 4; ++j) {
-      buf0[j] = input[j * col_num + col];
-    }
-
-    // stage 1
-    stage_idx++;
-    buf1[0] = buf0[3];
-    buf1[1] = buf0[0];
-    buf1[2] = buf0[1];
-    buf1[3] = buf0[2];
-
-    // stage 2
-    stage_idx++;
-
-    cospi = cospi_arr(cos_bit);
-    btf_32_sse4_1_type0(cospi[8], cospi[56], buf1[0], buf1[1], buf0[0], buf0[1],
-                        cos_bit);
-    btf_32_sse4_1_type0(cospi[40], cospi[24], buf1[2], buf1[3], buf0[2],
-                        buf0[3], cos_bit);
-
-    // stage 3
-    stage_idx++;
-    buf1[0] = _mm_add_epi32(buf0[0], buf0[2]);
-    buf1[2] = _mm_sub_epi32(buf0[0], buf0[2]);
-    buf1[1] = _mm_add_epi32(buf0[1], buf0[3]);
-    buf1[3] = _mm_sub_epi32(buf0[1], buf0[3]);
-
-    // stage 4
-    stage_idx++;
-
-    cospi = cospi_arr(cos_bit);
-    buf0[0] = buf1[0];
-    buf0[1] = buf1[1];
-    btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[2], buf1[3], buf0[2],
-                        buf0[3], cos_bit);
-
-    // stage 5
-    stage_idx++;
-    buf1[0] = buf0[0];
-    buf1[1] = _mm_sub_epi32(_mm_setzero_si128(), buf0[2]);
-    buf1[2] = buf0[3];
-    buf1[3] = _mm_sub_epi32(_mm_setzero_si128(), buf0[1]);
-
-    for (j = 0; j < 4; ++j) {
-      output[j * col_num + col] = buf1[j];
-    }
-  }
-}
-
-void av1_fdct64_new_sse4_1(const __m128i *input, __m128i *output,
-                           int8_t cos_bit, const int instride,
-                           const int outstride) {
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
-  __m128i cospi_m32 = _mm_set1_epi32(-cospi[32]);
-  __m128i cospi_p32 = _mm_set1_epi32(cospi[32]);
-  __m128i cospi_m16 = _mm_set1_epi32(-cospi[16]);
-  __m128i cospi_p48 = _mm_set1_epi32(cospi[48]);
-  __m128i cospi_m48 = _mm_set1_epi32(-cospi[48]);
-  __m128i cospi_p16 = _mm_set1_epi32(cospi[16]);
-  __m128i cospi_m08 = _mm_set1_epi32(-cospi[8]);
-  __m128i cospi_p56 = _mm_set1_epi32(cospi[56]);
-  __m128i cospi_m56 = _mm_set1_epi32(-cospi[56]);
-  __m128i cospi_m40 = _mm_set1_epi32(-cospi[40]);
-  __m128i cospi_p24 = _mm_set1_epi32(cospi[24]);
-  __m128i cospi_m24 = _mm_set1_epi32(-cospi[24]);
-  __m128i cospi_p08 = _mm_set1_epi32(cospi[8]);
-  __m128i cospi_p40 = _mm_set1_epi32(cospi[40]);
-  __m128i cospi_p60 = _mm_set1_epi32(cospi[60]);
-  __m128i cospi_p04 = _mm_set1_epi32(cospi[4]);
-  __m128i cospi_p28 = _mm_set1_epi32(cospi[28]);
-  __m128i cospi_p36 = _mm_set1_epi32(cospi[36]);
-  __m128i cospi_p44 = _mm_set1_epi32(cospi[44]);
-  __m128i cospi_p20 = _mm_set1_epi32(cospi[20]);
-  __m128i cospi_p12 = _mm_set1_epi32(cospi[12]);
-  __m128i cospi_p52 = _mm_set1_epi32(cospi[52]);
-  __m128i cospi_m04 = _mm_set1_epi32(-cospi[4]);
-  __m128i cospi_m60 = _mm_set1_epi32(-cospi[60]);
-  __m128i cospi_m36 = _mm_set1_epi32(-cospi[36]);
-  __m128i cospi_m28 = _mm_set1_epi32(-cospi[28]);
-  __m128i cospi_m20 = _mm_set1_epi32(-cospi[20]);
-  __m128i cospi_m44 = _mm_set1_epi32(-cospi[44]);
-  __m128i cospi_m52 = _mm_set1_epi32(-cospi[52]);
-  __m128i cospi_m12 = _mm_set1_epi32(-cospi[12]);
-  __m128i cospi_p62 = _mm_set1_epi32(cospi[62]);
-  __m128i cospi_p02 = _mm_set1_epi32(cospi[2]);
-  __m128i cospi_p30 = _mm_set1_epi32(cospi[30]);
-  __m128i cospi_p34 = _mm_set1_epi32(cospi[34]);
-  __m128i cospi_p46 = _mm_set1_epi32(cospi[46]);
-  __m128i cospi_p18 = _mm_set1_epi32(cospi[18]);
-  __m128i cospi_p14 = _mm_set1_epi32(cospi[14]);
-  __m128i cospi_p50 = _mm_set1_epi32(cospi[50]);
-  __m128i cospi_p54 = _mm_set1_epi32(cospi[54]);
-  __m128i cospi_p10 = _mm_set1_epi32(cospi[10]);
-  __m128i cospi_p22 = _mm_set1_epi32(cospi[22]);
-  __m128i cospi_p42 = _mm_set1_epi32(cospi[42]);
-  __m128i cospi_p38 = _mm_set1_epi32(cospi[38]);
-  __m128i cospi_p26 = _mm_set1_epi32(cospi[26]);
-  __m128i cospi_p06 = _mm_set1_epi32(cospi[6]);
-  __m128i cospi_p58 = _mm_set1_epi32(cospi[58]);
-  __m128i cospi_p63 = _mm_set1_epi32(cospi[63]);
-  __m128i cospi_p01 = _mm_set1_epi32(cospi[1]);
-  __m128i cospi_p31 = _mm_set1_epi32(cospi[31]);
-  __m128i cospi_p33 = _mm_set1_epi32(cospi[33]);
-  __m128i cospi_p47 = _mm_set1_epi32(cospi[47]);
-  __m128i cospi_p17 = _mm_set1_epi32(cospi[17]);
-  __m128i cospi_p15 = _mm_set1_epi32(cospi[15]);
-  __m128i cospi_p49 = _mm_set1_epi32(cospi[49]);
-  __m128i cospi_p55 = _mm_set1_epi32(cospi[55]);
-  __m128i cospi_p09 = _mm_set1_epi32(cospi[9]);
-  __m128i cospi_p23 = _mm_set1_epi32(cospi[23]);
-  __m128i cospi_p41 = _mm_set1_epi32(cospi[41]);
-  __m128i cospi_p39 = _mm_set1_epi32(cospi[39]);
-  __m128i cospi_p25 = _mm_set1_epi32(cospi[25]);
-  __m128i cospi_p07 = _mm_set1_epi32(cospi[7]);
-  __m128i cospi_p57 = _mm_set1_epi32(cospi[57]);
-  __m128i cospi_p59 = _mm_set1_epi32(cospi[59]);
-  __m128i cospi_p05 = _mm_set1_epi32(cospi[5]);
-  __m128i cospi_p27 = _mm_set1_epi32(cospi[27]);
-  __m128i cospi_p37 = _mm_set1_epi32(cospi[37]);
-  __m128i cospi_p43 = _mm_set1_epi32(cospi[43]);
-  __m128i cospi_p21 = _mm_set1_epi32(cospi[21]);
-  __m128i cospi_p11 = _mm_set1_epi32(cospi[11]);
-  __m128i cospi_p53 = _mm_set1_epi32(cospi[53]);
-  __m128i cospi_p51 = _mm_set1_epi32(cospi[51]);
-  __m128i cospi_p13 = _mm_set1_epi32(cospi[13]);
-  __m128i cospi_p19 = _mm_set1_epi32(cospi[19]);
-  __m128i cospi_p45 = _mm_set1_epi32(cospi[45]);
-  __m128i cospi_p35 = _mm_set1_epi32(cospi[35]);
-  __m128i cospi_p29 = _mm_set1_epi32(cospi[29]);
-  __m128i cospi_p03 = _mm_set1_epi32(cospi[3]);
-  __m128i cospi_p61 = _mm_set1_epi32(cospi[61]);
-
-  // stage 1
-  __m128i x1[64];
-  x1[0] = _mm_add_epi32(input[0 * instride], input[63 * instride]);
-  x1[63] = _mm_sub_epi32(input[0 * instride], input[63 * instride]);
-  x1[1] = _mm_add_epi32(input[1 * instride], input[62 * instride]);
-  x1[62] = _mm_sub_epi32(input[1 * instride], input[62 * instride]);
-  x1[2] = _mm_add_epi32(input[2 * instride], input[61 * instride]);
-  x1[61] = _mm_sub_epi32(input[2 * instride], input[61 * instride]);
-  x1[3] = _mm_add_epi32(input[3 * instride], input[60 * instride]);
-  x1[60] = _mm_sub_epi32(input[3 * instride], input[60 * instride]);
-  x1[4] = _mm_add_epi32(input[4 * instride], input[59 * instride]);
-  x1[59] = _mm_sub_epi32(input[4 * instride], input[59 * instride]);
-  x1[5] = _mm_add_epi32(input[5 * instride], input[58 * instride]);
-  x1[58] = _mm_sub_epi32(input[5 * instride], input[58 * instride]);
-  x1[6] = _mm_add_epi32(input[6 * instride], input[57 * instride]);
-  x1[57] = _mm_sub_epi32(input[6 * instride], input[57 * instride]);
-  x1[7] = _mm_add_epi32(input[7 * instride], input[56 * instride]);
-  x1[56] = _mm_sub_epi32(input[7 * instride], input[56 * instride]);
-  x1[8] = _mm_add_epi32(input[8 * instride], input[55 * instride]);
-  x1[55] = _mm_sub_epi32(input[8 * instride], input[55 * instride]);
-  x1[9] = _mm_add_epi32(input[9 * instride], input[54 * instride]);
-  x1[54] = _mm_sub_epi32(input[9 * instride], input[54 * instride]);
-  x1[10] = _mm_add_epi32(input[10 * instride], input[53 * instride]);
-  x1[53] = _mm_sub_epi32(input[10 * instride], input[53 * instride]);
-  x1[11] = _mm_add_epi32(input[11 * instride], input[52 * instride]);
-  x1[52] = _mm_sub_epi32(input[11 * instride], input[52 * instride]);
-  x1[12] = _mm_add_epi32(input[12 * instride], input[51 * instride]);
-  x1[51] = _mm_sub_epi32(input[12 * instride], input[51 * instride]);
-  x1[13] = _mm_add_epi32(input[13 * instride], input[50 * instride]);
-  x1[50] = _mm_sub_epi32(input[13 * instride], input[50 * instride]);
-  x1[14] = _mm_add_epi32(input[14 * instride], input[49 * instride]);
-  x1[49] = _mm_sub_epi32(input[14 * instride], input[49 * instride]);
-  x1[15] = _mm_add_epi32(input[15 * instride], input[48 * instride]);
-  x1[48] = _mm_sub_epi32(input[15 * instride], input[48 * instride]);
-  x1[16] = _mm_add_epi32(input[16 * instride], input[47 * instride]);
-  x1[47] = _mm_sub_epi32(input[16 * instride], input[47 * instride]);
-  x1[17] = _mm_add_epi32(input[17 * instride], input[46 * instride]);
-  x1[46] = _mm_sub_epi32(input[17 * instride], input[46 * instride]);
-  x1[18] = _mm_add_epi32(input[18 * instride], input[45 * instride]);
-  x1[45] = _mm_sub_epi32(input[18 * instride], input[45 * instride]);
-  x1[19] = _mm_add_epi32(input[19 * instride], input[44 * instride]);
-  x1[44] = _mm_sub_epi32(input[19 * instride], input[44 * instride]);
-  x1[20] = _mm_add_epi32(input[20 * instride], input[43 * instride]);
-  x1[43] = _mm_sub_epi32(input[20 * instride], input[43 * instride]);
-  x1[21] = _mm_add_epi32(input[21 * instride], input[42 * instride]);
-  x1[42] = _mm_sub_epi32(input[21 * instride], input[42 * instride]);
-  x1[22] = _mm_add_epi32(input[22 * instride], input[41 * instride]);
-  x1[41] = _mm_sub_epi32(input[22 * instride], input[41 * instride]);
-  x1[23] = _mm_add_epi32(input[23 * instride], input[40 * instride]);
-  x1[40] = _mm_sub_epi32(input[23 * instride], input[40 * instride]);
-  x1[24] = _mm_add_epi32(input[24 * instride], input[39 * instride]);
-  x1[39] = _mm_sub_epi32(input[24 * instride], input[39 * instride]);
-  x1[25] = _mm_add_epi32(input[25 * instride], input[38 * instride]);
-  x1[38] = _mm_sub_epi32(input[25 * instride], input[38 * instride]);
-  x1[26] = _mm_add_epi32(input[26 * instride], input[37 * instride]);
-  x1[37] = _mm_sub_epi32(input[26 * instride], input[37 * instride]);
-  x1[27] = _mm_add_epi32(input[27 * instride], input[36 * instride]);
-  x1[36] = _mm_sub_epi32(input[27 * instride], input[36 * instride]);
-  x1[28] = _mm_add_epi32(input[28 * instride], input[35 * instride]);
-  x1[35] = _mm_sub_epi32(input[28 * instride], input[35 * instride]);
-  x1[29] = _mm_add_epi32(input[29 * instride], input[34 * instride]);
-  x1[34] = _mm_sub_epi32(input[29 * instride], input[34 * instride]);
-  x1[30] = _mm_add_epi32(input[30 * instride], input[33 * instride]);
-  x1[33] = _mm_sub_epi32(input[30 * instride], input[33 * instride]);
-  x1[31] = _mm_add_epi32(input[31 * instride], input[32 * instride]);
-  x1[32] = _mm_sub_epi32(input[31 * instride], input[32 * instride]);
-
-  // stage 2
-  __m128i x2[64];
-  x2[0] = _mm_add_epi32(x1[0], x1[31]);
-  x2[31] = _mm_sub_epi32(x1[0], x1[31]);
-  x2[1] = _mm_add_epi32(x1[1], x1[30]);
-  x2[30] = _mm_sub_epi32(x1[1], x1[30]);
-  x2[2] = _mm_add_epi32(x1[2], x1[29]);
-  x2[29] = _mm_sub_epi32(x1[2], x1[29]);
-  x2[3] = _mm_add_epi32(x1[3], x1[28]);
-  x2[28] = _mm_sub_epi32(x1[3], x1[28]);
-  x2[4] = _mm_add_epi32(x1[4], x1[27]);
-  x2[27] = _mm_sub_epi32(x1[4], x1[27]);
-  x2[5] = _mm_add_epi32(x1[5], x1[26]);
-  x2[26] = _mm_sub_epi32(x1[5], x1[26]);
-  x2[6] = _mm_add_epi32(x1[6], x1[25]);
-  x2[25] = _mm_sub_epi32(x1[6], x1[25]);
-  x2[7] = _mm_add_epi32(x1[7], x1[24]);
-  x2[24] = _mm_sub_epi32(x1[7], x1[24]);
-  x2[8] = _mm_add_epi32(x1[8], x1[23]);
-  x2[23] = _mm_sub_epi32(x1[8], x1[23]);
-  x2[9] = _mm_add_epi32(x1[9], x1[22]);
-  x2[22] = _mm_sub_epi32(x1[9], x1[22]);
-  x2[10] = _mm_add_epi32(x1[10], x1[21]);
-  x2[21] = _mm_sub_epi32(x1[10], x1[21]);
-  x2[11] = _mm_add_epi32(x1[11], x1[20]);
-  x2[20] = _mm_sub_epi32(x1[11], x1[20]);
-  x2[12] = _mm_add_epi32(x1[12], x1[19]);
-  x2[19] = _mm_sub_epi32(x1[12], x1[19]);
-  x2[13] = _mm_add_epi32(x1[13], x1[18]);
-  x2[18] = _mm_sub_epi32(x1[13], x1[18]);
-  x2[14] = _mm_add_epi32(x1[14], x1[17]);
-  x2[17] = _mm_sub_epi32(x1[14], x1[17]);
-  x2[15] = _mm_add_epi32(x1[15], x1[16]);
-  x2[16] = _mm_sub_epi32(x1[15], x1[16]);
-  x2[32] = x1[32];
-  x2[33] = x1[33];
-  x2[34] = x1[34];
-  x2[35] = x1[35];
-  x2[36] = x1[36];
-  x2[37] = x1[37];
-  x2[38] = x1[38];
-  x2[39] = x1[39];
-  btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[40], x1[55], x2[40], x2[55],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[41], x1[54], x2[41], x2[54],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[42], x1[53], x2[42], x2[53],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[43], x1[52], x2[43], x2[52],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[44], x1[51], x2[44], x2[51],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[45], x1[50], x2[45], x2[50],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[46], x1[49], x2[46], x2[49],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[47], x1[48], x2[47], x2[48],
-                          __rounding, cos_bit);
-  x2[56] = x1[56];
-  x2[57] = x1[57];
-  x2[58] = x1[58];
-  x2[59] = x1[59];
-  x2[60] = x1[60];
-  x2[61] = x1[61];
-  x2[62] = x1[62];
-  x2[63] = x1[63];
-
-  // stage 3
-  __m128i x3[64];
-  x3[0] = _mm_add_epi32(x2[0], x2[15]);
-  x3[15] = _mm_sub_epi32(x2[0], x2[15]);
-  x3[1] = _mm_add_epi32(x2[1], x2[14]);
-  x3[14] = _mm_sub_epi32(x2[1], x2[14]);
-  x3[2] = _mm_add_epi32(x2[2], x2[13]);
-  x3[13] = _mm_sub_epi32(x2[2], x2[13]);
-  x3[3] = _mm_add_epi32(x2[3], x2[12]);
-  x3[12] = _mm_sub_epi32(x2[3], x2[12]);
-  x3[4] = _mm_add_epi32(x2[4], x2[11]);
-  x3[11] = _mm_sub_epi32(x2[4], x2[11]);
-  x3[5] = _mm_add_epi32(x2[5], x2[10]);
-  x3[10] = _mm_sub_epi32(x2[5], x2[10]);
-  x3[6] = _mm_add_epi32(x2[6], x2[9]);
-  x3[9] = _mm_sub_epi32(x2[6], x2[9]);
-  x3[7] = _mm_add_epi32(x2[7], x2[8]);
-  x3[8] = _mm_sub_epi32(x2[7], x2[8]);
-  x3[16] = x2[16];
-  x3[17] = x2[17];
-  x3[18] = x2[18];
-  x3[19] = x2[19];
-  btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x2[20], x2[27], x3[20], x3[27],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x2[21], x2[26], x3[21], x3[26],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x2[22], x2[25], x3[22], x3[25],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x2[23], x2[24], x3[23], x3[24],
-                          __rounding, cos_bit);
-  x3[28] = x2[28];
-  x3[29] = x2[29];
-  x3[30] = x2[30];
-  x3[31] = x2[31];
-  x3[32] = _mm_add_epi32(x2[32], x2[47]);
-  x3[47] = _mm_sub_epi32(x2[32], x2[47]);
-  x3[33] = _mm_add_epi32(x2[33], x2[46]);
-  x3[46] = _mm_sub_epi32(x2[33], x2[46]);
-  x3[34] = _mm_add_epi32(x2[34], x2[45]);
-  x3[45] = _mm_sub_epi32(x2[34], x2[45]);
-  x3[35] = _mm_add_epi32(x2[35], x2[44]);
-  x3[44] = _mm_sub_epi32(x2[35], x2[44]);
-  x3[36] = _mm_add_epi32(x2[36], x2[43]);
-  x3[43] = _mm_sub_epi32(x2[36], x2[43]);
-  x3[37] = _mm_add_epi32(x2[37], x2[42]);
-  x3[42] = _mm_sub_epi32(x2[37], x2[42]);
-  x3[38] = _mm_add_epi32(x2[38], x2[41]);
-  x3[41] = _mm_sub_epi32(x2[38], x2[41]);
-  x3[39] = _mm_add_epi32(x2[39], x2[40]);
-  x3[40] = _mm_sub_epi32(x2[39], x2[40]);
-  x3[48] = _mm_sub_epi32(x2[63], x2[48]);
-  x3[63] = _mm_add_epi32(x2[63], x2[48]);
-  x3[49] = _mm_sub_epi32(x2[62], x2[49]);
-  x3[62] = _mm_add_epi32(x2[62], x2[49]);
-  x3[50] = _mm_sub_epi32(x2[61], x2[50]);
-  x3[61] = _mm_add_epi32(x2[61], x2[50]);
-  x3[51] = _mm_sub_epi32(x2[60], x2[51]);
-  x3[60] = _mm_add_epi32(x2[60], x2[51]);
-  x3[52] = _mm_sub_epi32(x2[59], x2[52]);
-  x3[59] = _mm_add_epi32(x2[59], x2[52]);
-  x3[53] = _mm_sub_epi32(x2[58], x2[53]);
-  x3[58] = _mm_add_epi32(x2[58], x2[53]);
-  x3[54] = _mm_sub_epi32(x2[57], x2[54]);
-  x3[57] = _mm_add_epi32(x2[57], x2[54]);
-  x3[55] = _mm_sub_epi32(x2[56], x2[55]);
-  x3[56] = _mm_add_epi32(x2[56], x2[55]);
-
-  // stage 4
-  __m128i x4[64];
-  x4[0] = _mm_add_epi32(x3[0], x3[7]);
-  x4[7] = _mm_sub_epi32(x3[0], x3[7]);
-  x4[1] = _mm_add_epi32(x3[1], x3[6]);
-  x4[6] = _mm_sub_epi32(x3[1], x3[6]);
-  x4[2] = _mm_add_epi32(x3[2], x3[5]);
-  x4[5] = _mm_sub_epi32(x3[2], x3[5]);
-  x4[3] = _mm_add_epi32(x3[3], x3[4]);
-  x4[4] = _mm_sub_epi32(x3[3], x3[4]);
-  x4[8] = x3[8];
-  x4[9] = x3[9];
-  btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x3[10], x3[13], x4[10], x4[13],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x3[11], x3[12], x4[11], x4[12],
-                          __rounding, cos_bit);
-  x4[14] = x3[14];
-  x4[15] = x3[15];
-  x4[16] = _mm_add_epi32(x3[16], x3[23]);
-  x4[23] = _mm_sub_epi32(x3[16], x3[23]);
-  x4[17] = _mm_add_epi32(x3[17], x3[22]);
-  x4[22] = _mm_sub_epi32(x3[17], x3[22]);
-  x4[18] = _mm_add_epi32(x3[18], x3[21]);
-  x4[21] = _mm_sub_epi32(x3[18], x3[21]);
-  x4[19] = _mm_add_epi32(x3[19], x3[20]);
-  x4[20] = _mm_sub_epi32(x3[19], x3[20]);
-  x4[24] = _mm_sub_epi32(x3[31], x3[24]);
-  x4[31] = _mm_add_epi32(x3[31], x3[24]);
-  x4[25] = _mm_sub_epi32(x3[30], x3[25]);
-  x4[30] = _mm_add_epi32(x3[30], x3[25]);
-  x4[26] = _mm_sub_epi32(x3[29], x3[26]);
-  x4[29] = _mm_add_epi32(x3[29], x3[26]);
-  x4[27] = _mm_sub_epi32(x3[28], x3[27]);
-  x4[28] = _mm_add_epi32(x3[28], x3[27]);
-  x4[32] = x3[32];
-  x4[33] = x3[33];
-  x4[34] = x3[34];
-  x4[35] = x3[35];
-  btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x3[36], x3[59], x4[36], x4[59],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x3[37], x3[58], x4[37], x4[58],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x3[38], x3[57], x4[38], x4[57],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x3[39], x3[56], x4[39], x4[56],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x3[40], x3[55], x4[40], x4[55],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x3[41], x3[54], x4[41], x4[54],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x3[42], x3[53], x4[42], x4[53],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x3[43], x3[52], x4[43], x4[52],
-                          __rounding, cos_bit);
-  x4[44] = x3[44];
-  x4[45] = x3[45];
-  x4[46] = x3[46];
-  x4[47] = x3[47];
-  x4[48] = x3[48];
-  x4[49] = x3[49];
-  x4[50] = x3[50];
-  x4[51] = x3[51];
-  x4[60] = x3[60];
-  x4[61] = x3[61];
-  x4[62] = x3[62];
-  x4[63] = x3[63];
-
-  // stage 5
-  __m128i x5[64];
-  x5[0] = _mm_add_epi32(x4[0], x4[3]);
-  x5[3] = _mm_sub_epi32(x4[0], x4[3]);
-  x5[1] = _mm_add_epi32(x4[1], x4[2]);
-  x5[2] = _mm_sub_epi32(x4[1], x4[2]);
-  x5[4] = x4[4];
-  btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x4[5], x4[6], x5[5], x5[6],
-                          __rounding, cos_bit);
-  x5[7] = x4[7];
-  x5[8] = _mm_add_epi32(x4[8], x4[11]);
-  x5[11] = _mm_sub_epi32(x4[8], x4[11]);
-  x5[9] = _mm_add_epi32(x4[9], x4[10]);
-  x5[10] = _mm_sub_epi32(x4[9], x4[10]);
-  x5[12] = _mm_sub_epi32(x4[15], x4[12]);
-  x5[15] = _mm_add_epi32(x4[15], x4[12]);
-  x5[13] = _mm_sub_epi32(x4[14], x4[13]);
-  x5[14] = _mm_add_epi32(x4[14], x4[13]);
-  x5[16] = x4[16];
-  x5[17] = x4[17];
-  btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x4[18], x4[29], x5[18], x5[29],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x4[19], x4[28], x5[19], x5[28],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x4[20], x4[27], x5[20], x5[27],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x4[21], x4[26], x5[21], x5[26],
-                          __rounding, cos_bit);
-  x5[22] = x4[22];
-  x5[23] = x4[23];
-  x5[24] = x4[24];
-  x5[25] = x4[25];
-  x5[30] = x4[30];
-  x5[31] = x4[31];
-  x5[32] = _mm_add_epi32(x4[32], x4[39]);
-  x5[39] = _mm_sub_epi32(x4[32], x4[39]);
-  x5[33] = _mm_add_epi32(x4[33], x4[38]);
-  x5[38] = _mm_sub_epi32(x4[33], x4[38]);
-  x5[34] = _mm_add_epi32(x4[34], x4[37]);
-  x5[37] = _mm_sub_epi32(x4[34], x4[37]);
-  x5[35] = _mm_add_epi32(x4[35], x4[36]);
-  x5[36] = _mm_sub_epi32(x4[35], x4[36]);
-  x5[40] = _mm_sub_epi32(x4[47], x4[40]);
-  x5[47] = _mm_add_epi32(x4[47], x4[40]);
-  x5[41] = _mm_sub_epi32(x4[46], x4[41]);
-  x5[46] = _mm_add_epi32(x4[46], x4[41]);
-  x5[42] = _mm_sub_epi32(x4[45], x4[42]);
-  x5[45] = _mm_add_epi32(x4[45], x4[42]);
-  x5[43] = _mm_sub_epi32(x4[44], x4[43]);
-  x5[44] = _mm_add_epi32(x4[44], x4[43]);
-  x5[48] = _mm_add_epi32(x4[48], x4[55]);
-  x5[55] = _mm_sub_epi32(x4[48], x4[55]);
-  x5[49] = _mm_add_epi32(x4[49], x4[54]);
-  x5[54] = _mm_sub_epi32(x4[49], x4[54]);
-  x5[50] = _mm_add_epi32(x4[50], x4[53]);
-  x5[53] = _mm_sub_epi32(x4[50], x4[53]);
-  x5[51] = _mm_add_epi32(x4[51], x4[52]);
-  x5[52] = _mm_sub_epi32(x4[51], x4[52]);
-  x5[56] = _mm_sub_epi32(x4[63], x4[56]);
-  x5[63] = _mm_add_epi32(x4[63], x4[56]);
-  x5[57] = _mm_sub_epi32(x4[62], x4[57]);
-  x5[62] = _mm_add_epi32(x4[62], x4[57]);
-  x5[58] = _mm_sub_epi32(x4[61], x4[58]);
-  x5[61] = _mm_add_epi32(x4[61], x4[58]);
-  x5[59] = _mm_sub_epi32(x4[60], x4[59]);
-  x5[60] = _mm_add_epi32(x4[60], x4[59]);
-
-  // stage 6
-  __m128i x6[64];
-  btf_32_type0_sse4_1_new(cospi_p32, cospi_p32, x5[0], x5[1], x6[0], x6[1],
-                          __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p48, cospi_p16, x5[2], x5[3], x6[2], x6[3],
-                          __rounding, cos_bit);
-  x6[4] = _mm_add_epi32(x5[4], x5[5]);
-  x6[5] = _mm_sub_epi32(x5[4], x5[5]);
-  x6[6] = _mm_sub_epi32(x5[7], x5[6]);
-  x6[7] = _mm_add_epi32(x5[7], x5[6]);
-  x6[8] = x5[8];
-  btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x5[9], x5[14], x6[9], x6[14],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x5[10], x5[13], x6[10], x6[13],
-                          __rounding, cos_bit);
-  x6[11] = x5[11];
-  x6[12] = x5[12];
-  x6[15] = x5[15];
-  x6[16] = _mm_add_epi32(x5[16], x5[19]);
-  x6[19] = _mm_sub_epi32(x5[16], x5[19]);
-  x6[17] = _mm_add_epi32(x5[17], x5[18]);
-  x6[18] = _mm_sub_epi32(x5[17], x5[18]);
-  x6[20] = _mm_sub_epi32(x5[23], x5[20]);
-  x6[23] = _mm_add_epi32(x5[23], x5[20]);
-  x6[21] = _mm_sub_epi32(x5[22], x5[21]);
-  x6[22] = _mm_add_epi32(x5[22], x5[21]);
-  x6[24] = _mm_add_epi32(x5[24], x5[27]);
-  x6[27] = _mm_sub_epi32(x5[24], x5[27]);
-  x6[25] = _mm_add_epi32(x5[25], x5[26]);
-  x6[26] = _mm_sub_epi32(x5[25], x5[26]);
-  x6[28] = _mm_sub_epi32(x5[31], x5[28]);
-  x6[31] = _mm_add_epi32(x5[31], x5[28]);
-  x6[29] = _mm_sub_epi32(x5[30], x5[29]);
-  x6[30] = _mm_add_epi32(x5[30], x5[29]);
-  x6[32] = x5[32];
-  x6[33] = x5[33];
-  btf_32_type0_sse4_1_new(cospi_m08, cospi_p56, x5[34], x5[61], x6[34], x6[61],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m08, cospi_p56, x5[35], x5[60], x6[35], x6[60],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m56, cospi_m08, x5[36], x5[59], x6[36], x6[59],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m56, cospi_m08, x5[37], x5[58], x6[37], x6[58],
-                          __rounding, cos_bit);
-  x6[38] = x5[38];
-  x6[39] = x5[39];
-  x6[40] = x5[40];
-  x6[41] = x5[41];
-  btf_32_type0_sse4_1_new(cospi_m40, cospi_p24, x5[42], x5[53], x6[42], x6[53],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m40, cospi_p24, x5[43], x5[52], x6[43], x6[52],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m24, cospi_m40, x5[44], x5[51], x6[44], x6[51],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m24, cospi_m40, x5[45], x5[50], x6[45], x6[50],
-                          __rounding, cos_bit);
-  x6[46] = x5[46];
-  x6[47] = x5[47];
-  x6[48] = x5[48];
-  x6[49] = x5[49];
-  x6[54] = x5[54];
-  x6[55] = x5[55];
-  x6[56] = x5[56];
-  x6[57] = x5[57];
-  x6[62] = x5[62];
-  x6[63] = x5[63];
-
-  // stage 7
-  __m128i x7[64];
-  x7[0] = x6[0];
-  x7[1] = x6[1];
-  x7[2] = x6[2];
-  x7[3] = x6[3];
-  btf_32_type1_sse4_1_new(cospi_p56, cospi_p08, x6[4], x6[7], x7[4], x7[7],
-                          __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p24, cospi_p40, x6[5], x6[6], x7[5], x7[6],
-                          __rounding, cos_bit);
-  x7[8] = _mm_add_epi32(x6[8], x6[9]);
-  x7[9] = _mm_sub_epi32(x6[8], x6[9]);
-  x7[10] = _mm_sub_epi32(x6[11], x6[10]);
-  x7[11] = _mm_add_epi32(x6[11], x6[10]);
-  x7[12] = _mm_add_epi32(x6[12], x6[13]);
-  x7[13] = _mm_sub_epi32(x6[12], x6[13]);
-  x7[14] = _mm_sub_epi32(x6[15], x6[14]);
-  x7[15] = _mm_add_epi32(x6[15], x6[14]);
-  x7[16] = x6[16];
-  btf_32_type0_sse4_1_new(cospi_m08, cospi_p56, x6[17], x6[30], x7[17], x7[30],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m56, cospi_m08, x6[18], x6[29], x7[18], x7[29],
-                          __rounding, cos_bit);
-  x7[19] = x6[19];
-  x7[20] = x6[20];
-  btf_32_type0_sse4_1_new(cospi_m40, cospi_p24, x6[21], x6[26], x7[21], x7[26],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m24, cospi_m40, x6[22], x6[25], x7[22], x7[25],
-                          __rounding, cos_bit);
-  x7[23] = x6[23];
-  x7[24] = x6[24];
-  x7[27] = x6[27];
-  x7[28] = x6[28];
-  x7[31] = x6[31];
-  x7[32] = _mm_add_epi32(x6[32], x6[35]);
-  x7[35] = _mm_sub_epi32(x6[32], x6[35]);
-  x7[33] = _mm_add_epi32(x6[33], x6[34]);
-  x7[34] = _mm_sub_epi32(x6[33], x6[34]);
-  x7[36] = _mm_sub_epi32(x6[39], x6[36]);
-  x7[39] = _mm_add_epi32(x6[39], x6[36]);
-  x7[37] = _mm_sub_epi32(x6[38], x6[37]);
-  x7[38] = _mm_add_epi32(x6[38], x6[37]);
-  x7[40] = _mm_add_epi32(x6[40], x6[43]);
-  x7[43] = _mm_sub_epi32(x6[40], x6[43]);
-  x7[41] = _mm_add_epi32(x6[41], x6[42]);
-  x7[42] = _mm_sub_epi32(x6[41], x6[42]);
-  x7[44] = _mm_sub_epi32(x6[47], x6[44]);
-  x7[47] = _mm_add_epi32(x6[47], x6[44]);
-  x7[45] = _mm_sub_epi32(x6[46], x6[45]);
-  x7[46] = _mm_add_epi32(x6[46], x6[45]);
-  x7[48] = _mm_add_epi32(x6[48], x6[51]);
-  x7[51] = _mm_sub_epi32(x6[48], x6[51]);
-  x7[49] = _mm_add_epi32(x6[49], x6[50]);
-  x7[50] = _mm_sub_epi32(x6[49], x6[50]);
-  x7[52] = _mm_sub_epi32(x6[55], x6[52]);
-  x7[55] = _mm_add_epi32(x6[55], x6[52]);
-  x7[53] = _mm_sub_epi32(x6[54], x6[53]);
-  x7[54] = _mm_add_epi32(x6[54], x6[53]);
-  x7[56] = _mm_add_epi32(x6[56], x6[59]);
-  x7[59] = _mm_sub_epi32(x6[56], x6[59]);
-  x7[57] = _mm_add_epi32(x6[57], x6[58]);
-  x7[58] = _mm_sub_epi32(x6[57], x6[58]);
-  x7[60] = _mm_sub_epi32(x6[63], x6[60]);
-  x7[63] = _mm_add_epi32(x6[63], x6[60]);
-  x7[61] = _mm_sub_epi32(x6[62], x6[61]);
-  x7[62] = _mm_add_epi32(x6[62], x6[61]);
-
-  // stage 8
-  __m128i x8[64];
-  x8[0] = x7[0];
-  x8[1] = x7[1];
-  x8[2] = x7[2];
-  x8[3] = x7[3];
-  x8[4] = x7[4];
-  x8[5] = x7[5];
-  x8[6] = x7[6];
-  x8[7] = x7[7];
-  btf_32_type1_sse4_1_new(cospi_p60, cospi_p04, x7[8], x7[15], x8[8], x8[15],
-                          __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p28, cospi_p36, x7[9], x7[14], x8[9], x8[14],
-                          __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p44, cospi_p20, x7[10], x7[13], x8[10], x8[13],
-                          __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p12, cospi_p52, x7[11], x7[12], x8[11], x8[12],
-                          __rounding, cos_bit);
-  x8[16] = _mm_add_epi32(x7[16], x7[17]);
-  x8[17] = _mm_sub_epi32(x7[16], x7[17]);
-  x8[18] = _mm_sub_epi32(x7[19], x7[18]);
-  x8[19] = _mm_add_epi32(x7[19], x7[18]);
-  x8[20] = _mm_add_epi32(x7[20], x7[21]);
-  x8[21] = _mm_sub_epi32(x7[20], x7[21]);
-  x8[22] = _mm_sub_epi32(x7[23], x7[22]);
-  x8[23] = _mm_add_epi32(x7[23], x7[22]);
-  x8[24] = _mm_add_epi32(x7[24], x7[25]);
-  x8[25] = _mm_sub_epi32(x7[24], x7[25]);
-  x8[26] = _mm_sub_epi32(x7[27], x7[26]);
-  x8[27] = _mm_add_epi32(x7[27], x7[26]);
-  x8[28] = _mm_add_epi32(x7[28], x7[29]);
-  x8[29] = _mm_sub_epi32(x7[28], x7[29]);
-  x8[30] = _mm_sub_epi32(x7[31], x7[30]);
-  x8[31] = _mm_add_epi32(x7[31], x7[30]);
-  x8[32] = x7[32];
-  btf_32_type0_sse4_1_new(cospi_m04, cospi_p60, x7[33], x7[62], x8[33], x8[62],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m60, cospi_m04, x7[34], x7[61], x8[34], x8[61],
-                          __rounding, cos_bit);
-  x8[35] = x7[35];
-  x8[36] = x7[36];
-  btf_32_type0_sse4_1_new(cospi_m36, cospi_p28, x7[37], x7[58], x8[37], x8[58],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m28, cospi_m36, x7[38], x7[57], x8[38], x8[57],
-                          __rounding, cos_bit);
-  x8[39] = x7[39];
-  x8[40] = x7[40];
-  btf_32_type0_sse4_1_new(cospi_m20, cospi_p44, x7[41], x7[54], x8[41], x8[54],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m44, cospi_m20, x7[42], x7[53], x8[42], x8[53],
-                          __rounding, cos_bit);
-  x8[43] = x7[43];
-  x8[44] = x7[44];
-  btf_32_type0_sse4_1_new(cospi_m52, cospi_p12, x7[45], x7[50], x8[45], x8[50],
-                          __rounding, cos_bit);
-  btf_32_type0_sse4_1_new(cospi_m12, cospi_m52, x7[46], x7[49], x8[46], x8[49],
-                          __rounding, cos_bit);
-  x8[47] = x7[47];
-  x8[48] = x7[48];
-  x8[51] = x7[51];
-  x8[52] = x7[52];
-  x8[55] = x7[55];
-  x8[56] = x7[56];
-  x8[59] = x7[59];
-  x8[60] = x7[60];
-  x8[63] = x7[63];
-
-  // stage 9
-  __m128i x9[64];
-  x9[0] = x8[0];
-  x9[1] = x8[1];
-  x9[2] = x8[2];
-  x9[3] = x8[3];
-  x9[4] = x8[4];
-  x9[5] = x8[5];
-  x9[6] = x8[6];
-  x9[7] = x8[7];
-  x9[8] = x8[8];
-  x9[9] = x8[9];
-  x9[10] = x8[10];
-  x9[11] = x8[11];
-  x9[12] = x8[12];
-  x9[13] = x8[13];
-  x9[14] = x8[14];
-  x9[15] = x8[15];
-  btf_32_type1_sse4_1_new(cospi_p62, cospi_p02, x8[16], x8[31], x9[16], x9[31],
-                          __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p30, cospi_p34, x8[17], x8[30], x9[17], x9[30],
-                          __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p46, cospi_p18, x8[18], x8[29], x9[18], x9[29],
-                          __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p14, cospi_p50, x8[19], x8[28], x9[19], x9[28],
-                          __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p54, cospi_p10, x8[20], x8[27], x9[20], x9[27],
-                          __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p22, cospi_p42, x8[21], x8[26], x9[21], x9[26],
-                          __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p38, cospi_p26, x8[22], x8[25], x9[22], x9[25],
-                          __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p06, cospi_p58, x8[23], x8[24], x9[23], x9[24],
-                          __rounding, cos_bit);
-  x9[32] = _mm_add_epi32(x8[32], x8[33]);
-  x9[33] = _mm_sub_epi32(x8[32], x8[33]);
-  x9[34] = _mm_sub_epi32(x8[35], x8[34]);
-  x9[35] = _mm_add_epi32(x8[35], x8[34]);
-  x9[36] = _mm_add_epi32(x8[36], x8[37]);
-  x9[37] = _mm_sub_epi32(x8[36], x8[37]);
-  x9[38] = _mm_sub_epi32(x8[39], x8[38]);
-  x9[39] = _mm_add_epi32(x8[39], x8[38]);
-  x9[40] = _mm_add_epi32(x8[40], x8[41]);
-  x9[41] = _mm_sub_epi32(x8[40], x8[41]);
-  x9[42] = _mm_sub_epi32(x8[43], x8[42]);
-  x9[43] = _mm_add_epi32(x8[43], x8[42]);
-  x9[44] = _mm_add_epi32(x8[44], x8[45]);
-  x9[45] = _mm_sub_epi32(x8[44], x8[45]);
-  x9[46] = _mm_sub_epi32(x8[47], x8[46]);
-  x9[47] = _mm_add_epi32(x8[47], x8[46]);
-  x9[48] = _mm_add_epi32(x8[48], x8[49]);
-  x9[49] = _mm_sub_epi32(x8[48], x8[49]);
-  x9[50] = _mm_sub_epi32(x8[51], x8[50]);
-  x9[51] = _mm_add_epi32(x8[51], x8[50]);
-  x9[52] = _mm_add_epi32(x8[52], x8[53]);
-  x9[53] = _mm_sub_epi32(x8[52], x8[53]);
-  x9[54] = _mm_sub_epi32(x8[55], x8[54]);
-  x9[55] = _mm_add_epi32(x8[55], x8[54]);
-  x9[56] = _mm_add_epi32(x8[56], x8[57]);
-  x9[57] = _mm_sub_epi32(x8[56], x8[57]);
-  x9[58] = _mm_sub_epi32(x8[59], x8[58]);
-  x9[59] = _mm_add_epi32(x8[59], x8[58]);
-  x9[60] = _mm_add_epi32(x8[60], x8[61]);
-  x9[61] = _mm_sub_epi32(x8[60], x8[61]);
-  x9[62] = _mm_sub_epi32(x8[63], x8[62]);
-  x9[63] = _mm_add_epi32(x8[63], x8[62]);
-
-  // stage 10
-  __m128i x10[64];
-  x10[0] = x9[0];
-  x10[1] = x9[1];
-  x10[2] = x9[2];
-  x10[3] = x9[3];
-  x10[4] = x9[4];
-  x10[5] = x9[5];
-  x10[6] = x9[6];
-  x10[7] = x9[7];
-  x10[8] = x9[8];
-  x10[9] = x9[9];
-  x10[10] = x9[10];
-  x10[11] = x9[11];
-  x10[12] = x9[12];
-  x10[13] = x9[13];
-  x10[14] = x9[14];
-  x10[15] = x9[15];
-  x10[16] = x9[16];
-  x10[17] = x9[17];
-  x10[18] = x9[18];
-  x10[19] = x9[19];
-  x10[20] = x9[20];
-  x10[21] = x9[21];
-  x10[22] = x9[22];
-  x10[23] = x9[23];
-  x10[24] = x9[24];
-  x10[25] = x9[25];
-  x10[26] = x9[26];
-  x10[27] = x9[27];
-  x10[28] = x9[28];
-  x10[29] = x9[29];
-  x10[30] = x9[30];
-  x10[31] = x9[31];
-  btf_32_type1_sse4_1_new(cospi_p63, cospi_p01, x9[32], x9[63], x10[32],
-                          x10[63], __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p31, cospi_p33, x9[33], x9[62], x10[33],
-                          x10[62], __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p47, cospi_p17, x9[34], x9[61], x10[34],
-                          x10[61], __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p15, cospi_p49, x9[35], x9[60], x10[35],
-                          x10[60], __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p55, cospi_p09, x9[36], x9[59], x10[36],
-                          x10[59], __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p23, cospi_p41, x9[37], x9[58], x10[37],
-                          x10[58], __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p39, cospi_p25, x9[38], x9[57], x10[38],
-                          x10[57], __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p07, cospi_p57, x9[39], x9[56], x10[39],
-                          x10[56], __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p59, cospi_p05, x9[40], x9[55], x10[40],
-                          x10[55], __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p27, cospi_p37, x9[41], x9[54], x10[41],
-                          x10[54], __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p43, cospi_p21, x9[42], x9[53], x10[42],
-                          x10[53], __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p11, cospi_p53, x9[43], x9[52], x10[43],
-                          x10[52], __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p51, cospi_p13, x9[44], x9[51], x10[44],
-                          x10[51], __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p19, cospi_p45, x9[45], x9[50], x10[45],
-                          x10[50], __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p35, cospi_p29, x9[46], x9[49], x10[46],
-                          x10[49], __rounding, cos_bit);
-  btf_32_type1_sse4_1_new(cospi_p03, cospi_p61, x9[47], x9[48], x10[47],
-                          x10[48], __rounding, cos_bit);
-
-  // stage 11
-  output[0 * outstride] = x10[0];
-  output[1 * outstride] = x10[32];
-  output[2 * outstride] = x10[16];
-  output[3 * outstride] = x10[48];
-  output[4 * outstride] = x10[8];
-  output[5 * outstride] = x10[40];
-  output[6 * outstride] = x10[24];
-  output[7 * outstride] = x10[56];
-  output[8 * outstride] = x10[4];
-  output[9 * outstride] = x10[36];
-  output[10 * outstride] = x10[20];
-  output[11 * outstride] = x10[52];
-  output[12 * outstride] = x10[12];
-  output[13 * outstride] = x10[44];
-  output[14 * outstride] = x10[28];
-  output[15 * outstride] = x10[60];
-  output[16 * outstride] = x10[2];
-  output[17 * outstride] = x10[34];
-  output[18 * outstride] = x10[18];
-  output[19 * outstride] = x10[50];
-  output[20 * outstride] = x10[10];
-  output[21 * outstride] = x10[42];
-  output[22 * outstride] = x10[26];
-  output[23 * outstride] = x10[58];
-  output[24 * outstride] = x10[6];
-  output[25 * outstride] = x10[38];
-  output[26 * outstride] = x10[22];
-  output[27 * outstride] = x10[54];
-  output[28 * outstride] = x10[14];
-  output[29 * outstride] = x10[46];
-  output[30 * outstride] = x10[30];
-  output[31 * outstride] = x10[62];
-  output[32 * outstride] = x10[1];
-  output[33 * outstride] = x10[33];
-  output[34 * outstride] = x10[17];
-  output[35 * outstride] = x10[49];
-  output[36 * outstride] = x10[9];
-  output[37 * outstride] = x10[41];
-  output[38 * outstride] = x10[25];
-  output[39 * outstride] = x10[57];
-  output[40 * outstride] = x10[5];
-  output[41 * outstride] = x10[37];
-  output[42 * outstride] = x10[21];
-  output[43 * outstride] = x10[53];
-  output[44 * outstride] = x10[13];
-  output[45 * outstride] = x10[45];
-  output[46 * outstride] = x10[29];
-  output[47 * outstride] = x10[61];
-  output[48 * outstride] = x10[3];
-  output[49 * outstride] = x10[35];
-  output[50 * outstride] = x10[19];
-  output[51 * outstride] = x10[51];
-  output[52 * outstride] = x10[11];
-  output[53 * outstride] = x10[43];
-  output[54 * outstride] = x10[27];
-  output[55 * outstride] = x10[59];
-  output[56 * outstride] = x10[7];
-  output[57 * outstride] = x10[39];
-  output[58 * outstride] = x10[23];
-  output[59 * outstride] = x10[55];
-  output[60 * outstride] = x10[15];
-  output[61 * outstride] = x10[47];
-  output[62 * outstride] = x10[31];
-  output[63 * outstride] = x10[63];
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_avx2.c b/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_avx2.c
deleted file mode 100644
index 592462e20..000000000
--- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_avx2.c
+++ /dev/null
@@ -1,2068 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/av1_rtcd.h"
-
-#include "av1/common/enums.h"
-#include "av1/common/av1_txfm.h"
-#include "av1/encoder/x86/av1_fwd_txfm_avx2.h"
-#include "av1/common/x86/av1_txfm_sse2.h"
-#include "av1/encoder/av1_fwd_txfm1d_cfg.h"
-#include "av1/encoder/x86/av1_txfm1d_sse4.h"
-#include "av1/encoder/x86/av1_fwd_txfm_sse2.h"
-#include "aom_dsp/x86/txfm_common_avx2.h"
-
-static INLINE void fdct16x16_new_avx2(const __m256i *input, __m256i *output,
-                                      int8_t cos_bit) {
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1));
-
-  __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
-  __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
-  __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
-  __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
-  __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
-  __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
-  __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]);
-  __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]);
-  __m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]);
-  __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]);
-  __m256i cospi_p60_p04 = pair_set_w16_epi16(cospi[60], cospi[4]);
-  __m256i cospi_m04_p60 = pair_set_w16_epi16(-cospi[4], cospi[60]);
-  __m256i cospi_p28_p36 = pair_set_w16_epi16(cospi[28], cospi[36]);
-  __m256i cospi_m36_p28 = pair_set_w16_epi16(-cospi[36], cospi[28]);
-  __m256i cospi_p44_p20 = pair_set_w16_epi16(cospi[44], cospi[20]);
-  __m256i cospi_m20_p44 = pair_set_w16_epi16(-cospi[20], cospi[44]);
-  __m256i cospi_p12_p52 = pair_set_w16_epi16(cospi[12], cospi[52]);
-  __m256i cospi_m52_p12 = pair_set_w16_epi16(-cospi[52], cospi[12]);
-
-  // stage 1
-  __m256i x1[16];
-  btf_16_adds_subs_out_avx2(&x1[0], &x1[15], input[0], input[15]);
-  btf_16_adds_subs_out_avx2(&x1[1], &x1[14], input[1], input[14]);
-  btf_16_adds_subs_out_avx2(&x1[2], &x1[13], input[2], input[13]);
-  btf_16_adds_subs_out_avx2(&x1[3], &x1[12], input[3], input[12]);
-  btf_16_adds_subs_out_avx2(&x1[4], &x1[11], input[4], input[11]);
-  btf_16_adds_subs_out_avx2(&x1[5], &x1[10], input[5], input[10]);
-  btf_16_adds_subs_out_avx2(&x1[6], &x1[9], input[6], input[9]);
-  btf_16_adds_subs_out_avx2(&x1[7], &x1[8], input[7], input[8]);
-
-  // stage 2
-  btf_16_adds_subs_avx2(&x1[0], &x1[7]);
-  btf_16_adds_subs_avx2(&x1[1], &x1[6]);
-  btf_16_adds_subs_avx2(&x1[2], &x1[5]);
-  btf_16_adds_subs_avx2(&x1[3], &x1[4]);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[10], &x1[13], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[11], &x1[12], _r, cos_bit);
-
-  // stage 3
-  btf_16_adds_subs_avx2(&x1[0], &x1[3]);
-  btf_16_adds_subs_avx2(&x1[1], &x1[2]);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[5], &x1[6], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[8], &x1[11]);
-  btf_16_adds_subs_avx2(&x1[9], &x1[10]);
-  btf_16_adds_subs_avx2(&x1[15], &x1[12]);
-  btf_16_adds_subs_avx2(&x1[14], &x1[13]);
-
-  // stage 4
-  btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[0], &x1[1], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p48_p16, cospi_m16_p48, &x1[2], &x1[3], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[4], &x1[5]);
-  btf_16_adds_subs_avx2(&x1[7], &x1[6]);
-  btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[9], &x1[14], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[10], &x1[13], _r, cos_bit);
-
-  // stage 5
-  btf_16_w16_avx2(cospi_p56_p08, cospi_m08_p56, &x1[4], &x1[7], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p24_p40, cospi_m40_p24, &x1[5], &x1[6], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[8], &x1[9]);
-  btf_16_adds_subs_avx2(&x1[11], &x1[10]);
-  btf_16_adds_subs_avx2(&x1[12], &x1[13]);
-  btf_16_adds_subs_avx2(&x1[15], &x1[14]);
-
-  // stage 6
-  btf_16_w16_avx2(cospi_p60_p04, cospi_m04_p60, &x1[8], &x1[15], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p28_p36, cospi_m36_p28, &x1[9], &x1[14], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p44_p20, cospi_m20_p44, &x1[10], &x1[13], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p12_p52, cospi_m52_p12, &x1[11], &x1[12], _r, cos_bit);
-
-  // stage 7
-  output[0] = x1[0];
-  output[1] = x1[8];
-  output[2] = x1[4];
-  output[3] = x1[12];
-  output[4] = x1[2];
-  output[5] = x1[10];
-  output[6] = x1[6];
-  output[7] = x1[14];
-  output[8] = x1[1];
-  output[9] = x1[9];
-  output[10] = x1[5];
-  output[11] = x1[13];
-  output[12] = x1[3];
-  output[13] = x1[11];
-  output[14] = x1[7];
-  output[15] = x1[15];
-}
-
-static INLINE void fdct16x32_new_avx2(const __m256i *input, __m256i *output,
-                                      int8_t cos_bit) {
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1));
-
-  __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
-  __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
-  __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
-  __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
-  __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
-  __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
-  __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]);
-  __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]);
-  __m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]);
-  __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]);
-  __m256i cospi_m56_m08 = pair_set_w16_epi16(-cospi[56], -cospi[8]);
-  __m256i cospi_m24_m40 = pair_set_w16_epi16(-cospi[24], -cospi[40]);
-  __m256i cospi_p60_p04 = pair_set_w16_epi16(cospi[60], cospi[4]);
-  __m256i cospi_m04_p60 = pair_set_w16_epi16(-cospi[4], cospi[60]);
-  __m256i cospi_p28_p36 = pair_set_w16_epi16(cospi[28], cospi[36]);
-  __m256i cospi_m36_p28 = pair_set_w16_epi16(-cospi[36], cospi[28]);
-  __m256i cospi_p44_p20 = pair_set_w16_epi16(cospi[44], cospi[20]);
-  __m256i cospi_m20_p44 = pair_set_w16_epi16(-cospi[20], cospi[44]);
-  __m256i cospi_p12_p52 = pair_set_w16_epi16(cospi[12], cospi[52]);
-  __m256i cospi_m52_p12 = pair_set_w16_epi16(-cospi[52], cospi[12]);
-  __m256i cospi_p62_p02 = pair_set_w16_epi16(cospi[62], cospi[2]);
-  __m256i cospi_m02_p62 = pair_set_w16_epi16(-cospi[2], cospi[62]);
-  __m256i cospi_p30_p34 = pair_set_w16_epi16(cospi[30], cospi[34]);
-  __m256i cospi_m34_p30 = pair_set_w16_epi16(-cospi[34], cospi[30]);
-  __m256i cospi_p46_p18 = pair_set_w16_epi16(cospi[46], cospi[18]);
-  __m256i cospi_m18_p46 = pair_set_w16_epi16(-cospi[18], cospi[46]);
-  __m256i cospi_p14_p50 = pair_set_w16_epi16(cospi[14], cospi[50]);
-  __m256i cospi_m50_p14 = pair_set_w16_epi16(-cospi[50], cospi[14]);
-  __m256i cospi_p54_p10 = pair_set_w16_epi16(cospi[54], cospi[10]);
-  __m256i cospi_m10_p54 = pair_set_w16_epi16(-cospi[10], cospi[54]);
-  __m256i cospi_p22_p42 = pair_set_w16_epi16(cospi[22], cospi[42]);
-  __m256i cospi_m42_p22 = pair_set_w16_epi16(-cospi[42], cospi[22]);
-  __m256i cospi_p38_p26 = pair_set_w16_epi16(cospi[38], cospi[26]);
-  __m256i cospi_m26_p38 = pair_set_w16_epi16(-cospi[26], cospi[38]);
-  __m256i cospi_p06_p58 = pair_set_w16_epi16(cospi[6], cospi[58]);
-  __m256i cospi_m58_p06 = pair_set_w16_epi16(-cospi[58], cospi[6]);
-
-  // stage 1
-  __m256i x1[32];
-  btf_16_adds_subs_out_avx2(&x1[0], &x1[31], input[0], input[31]);
-  btf_16_adds_subs_out_avx2(&x1[1], &x1[30], input[1], input[30]);
-  btf_16_adds_subs_out_avx2(&x1[2], &x1[29], input[2], input[29]);
-  btf_16_adds_subs_out_avx2(&x1[3], &x1[28], input[3], input[28]);
-  btf_16_adds_subs_out_avx2(&x1[4], &x1[27], input[4], input[27]);
-  btf_16_adds_subs_out_avx2(&x1[5], &x1[26], input[5], input[26]);
-  btf_16_adds_subs_out_avx2(&x1[6], &x1[25], input[6], input[25]);
-  btf_16_adds_subs_out_avx2(&x1[7], &x1[24], input[7], input[24]);
-  btf_16_adds_subs_out_avx2(&x1[8], &x1[23], input[8], input[23]);
-  btf_16_adds_subs_out_avx2(&x1[9], &x1[22], input[9], input[22]);
-  btf_16_adds_subs_out_avx2(&x1[10], &x1[21], input[10], input[21]);
-  btf_16_adds_subs_out_avx2(&x1[11], &x1[20], input[11], input[20]);
-  btf_16_adds_subs_out_avx2(&x1[12], &x1[19], input[12], input[19]);
-  btf_16_adds_subs_out_avx2(&x1[13], &x1[18], input[13], input[18]);
-  btf_16_adds_subs_out_avx2(&x1[14], &x1[17], input[14], input[17]);
-  btf_16_adds_subs_out_avx2(&x1[15], &x1[16], input[15], input[16]);
-
-  // stage 2
-  btf_16_adds_subs_avx2(&x1[0], &x1[15]);
-  btf_16_adds_subs_avx2(&x1[1], &x1[14]);
-  btf_16_adds_subs_avx2(&x1[2], &x1[13]);
-  btf_16_adds_subs_avx2(&x1[3], &x1[12]);
-  btf_16_adds_subs_avx2(&x1[4], &x1[11]);
-  btf_16_adds_subs_avx2(&x1[5], &x1[10]);
-  btf_16_adds_subs_avx2(&x1[6], &x1[9]);
-  btf_16_adds_subs_avx2(&x1[7], &x1[8]);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[20], &x1[27], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[21], &x1[26], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[22], &x1[25], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[23], &x1[24], _r, cos_bit);
-
-  // stage 3
-  btf_16_adds_subs_avx2(&x1[0], &x1[7]);
-  btf_16_adds_subs_avx2(&x1[1], &x1[6]);
-  btf_16_adds_subs_avx2(&x1[2], &x1[5]);
-  btf_16_adds_subs_avx2(&x1[3], &x1[4]);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[10], &x1[13], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[11], &x1[12], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[16], &x1[23]);
-  btf_16_adds_subs_avx2(&x1[17], &x1[22]);
-  btf_16_adds_subs_avx2(&x1[18], &x1[21]);
-  btf_16_adds_subs_avx2(&x1[19], &x1[20]);
-  btf_16_adds_subs_avx2(&x1[31], &x1[24]);
-  btf_16_adds_subs_avx2(&x1[30], &x1[25]);
-  btf_16_adds_subs_avx2(&x1[29], &x1[26]);
-  btf_16_adds_subs_avx2(&x1[28], &x1[27]);
-
-  // stage 4
-  btf_16_adds_subs_avx2(&x1[0], &x1[3]);
-  btf_16_adds_subs_avx2(&x1[1], &x1[2]);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[5], &x1[6], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[8], &x1[11]);
-  btf_16_adds_subs_avx2(&x1[9], &x1[10]);
-  btf_16_adds_subs_avx2(&x1[15], &x1[12]);
-  btf_16_adds_subs_avx2(&x1[14], &x1[13]);
-  btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[18], &x1[29], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[19], &x1[28], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[20], &x1[27], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[21], &x1[26], _r, cos_bit);
-
-  // stage 5
-  btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[0], &x1[1], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p48_p16, cospi_m16_p48, &x1[2], &x1[3], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[4], &x1[5]);
-  btf_16_adds_subs_avx2(&x1[7], &x1[6]);
-  btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[9], &x1[14], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[10], &x1[13], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[16], &x1[19]);
-  btf_16_adds_subs_avx2(&x1[17], &x1[18]);
-  btf_16_adds_subs_avx2(&x1[23], &x1[20]);
-  btf_16_adds_subs_avx2(&x1[22], &x1[21]);
-  btf_16_adds_subs_avx2(&x1[24], &x1[27]);
-  btf_16_adds_subs_avx2(&x1[25], &x1[26]);
-  btf_16_adds_subs_avx2(&x1[31], &x1[28]);
-  btf_16_adds_subs_avx2(&x1[30], &x1[29]);
-
-  // stage 6
-  btf_16_w16_avx2(cospi_p56_p08, cospi_m08_p56, &x1[4], &x1[7], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p24_p40, cospi_m40_p24, &x1[5], &x1[6], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[8], &x1[9]);
-  btf_16_adds_subs_avx2(&x1[11], &x1[10]);
-  btf_16_adds_subs_avx2(&x1[12], &x1[13]);
-  btf_16_adds_subs_avx2(&x1[15], &x1[14]);
-  btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x1[17], &x1[30], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, &x1[18], &x1[29], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, &x1[21], &x1[26], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x1[22], &x1[25], _r, cos_bit);
-
-  // stage 7
-  btf_16_w16_avx2(cospi_p60_p04, cospi_m04_p60, &x1[8], &x1[15], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p28_p36, cospi_m36_p28, &x1[9], &x1[14], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p44_p20, cospi_m20_p44, &x1[10], &x1[13], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p12_p52, cospi_m52_p12, &x1[11], &x1[12], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[16], &x1[17]);
-  btf_16_adds_subs_avx2(&x1[19], &x1[18]);
-  btf_16_adds_subs_avx2(&x1[20], &x1[21]);
-  btf_16_adds_subs_avx2(&x1[23], &x1[22]);
-  btf_16_adds_subs_avx2(&x1[24], &x1[25]);
-  btf_16_adds_subs_avx2(&x1[27], &x1[26]);
-  btf_16_adds_subs_avx2(&x1[28], &x1[29]);
-  btf_16_adds_subs_avx2(&x1[31], &x1[30]);
-
-  // stage 8
-  btf_16_w16_avx2(cospi_p62_p02, cospi_m02_p62, &x1[16], &x1[31], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p30_p34, cospi_m34_p30, &x1[17], &x1[30], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p46_p18, cospi_m18_p46, &x1[18], &x1[29], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p14_p50, cospi_m50_p14, &x1[19], &x1[28], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p54_p10, cospi_m10_p54, &x1[20], &x1[27], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p22_p42, cospi_m42_p22, &x1[21], &x1[26], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p38_p26, cospi_m26_p38, &x1[22], &x1[25], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p06_p58, cospi_m58_p06, &x1[23], &x1[24], _r, cos_bit);
-
-  // stage 9
-  output[0] = x1[0];
-  output[1] = x1[16];
-  output[2] = x1[8];
-  output[3] = x1[24];
-  output[4] = x1[4];
-  output[5] = x1[20];
-  output[6] = x1[12];
-  output[7] = x1[28];
-  output[8] = x1[2];
-  output[9] = x1[18];
-  output[10] = x1[10];
-  output[11] = x1[26];
-  output[12] = x1[6];
-  output[13] = x1[22];
-  output[14] = x1[14];
-  output[15] = x1[30];
-  output[16] = x1[1];
-  output[17] = x1[17];
-  output[18] = x1[9];
-  output[19] = x1[25];
-  output[20] = x1[5];
-  output[21] = x1[21];
-  output[22] = x1[13];
-  output[23] = x1[29];
-  output[24] = x1[3];
-  output[25] = x1[19];
-  output[26] = x1[11];
-  output[27] = x1[27];
-  output[28] = x1[7];
-  output[29] = x1[23];
-  output[30] = x1[15];
-  output[31] = x1[31];
-}
-
-static INLINE void fdct16x64_new_avx2(const __m256i *input, __m256i *output,
-                                      int8_t cos_bit) {
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1));
-
-  __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]);
-  __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
-  __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]);
-  __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]);
-  __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]);
-  __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
-  __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]);
-  __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]);
-  __m256i cospi_m56_m08 = pair_set_w16_epi16(-cospi[56], -cospi[8]);
-  __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]);
-  __m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]);
-  __m256i cospi_m24_m40 = pair_set_w16_epi16(-cospi[24], -cospi[40]);
-  __m256i cospi_p60_p04 = pair_set_w16_epi16(cospi[60], cospi[4]);
-  __m256i cospi_m04_p60 = pair_set_w16_epi16(-cospi[4], cospi[60]);
-  __m256i cospi_p28_p36 = pair_set_w16_epi16(cospi[28], cospi[36]);
-  __m256i cospi_m36_p28 = pair_set_w16_epi16(-cospi[36], cospi[28]);
-  __m256i cospi_p44_p20 = pair_set_w16_epi16(cospi[44], cospi[20]);
-  __m256i cospi_m20_p44 = pair_set_w16_epi16(-cospi[20], cospi[44]);
-  __m256i cospi_p12_p52 = pair_set_w16_epi16(cospi[12], cospi[52]);
-  __m256i cospi_m52_p12 = pair_set_w16_epi16(-cospi[52], cospi[12]);
-  __m256i cospi_m60_m04 = pair_set_w16_epi16(-cospi[60], -cospi[4]);
-  __m256i cospi_m28_m36 = pair_set_w16_epi16(-cospi[28], -cospi[36]);
-  __m256i cospi_m44_m20 = pair_set_w16_epi16(-cospi[44], -cospi[20]);
-  __m256i cospi_m12_m52 = pair_set_w16_epi16(-cospi[12], -cospi[52]);
-  __m256i cospi_p62_p02 = pair_set_w16_epi16(cospi[62], cospi[2]);
-  __m256i cospi_m02_p62 = pair_set_w16_epi16(-cospi[2], cospi[62]);
-  __m256i cospi_p30_p34 = pair_set_w16_epi16(cospi[30], cospi[34]);
-  __m256i cospi_m34_p30 = pair_set_w16_epi16(-cospi[34], cospi[30]);
-  __m256i cospi_p46_p18 = pair_set_w16_epi16(cospi[46], cospi[18]);
-  __m256i cospi_m18_p46 = pair_set_w16_epi16(-cospi[18], cospi[46]);
-  __m256i cospi_p14_p50 = pair_set_w16_epi16(cospi[14], cospi[50]);
-  __m256i cospi_m50_p14 = pair_set_w16_epi16(-cospi[50], cospi[14]);
-  __m256i cospi_p54_p10 = pair_set_w16_epi16(cospi[54], cospi[10]);
-  __m256i cospi_m10_p54 = pair_set_w16_epi16(-cospi[10], cospi[54]);
-  __m256i cospi_p22_p42 = pair_set_w16_epi16(cospi[22], cospi[42]);
-  __m256i cospi_m42_p22 = pair_set_w16_epi16(-cospi[42], cospi[22]);
-  __m256i cospi_p38_p26 = pair_set_w16_epi16(cospi[38], cospi[26]);
-  __m256i cospi_m26_p38 = pair_set_w16_epi16(-cospi[26], cospi[38]);
-  __m256i cospi_p06_p58 = pair_set_w16_epi16(cospi[6], cospi[58]);
-  __m256i cospi_m58_p06 = pair_set_w16_epi16(-cospi[58], cospi[6]);
-  __m256i cospi_p63_p01 = pair_set_w16_epi16(cospi[63], cospi[1]);
-  __m256i cospi_m01_p63 = pair_set_w16_epi16(-cospi[1], cospi[63]);
-  __m256i cospi_p31_p33 = pair_set_w16_epi16(cospi[31], cospi[33]);
-  __m256i cospi_m33_p31 = pair_set_w16_epi16(-cospi[33], cospi[31]);
-  __m256i cospi_p47_p17 = pair_set_w16_epi16(cospi[47], cospi[17]);
-  __m256i cospi_m17_p47 = pair_set_w16_epi16(-cospi[17], cospi[47]);
-  __m256i cospi_p15_p49 = pair_set_w16_epi16(cospi[15], cospi[49]);
-  __m256i cospi_m49_p15 = pair_set_w16_epi16(-cospi[49], cospi[15]);
-  __m256i cospi_p55_p09 = pair_set_w16_epi16(cospi[55], cospi[9]);
-  __m256i cospi_m09_p55 = pair_set_w16_epi16(-cospi[9], cospi[55]);
-  __m256i cospi_p23_p41 = pair_set_w16_epi16(cospi[23], cospi[41]);
-  __m256i cospi_m41_p23 = pair_set_w16_epi16(-cospi[41], cospi[23]);
-  __m256i cospi_p39_p25 = pair_set_w16_epi16(cospi[39], cospi[25]);
-  __m256i cospi_m25_p39 = pair_set_w16_epi16(-cospi[25], cospi[39]);
-  __m256i cospi_p07_p57 = pair_set_w16_epi16(cospi[7], cospi[57]);
-  __m256i cospi_m57_p07 = pair_set_w16_epi16(-cospi[57], cospi[7]);
-  __m256i cospi_p59_p05 = pair_set_w16_epi16(cospi[59], cospi[5]);
-  __m256i cospi_m05_p59 = pair_set_w16_epi16(-cospi[5], cospi[59]);
-  __m256i cospi_p27_p37 = pair_set_w16_epi16(cospi[27], cospi[37]);
-  __m256i cospi_m37_p27 = pair_set_w16_epi16(-cospi[37], cospi[27]);
-  __m256i cospi_p43_p21 = pair_set_w16_epi16(cospi[43], cospi[21]);
-  __m256i cospi_m21_p43 = pair_set_w16_epi16(-cospi[21], cospi[43]);
-  __m256i cospi_p11_p53 = pair_set_w16_epi16(cospi[11], cospi[53]);
-  __m256i cospi_m53_p11 = pair_set_w16_epi16(-cospi[53], cospi[11]);
-  __m256i cospi_p51_p13 = pair_set_w16_epi16(cospi[51], cospi[13]);
-  __m256i cospi_m13_p51 = pair_set_w16_epi16(-cospi[13], cospi[51]);
-  __m256i cospi_p19_p45 = pair_set_w16_epi16(cospi[19], cospi[45]);
-  __m256i cospi_m45_p19 = pair_set_w16_epi16(-cospi[45], cospi[19]);
-  __m256i cospi_p35_p29 = pair_set_w16_epi16(cospi[35], cospi[29]);
-  __m256i cospi_m29_p35 = pair_set_w16_epi16(-cospi[29], cospi[35]);
-  __m256i cospi_p03_p61 = pair_set_w16_epi16(cospi[3], cospi[61]);
-  __m256i cospi_m61_p03 = pair_set_w16_epi16(-cospi[61], cospi[3]);
-
-  // stage 1
-  __m256i x1[64];
-  btf_16_adds_subs_out_avx2(&x1[0], &x1[63], input[0], input[63]);
-  btf_16_adds_subs_out_avx2(&x1[1], &x1[62], input[1], input[62]);
-  btf_16_adds_subs_out_avx2(&x1[2], &x1[61], input[2], input[61]);
-  btf_16_adds_subs_out_avx2(&x1[3], &x1[60], input[3], input[60]);
-  btf_16_adds_subs_out_avx2(&x1[4], &x1[59], input[4], input[59]);
-  btf_16_adds_subs_out_avx2(&x1[5], &x1[58], input[5], input[58]);
-  btf_16_adds_subs_out_avx2(&x1[6], &x1[57], input[6], input[57]);
-  btf_16_adds_subs_out_avx2(&x1[7], &x1[56], input[7], input[56]);
-  btf_16_adds_subs_out_avx2(&x1[8], &x1[55], input[8], input[55]);
-  btf_16_adds_subs_out_avx2(&x1[9], &x1[54], input[9], input[54]);
-  btf_16_adds_subs_out_avx2(&x1[10], &x1[53], input[10], input[53]);
-  btf_16_adds_subs_out_avx2(&x1[11], &x1[52], input[11], input[52]);
-  btf_16_adds_subs_out_avx2(&x1[12], &x1[51], input[12], input[51]);
-  btf_16_adds_subs_out_avx2(&x1[13], &x1[50], input[13], input[50]);
-  btf_16_adds_subs_out_avx2(&x1[14], &x1[49], input[14], input[49]);
-  btf_16_adds_subs_out_avx2(&x1[15], &x1[48], input[15], input[48]);
-  btf_16_adds_subs_out_avx2(&x1[16], &x1[47], input[16], input[47]);
-  btf_16_adds_subs_out_avx2(&x1[17], &x1[46], input[17], input[46]);
-  btf_16_adds_subs_out_avx2(&x1[18], &x1[45], input[18], input[45]);
-  btf_16_adds_subs_out_avx2(&x1[19], &x1[44], input[19], input[44]);
-  btf_16_adds_subs_out_avx2(&x1[20], &x1[43], input[20], input[43]);
-  btf_16_adds_subs_out_avx2(&x1[21], &x1[42], input[21], input[42]);
-  btf_16_adds_subs_out_avx2(&x1[22], &x1[41], input[22], input[41]);
-  btf_16_adds_subs_out_avx2(&x1[23], &x1[40], input[23], input[40]);
-  btf_16_adds_subs_out_avx2(&x1[24], &x1[39], input[24], input[39]);
-  btf_16_adds_subs_out_avx2(&x1[25], &x1[38], input[25], input[38]);
-  btf_16_adds_subs_out_avx2(&x1[26], &x1[37], input[26], input[37]);
-  btf_16_adds_subs_out_avx2(&x1[27], &x1[36], input[27], input[36]);
-  btf_16_adds_subs_out_avx2(&x1[28], &x1[35], input[28], input[35]);
-  btf_16_adds_subs_out_avx2(&x1[29], &x1[34], input[29], input[34]);
-  btf_16_adds_subs_out_avx2(&x1[30], &x1[33], input[30], input[33]);
-  btf_16_adds_subs_out_avx2(&x1[31], &x1[32], input[31], input[32]);
-
-  // stage 2
-  btf_16_adds_subs_avx2(&x1[0], &x1[31]);
-  btf_16_adds_subs_avx2(&x1[1], &x1[30]);
-  btf_16_adds_subs_avx2(&x1[2], &x1[29]);
-  btf_16_adds_subs_avx2(&x1[3], &x1[28]);
-  btf_16_adds_subs_avx2(&x1[4], &x1[27]);
-  btf_16_adds_subs_avx2(&x1[5], &x1[26]);
-  btf_16_adds_subs_avx2(&x1[6], &x1[25]);
-  btf_16_adds_subs_avx2(&x1[7], &x1[24]);
-  btf_16_adds_subs_avx2(&x1[8], &x1[23]);
-  btf_16_adds_subs_avx2(&x1[9], &x1[22]);
-  btf_16_adds_subs_avx2(&x1[10], &x1[21]);
-  btf_16_adds_subs_avx2(&x1[11], &x1[20]);
-  btf_16_adds_subs_avx2(&x1[12], &x1[19]);
-  btf_16_adds_subs_avx2(&x1[13], &x1[18]);
-  btf_16_adds_subs_avx2(&x1[14], &x1[17]);
-  btf_16_adds_subs_avx2(&x1[15], &x1[16]);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[40], &x1[55], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[41], &x1[54], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[42], &x1[53], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[43], &x1[52], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[44], &x1[51], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[45], &x1[50], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[46], &x1[49], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[47], &x1[48], _r, cos_bit);
-
-  // stage 3
-  btf_16_adds_subs_avx2(&x1[0], &x1[15]);
-  btf_16_adds_subs_avx2(&x1[1], &x1[14]);
-  btf_16_adds_subs_avx2(&x1[2], &x1[13]);
-  btf_16_adds_subs_avx2(&x1[3], &x1[12]);
-  btf_16_adds_subs_avx2(&x1[4], &x1[11]);
-  btf_16_adds_subs_avx2(&x1[5], &x1[10]);
-  btf_16_adds_subs_avx2(&x1[6], &x1[9]);
-  btf_16_adds_subs_avx2(&x1[7], &x1[8]);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[20], &x1[27], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[21], &x1[26], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[22], &x1[25], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[23], &x1[24], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[32], &x1[47]);
-  btf_16_adds_subs_avx2(&x1[33], &x1[46]);
-  btf_16_adds_subs_avx2(&x1[34], &x1[45]);
-  btf_16_adds_subs_avx2(&x1[35], &x1[44]);
-  btf_16_adds_subs_avx2(&x1[36], &x1[43]);
-  btf_16_adds_subs_avx2(&x1[37], &x1[42]);
-  btf_16_adds_subs_avx2(&x1[38], &x1[41]);
-  btf_16_adds_subs_avx2(&x1[39], &x1[40]);
-  btf_16_adds_subs_avx2(&x1[63], &x1[48]);
-  btf_16_adds_subs_avx2(&x1[62], &x1[49]);
-  btf_16_adds_subs_avx2(&x1[61], &x1[50]);
-  btf_16_adds_subs_avx2(&x1[60], &x1[51]);
-  btf_16_adds_subs_avx2(&x1[59], &x1[52]);
-  btf_16_adds_subs_avx2(&x1[58], &x1[53]);
-  btf_16_adds_subs_avx2(&x1[57], &x1[54]);
-  btf_16_adds_subs_avx2(&x1[56], &x1[55]);
-
-  // stage 4
-  btf_16_adds_subs_avx2(&x1[0], &x1[7]);
-  btf_16_adds_subs_avx2(&x1[1], &x1[6]);
-  btf_16_adds_subs_avx2(&x1[2], &x1[5]);
-  btf_16_adds_subs_avx2(&x1[3], &x1[4]);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[10], &x1[13], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[11], &x1[12], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[16], &x1[23]);
-  btf_16_adds_subs_avx2(&x1[17], &x1[22]);
-  btf_16_adds_subs_avx2(&x1[18], &x1[21]);
-  btf_16_adds_subs_avx2(&x1[19], &x1[20]);
-  btf_16_adds_subs_avx2(&x1[31], &x1[24]);
-  btf_16_adds_subs_avx2(&x1[30], &x1[25]);
-  btf_16_adds_subs_avx2(&x1[29], &x1[26]);
-  btf_16_adds_subs_avx2(&x1[28], &x1[27]);
-  btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[36], &x1[59], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[37], &x1[58], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[38], &x1[57], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[39], &x1[56], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[40], &x1[55], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[41], &x1[54], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[42], &x1[53], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[43], &x1[52], _r, cos_bit);
-
-  // stage 5
-  btf_16_adds_subs_avx2(&x1[0], &x1[3]);
-  btf_16_adds_subs_avx2(&x1[1], &x1[2]);
-  btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[5], &x1[6], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[8], &x1[11]);
-  btf_16_adds_subs_avx2(&x1[9], &x1[10]);
-  btf_16_adds_subs_avx2(&x1[15], &x1[12]);
-  btf_16_adds_subs_avx2(&x1[14], &x1[13]);
-  btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[18], &x1[29], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[19], &x1[28], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[20], &x1[27], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[21], &x1[26], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[32], &x1[39]);
-  btf_16_adds_subs_avx2(&x1[33], &x1[38]);
-  btf_16_adds_subs_avx2(&x1[34], &x1[37]);
-  btf_16_adds_subs_avx2(&x1[35], &x1[36]);
-  btf_16_adds_subs_avx2(&x1[47], &x1[40]);
-  btf_16_adds_subs_avx2(&x1[46], &x1[41]);
-  btf_16_adds_subs_avx2(&x1[45], &x1[42]);
-  btf_16_adds_subs_avx2(&x1[44], &x1[43]);
-  btf_16_adds_subs_avx2(&x1[48], &x1[55]);
-  btf_16_adds_subs_avx2(&x1[49], &x1[54]);
-  btf_16_adds_subs_avx2(&x1[50], &x1[53]);
-  btf_16_adds_subs_avx2(&x1[51], &x1[52]);
-  btf_16_adds_subs_avx2(&x1[63], &x1[56]);
-  btf_16_adds_subs_avx2(&x1[62], &x1[57]);
-  btf_16_adds_subs_avx2(&x1[61], &x1[58]);
-  btf_16_adds_subs_avx2(&x1[60], &x1[59]);
-
-  // stage 6
-  btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[0], &x1[1], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p48_p16, cospi_m16_p48, &x1[2], &x1[3], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[4], &x1[5]);
-  btf_16_adds_subs_avx2(&x1[7], &x1[6]);
-  btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[9], &x1[14], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[10], &x1[13], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[16], &x1[19]);
-  btf_16_adds_subs_avx2(&x1[17], &x1[18]);
-  btf_16_adds_subs_avx2(&x1[23], &x1[20]);
-  btf_16_adds_subs_avx2(&x1[22], &x1[21]);
-  btf_16_adds_subs_avx2(&x1[24], &x1[27]);
-  btf_16_adds_subs_avx2(&x1[25], &x1[26]);
-  btf_16_adds_subs_avx2(&x1[31], &x1[28]);
-  btf_16_adds_subs_avx2(&x1[30], &x1[29]);
-  btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x1[34], &x1[61], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x1[35], &x1[60], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, &x1[36], &x1[59], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, &x1[37], &x1[58], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, &x1[42], &x1[53], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, &x1[43], &x1[52], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x1[44], &x1[51], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x1[45], &x1[50], _r, cos_bit);
-
-  // stage 7
-  btf_16_w16_avx2(cospi_p56_p08, cospi_m08_p56, &x1[4], &x1[7], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p24_p40, cospi_m40_p24, &x1[5], &x1[6], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[8], &x1[9]);
-  btf_16_adds_subs_avx2(&x1[11], &x1[10]);
-  btf_16_adds_subs_avx2(&x1[12], &x1[13]);
-  btf_16_adds_subs_avx2(&x1[15], &x1[14]);
-  btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x1[17], &x1[30], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, &x1[18], &x1[29], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, &x1[21], &x1[26], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x1[22], &x1[25], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[32], &x1[35]);
-  btf_16_adds_subs_avx2(&x1[33], &x1[34]);
-  btf_16_adds_subs_avx2(&x1[39], &x1[36]);
-  btf_16_adds_subs_avx2(&x1[38], &x1[37]);
-  btf_16_adds_subs_avx2(&x1[40], &x1[43]);
-  btf_16_adds_subs_avx2(&x1[41], &x1[42]);
-  btf_16_adds_subs_avx2(&x1[47], &x1[44]);
-  btf_16_adds_subs_avx2(&x1[46], &x1[45]);
-  btf_16_adds_subs_avx2(&x1[48], &x1[51]);
-  btf_16_adds_subs_avx2(&x1[49], &x1[50]);
-  btf_16_adds_subs_avx2(&x1[55], &x1[52]);
-  btf_16_adds_subs_avx2(&x1[54], &x1[53]);
-  btf_16_adds_subs_avx2(&x1[56], &x1[59]);
-  btf_16_adds_subs_avx2(&x1[57], &x1[58]);
-  btf_16_adds_subs_avx2(&x1[63], &x1[60]);
-  btf_16_adds_subs_avx2(&x1[62], &x1[61]);
-
-  // stage 8
-  btf_16_w16_avx2(cospi_p60_p04, cospi_m04_p60, &x1[8], &x1[15], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p28_p36, cospi_m36_p28, &x1[9], &x1[14], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p44_p20, cospi_m20_p44, &x1[10], &x1[13], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p12_p52, cospi_m52_p12, &x1[11], &x1[12], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[16], &x1[17]);
-  btf_16_adds_subs_avx2(&x1[19], &x1[18]);
-  btf_16_adds_subs_avx2(&x1[20], &x1[21]);
-  btf_16_adds_subs_avx2(&x1[23], &x1[22]);
-  btf_16_adds_subs_avx2(&x1[24], &x1[25]);
-  btf_16_adds_subs_avx2(&x1[27], &x1[26]);
-  btf_16_adds_subs_avx2(&x1[28], &x1[29]);
-  btf_16_adds_subs_avx2(&x1[31], &x1[30]);
-  btf_16_w16_avx2(cospi_m04_p60, cospi_p60_p04, &x1[33], &x1[62], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m60_m04, cospi_m04_p60, &x1[34], &x1[61], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m36_p28, cospi_p28_p36, &x1[37], &x1[58], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m28_m36, cospi_m36_p28, &x1[38], &x1[57], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m20_p44, cospi_p44_p20, &x1[41], &x1[54], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m44_m20, cospi_m20_p44, &x1[42], &x1[53], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m52_p12, cospi_p12_p52, &x1[45], &x1[50], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m12_m52, cospi_m52_p12, &x1[46], &x1[49], _r, cos_bit);
-
-  // stage 9
-  btf_16_w16_avx2(cospi_p62_p02, cospi_m02_p62, &x1[16], &x1[31], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p30_p34, cospi_m34_p30, &x1[17], &x1[30], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p46_p18, cospi_m18_p46, &x1[18], &x1[29], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p14_p50, cospi_m50_p14, &x1[19], &x1[28], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p54_p10, cospi_m10_p54, &x1[20], &x1[27], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p22_p42, cospi_m42_p22, &x1[21], &x1[26], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p38_p26, cospi_m26_p38, &x1[22], &x1[25], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p06_p58, cospi_m58_p06, &x1[23], &x1[24], _r, cos_bit);
-  btf_16_adds_subs_avx2(&x1[32], &x1[33]);
-  btf_16_adds_subs_avx2(&x1[35], &x1[34]);
-  btf_16_adds_subs_avx2(&x1[36], &x1[37]);
-  btf_16_adds_subs_avx2(&x1[39], &x1[38]);
-  btf_16_adds_subs_avx2(&x1[40], &x1[41]);
-  btf_16_adds_subs_avx2(&x1[43], &x1[42]);
-  btf_16_adds_subs_avx2(&x1[44], &x1[45]);
-  btf_16_adds_subs_avx2(&x1[47], &x1[46]);
-  btf_16_adds_subs_avx2(&x1[48], &x1[49]);
-  btf_16_adds_subs_avx2(&x1[51], &x1[50]);
-  btf_16_adds_subs_avx2(&x1[52], &x1[53]);
-  btf_16_adds_subs_avx2(&x1[55], &x1[54]);
-  btf_16_adds_subs_avx2(&x1[56], &x1[57]);
-  btf_16_adds_subs_avx2(&x1[59], &x1[58]);
-  btf_16_adds_subs_avx2(&x1[60], &x1[61]);
-  btf_16_adds_subs_avx2(&x1[63], &x1[62]);
-
-  // stage 10
-  btf_16_w16_avx2(cospi_p63_p01, cospi_m01_p63, &x1[32], &x1[63], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p31_p33, cospi_m33_p31, &x1[33], &x1[62], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p47_p17, cospi_m17_p47, &x1[34], &x1[61], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p15_p49, cospi_m49_p15, &x1[35], &x1[60], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p55_p09, cospi_m09_p55, &x1[36], &x1[59], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p23_p41, cospi_m41_p23, &x1[37], &x1[58], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p39_p25, cospi_m25_p39, &x1[38], &x1[57], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p07_p57, cospi_m57_p07, &x1[39], &x1[56], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p59_p05, cospi_m05_p59, &x1[40], &x1[55], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p27_p37, cospi_m37_p27, &x1[41], &x1[54], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p43_p21, cospi_m21_p43, &x1[42], &x1[53], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p11_p53, cospi_m53_p11, &x1[43], &x1[52], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p51_p13, cospi_m13_p51, &x1[44], &x1[51], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p19_p45, cospi_m45_p19, &x1[45], &x1[50], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p35_p29, cospi_m29_p35, &x1[46], &x1[49], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p03_p61, cospi_m61_p03, &x1[47], &x1[48], _r, cos_bit);
-
-  // stage 11
-  output[0] = x1[0];
-  output[1] = x1[32];
-  output[2] = x1[16];
-  output[3] = x1[48];
-  output[4] = x1[8];
-  output[5] = x1[40];
-  output[6] = x1[24];
-  output[7] = x1[56];
-  output[8] = x1[4];
-  output[9] = x1[36];
-  output[10] = x1[20];
-  output[11] = x1[52];
-  output[12] = x1[12];
-  output[13] = x1[44];
-  output[14] = x1[28];
-  output[15] = x1[60];
-  output[16] = x1[2];
-  output[17] = x1[34];
-  output[18] = x1[18];
-  output[19] = x1[50];
-  output[20] = x1[10];
-  output[21] = x1[42];
-  output[22] = x1[26];
-  output[23] = x1[58];
-  output[24] = x1[6];
-  output[25] = x1[38];
-  output[26] = x1[22];
-  output[27] = x1[54];
-  output[28] = x1[14];
-  output[29] = x1[46];
-  output[30] = x1[30];
-  output[31] = x1[62];
-  output[32] = x1[1];
-  output[33] = x1[33];
-  output[34] = x1[17];
-  output[35] = x1[49];
-  output[36] = x1[9];
-  output[37] = x1[41];
-  output[38] = x1[25];
-  output[39] = x1[57];
-  output[40] = x1[5];
-  output[41] = x1[37];
-  output[42] = x1[21];
-  output[43] = x1[53];
-  output[44] = x1[13];
-  output[45] = x1[45];
-  output[46] = x1[29];
-  output[47] = x1[61];
-  output[48] = x1[3];
-  output[49] = x1[35];
-  output[50] = x1[19];
-  output[51] = x1[51];
-  output[52] = x1[11];
-  output[53] = x1[43];
-  output[54] = x1[27];
-  output[55] = x1[59];
-  output[56] = x1[7];
-  output[57] = x1[39];
-  output[58] = x1[23];
-  output[59] = x1[55];
-  output[60] = x1[15];
-  output[61] = x1[47];
-  output[62] = x1[31];
-  output[63] = x1[63];
-}
-
-static INLINE void av1_fdct32_new_avx2(const __m256i *input, __m256i *output,
-                                       int8_t cos_bit) {
-  __m256i x1[32];
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1));
-  // stage 0
-  // stage 1
-  btf_32_add_sub_out_avx2(&x1[0], &x1[31], input[0], input[31]);
-  btf_32_add_sub_out_avx2(&x1[1], &x1[30], input[1], input[30]);
-  btf_32_add_sub_out_avx2(&x1[2], &x1[29], input[2], input[29]);
-  btf_32_add_sub_out_avx2(&x1[3], &x1[28], input[3], input[28]);
-  btf_32_add_sub_out_avx2(&x1[4], &x1[27], input[4], input[27]);
-  btf_32_add_sub_out_avx2(&x1[5], &x1[26], input[5], input[26]);
-  btf_32_add_sub_out_avx2(&x1[6], &x1[25], input[6], input[25]);
-  btf_32_add_sub_out_avx2(&x1[7], &x1[24], input[7], input[24]);
-  btf_32_add_sub_out_avx2(&x1[8], &x1[23], input[8], input[23]);
-  btf_32_add_sub_out_avx2(&x1[9], &x1[22], input[9], input[22]);
-  btf_32_add_sub_out_avx2(&x1[10], &x1[21], input[10], input[21]);
-  btf_32_add_sub_out_avx2(&x1[11], &x1[20], input[11], input[20]);
-  btf_32_add_sub_out_avx2(&x1[12], &x1[19], input[12], input[19]);
-  btf_32_add_sub_out_avx2(&x1[13], &x1[18], input[13], input[18]);
-  btf_32_add_sub_out_avx2(&x1[14], &x1[17], input[14], input[17]);
-  btf_32_add_sub_out_avx2(&x1[15], &x1[16], input[15], input[16]);
-
-  // stage 2
-  btf_32_add_sub_avx2(&x1[0], &x1[15]);
-  btf_32_add_sub_avx2(&x1[1], &x1[14]);
-  btf_32_add_sub_avx2(&x1[2], &x1[13]);
-  btf_32_add_sub_avx2(&x1[3], &x1[12]);
-  btf_32_add_sub_avx2(&x1[4], &x1[11]);
-  btf_32_add_sub_avx2(&x1[5], &x1[10]);
-  btf_32_add_sub_avx2(&x1[6], &x1[9]);
-  btf_32_add_sub_avx2(&x1[7], &x1[8]);
-  btf_32_avx2_type0(-cospi[32], cospi[32], &x1[20], &x1[27], _r, cos_bit);
-  btf_32_avx2_type0(-cospi[32], cospi[32], &x1[21], &x1[26], _r, cos_bit);
-  btf_32_avx2_type0(-cospi[32], cospi[32], &x1[22], &x1[25], _r, cos_bit);
-  btf_32_avx2_type0(-cospi[32], cospi[32], &x1[23], &x1[24], _r, cos_bit);
-
-  // stage 3
-  btf_32_add_sub_avx2(&x1[0], &x1[7]);
-  btf_32_add_sub_avx2(&x1[1], &x1[6]);
-  btf_32_add_sub_avx2(&x1[2], &x1[5]);
-  btf_32_add_sub_avx2(&x1[3], &x1[4]);
-  btf_32_avx2_type0(-cospi[32], cospi[32], &x1[10], &x1[13], _r, cos_bit);
-  btf_32_avx2_type0(-cospi[32], cospi[32], &x1[11], &x1[12], _r, cos_bit);
-  btf_32_add_sub_avx2(&x1[16], &x1[23]);
-  btf_32_add_sub_avx2(&x1[17], &x1[22]);
-  btf_32_add_sub_avx2(&x1[18], &x1[21]);
-  btf_32_add_sub_avx2(&x1[19], &x1[20]);
-  btf_32_add_sub_avx2(&x1[31], &x1[24]);
-  btf_32_add_sub_avx2(&x1[30], &x1[25]);
-  btf_32_add_sub_avx2(&x1[29], &x1[26]);
-  btf_32_add_sub_avx2(&x1[28], &x1[27]);
-
-  // stage 4
-  btf_32_add_sub_avx2(&x1[0], &x1[3]);
-  btf_32_add_sub_avx2(&x1[1], &x1[2]);
-  btf_32_avx2_type0(-cospi[32], cospi[32], &x1[5], &x1[6], _r, cos_bit);
-  btf_32_add_sub_avx2(&x1[8], &x1[11]);
-  btf_32_add_sub_avx2(&x1[9], &x1[10]);
-  btf_32_add_sub_avx2(&x1[15], &x1[12]);
-  btf_32_add_sub_avx2(&x1[14], &x1[13]);
-  btf_32_avx2_type0(-cospi[16], cospi[48], &x1[18], &x1[29], _r, cos_bit);
-  btf_32_avx2_type0(-cospi[16], cospi[48], &x1[19], &x1[28], _r, cos_bit);
-  btf_32_avx2_type0(-cospi[48], -cospi[16], &x1[20], &x1[27], _r, cos_bit);
-  btf_32_avx2_type0(-cospi[48], -cospi[16], &x1[21], &x1[26], _r, cos_bit);
-
-  // stage 5
-  btf_32_avx2_type0(cospi[32], cospi[32], &x1[0], &x1[1], _r, cos_bit);
-  btf_32_avx2_type1(cospi[48], cospi[16], &x1[2], &x1[3], _r, cos_bit);
-  btf_32_add_sub_avx2(&x1[4], &x1[5]);
-  btf_32_add_sub_avx2(&x1[7], &x1[6]);
-  btf_32_avx2_type0(-cospi[16], cospi[48], &x1[9], &x1[14], _r, cos_bit);
-  btf_32_avx2_type0(-cospi[48], -cospi[16], &x1[10], &x1[13], _r, cos_bit);
-  btf_32_add_sub_avx2(&x1[16], &x1[19]);
-  btf_32_add_sub_avx2(&x1[17], &x1[18]);
-  btf_32_add_sub_avx2(&x1[23], &x1[20]);
-  btf_32_add_sub_avx2(&x1[22], &x1[21]);
-  btf_32_add_sub_avx2(&x1[24], &x1[27]);
-  btf_32_add_sub_avx2(&x1[25], &x1[26]);
-  btf_32_add_sub_avx2(&x1[31], &x1[28]);
-  btf_32_add_sub_avx2(&x1[30], &x1[29]);
-
-  // stage 6
-  btf_32_avx2_type1(cospi[56], cospi[8], &x1[4], &x1[7], _r, cos_bit);
-  btf_32_avx2_type1(cospi[24], cospi[40], &x1[5], &x1[6], _r, cos_bit);
-  btf_32_add_sub_avx2(&x1[8], &x1[9]);
-  btf_32_add_sub_avx2(&x1[11], &x1[10]);
-  btf_32_add_sub_avx2(&x1[12], &x1[13]);
-  btf_32_add_sub_avx2(&x1[15], &x1[14]);
-  btf_32_avx2_type0(-cospi[8], cospi[56], &x1[17], &x1[30], _r, cos_bit);
-  btf_32_avx2_type0(-cospi[56], -cospi[8], &x1[18], &x1[29], _r, cos_bit);
-  btf_32_avx2_type0(-cospi[40], cospi[24], &x1[21], &x1[26], _r, cos_bit);
-  btf_32_avx2_type0(-cospi[24], -cospi[40], &x1[22], &x1[25], _r, cos_bit);
-
-  // stage 7
-  btf_32_avx2_type1(cospi[60], cospi[4], &x1[8], &x1[15], _r, cos_bit);
-  btf_32_avx2_type1(cospi[28], cospi[36], &x1[9], &x1[14], _r, cos_bit);
-  btf_32_avx2_type1(cospi[44], cospi[20], &x1[10], &x1[13], _r, cos_bit);
-  btf_32_avx2_type1(cospi[12], cospi[52], &x1[11], &x1[12], _r, cos_bit);
-  btf_32_add_sub_avx2(&x1[16], &x1[17]);
-  btf_32_add_sub_avx2(&x1[19], &x1[18]);
-  btf_32_add_sub_avx2(&x1[20], &x1[21]);
-  btf_32_add_sub_avx2(&x1[23], &x1[22]);
-  btf_32_add_sub_avx2(&x1[24], &x1[25]);
-  btf_32_add_sub_avx2(&x1[27], &x1[26]);
-  btf_32_add_sub_avx2(&x1[28], &x1[29]);
-  btf_32_add_sub_avx2(&x1[31], &x1[30]);
-
-  // stage 8
-  btf_32_avx2_type1(cospi[62], cospi[2], &x1[16], &x1[31], _r, cos_bit);
-  btf_32_avx2_type1(cospi[30], cospi[34], &x1[17], &x1[30], _r, cos_bit);
-  btf_32_avx2_type1(cospi[46], cospi[18], &x1[18], &x1[29], _r, cos_bit);
-  btf_32_avx2_type1(cospi[14], cospi[50], &x1[19], &x1[28], _r, cos_bit);
-  btf_32_avx2_type1(cospi[54], cospi[10], &x1[20], &x1[27], _r, cos_bit);
-  btf_32_avx2_type1(cospi[22], cospi[42], &x1[21], &x1[26], _r, cos_bit);
-  btf_32_avx2_type1(cospi[38], cospi[26], &x1[22], &x1[25], _r, cos_bit);
-  btf_32_avx2_type1(cospi[6], cospi[58], &x1[23], &x1[24], _r, cos_bit);
-
-  // stage 9
-  output[0] = x1[0];
-  output[1] = x1[16];
-  output[2] = x1[8];
-  output[3] = x1[24];
-  output[4] = x1[4];
-  output[5] = x1[20];
-  output[6] = x1[12];
-  output[7] = x1[28];
-  output[8] = x1[2];
-  output[9] = x1[18];
-  output[10] = x1[10];
-  output[11] = x1[26];
-  output[12] = x1[6];
-  output[13] = x1[22];
-  output[14] = x1[14];
-  output[15] = x1[30];
-  output[16] = x1[1];
-  output[17] = x1[17];
-  output[18] = x1[9];
-  output[19] = x1[25];
-  output[20] = x1[5];
-  output[21] = x1[21];
-  output[22] = x1[13];
-  output[23] = x1[29];
-  output[24] = x1[3];
-  output[25] = x1[19];
-  output[26] = x1[11];
-  output[27] = x1[27];
-  output[28] = x1[7];
-  output[29] = x1[23];
-  output[30] = x1[15];
-  output[31] = x1[31];
-}
-
-static INLINE void av1_fdct64_new_avx2(const __m256i *input, __m256i *output,
-                                       int8_t cos_bit) {
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1));
-
-  __m256i cospi_m32 = _mm256_set1_epi32(-cospi[32]);
-  __m256i cospi_p32 = _mm256_set1_epi32(cospi[32]);
-  __m256i cospi_m16 = _mm256_set1_epi32(-cospi[16]);
-  __m256i cospi_p48 = _mm256_set1_epi32(cospi[48]);
-  __m256i cospi_m48 = _mm256_set1_epi32(-cospi[48]);
-  __m256i cospi_p16 = _mm256_set1_epi32(cospi[16]);
-  __m256i cospi_m08 = _mm256_set1_epi32(-cospi[8]);
-  __m256i cospi_p56 = _mm256_set1_epi32(cospi[56]);
-  __m256i cospi_m56 = _mm256_set1_epi32(-cospi[56]);
-  __m256i cospi_m40 = _mm256_set1_epi32(-cospi[40]);
-  __m256i cospi_p24 = _mm256_set1_epi32(cospi[24]);
-  __m256i cospi_m24 = _mm256_set1_epi32(-cospi[24]);
-  __m256i cospi_p08 = _mm256_set1_epi32(cospi[8]);
-  __m256i cospi_p40 = _mm256_set1_epi32(cospi[40]);
-  __m256i cospi_p60 = _mm256_set1_epi32(cospi[60]);
-  __m256i cospi_p04 = _mm256_set1_epi32(cospi[4]);
-  __m256i cospi_p28 = _mm256_set1_epi32(cospi[28]);
-  __m256i cospi_p36 = _mm256_set1_epi32(cospi[36]);
-  __m256i cospi_p44 = _mm256_set1_epi32(cospi[44]);
-  __m256i cospi_p20 = _mm256_set1_epi32(cospi[20]);
-  __m256i cospi_p12 = _mm256_set1_epi32(cospi[12]);
-  __m256i cospi_p52 = _mm256_set1_epi32(cospi[52]);
-  __m256i cospi_m04 = _mm256_set1_epi32(-cospi[4]);
-  __m256i cospi_m60 = _mm256_set1_epi32(-cospi[60]);
-  __m256i cospi_m36 = _mm256_set1_epi32(-cospi[36]);
-  __m256i cospi_m28 = _mm256_set1_epi32(-cospi[28]);
-  __m256i cospi_m20 = _mm256_set1_epi32(-cospi[20]);
-  __m256i cospi_m44 = _mm256_set1_epi32(-cospi[44]);
-  __m256i cospi_m52 = _mm256_set1_epi32(-cospi[52]);
-  __m256i cospi_m12 = _mm256_set1_epi32(-cospi[12]);
-  __m256i cospi_p62 = _mm256_set1_epi32(cospi[62]);
-  __m256i cospi_p02 = _mm256_set1_epi32(cospi[2]);
-  __m256i cospi_p30 = _mm256_set1_epi32(cospi[30]);
-  __m256i cospi_p34 = _mm256_set1_epi32(cospi[34]);
-  __m256i cospi_p46 = _mm256_set1_epi32(cospi[46]);
-  __m256i cospi_p18 = _mm256_set1_epi32(cospi[18]);
-  __m256i cospi_p14 = _mm256_set1_epi32(cospi[14]);
-  __m256i cospi_p50 = _mm256_set1_epi32(cospi[50]);
-  __m256i cospi_p54 = _mm256_set1_epi32(cospi[54]);
-  __m256i cospi_p10 = _mm256_set1_epi32(cospi[10]);
-  __m256i cospi_p22 = _mm256_set1_epi32(cospi[22]);
-  __m256i cospi_p42 = _mm256_set1_epi32(cospi[42]);
-  __m256i cospi_p38 = _mm256_set1_epi32(cospi[38]);
-  __m256i cospi_p26 = _mm256_set1_epi32(cospi[26]);
-  __m256i cospi_p06 = _mm256_set1_epi32(cospi[6]);
-  __m256i cospi_p58 = _mm256_set1_epi32(cospi[58]);
-  __m256i cospi_p63 = _mm256_set1_epi32(cospi[63]);
-  __m256i cospi_p01 = _mm256_set1_epi32(cospi[1]);
-  __m256i cospi_p31 = _mm256_set1_epi32(cospi[31]);
-  __m256i cospi_p33 = _mm256_set1_epi32(cospi[33]);
-  __m256i cospi_p47 = _mm256_set1_epi32(cospi[47]);
-  __m256i cospi_p17 = _mm256_set1_epi32(cospi[17]);
-  __m256i cospi_p15 = _mm256_set1_epi32(cospi[15]);
-  __m256i cospi_p49 = _mm256_set1_epi32(cospi[49]);
-  __m256i cospi_p55 = _mm256_set1_epi32(cospi[55]);
-  __m256i cospi_p09 = _mm256_set1_epi32(cospi[9]);
-  __m256i cospi_p23 = _mm256_set1_epi32(cospi[23]);
-  __m256i cospi_p41 = _mm256_set1_epi32(cospi[41]);
-  __m256i cospi_p39 = _mm256_set1_epi32(cospi[39]);
-  __m256i cospi_p25 = _mm256_set1_epi32(cospi[25]);
-  __m256i cospi_p07 = _mm256_set1_epi32(cospi[7]);
-  __m256i cospi_p57 = _mm256_set1_epi32(cospi[57]);
-  __m256i cospi_p59 = _mm256_set1_epi32(cospi[59]);
-  __m256i cospi_p05 = _mm256_set1_epi32(cospi[5]);
-  __m256i cospi_p27 = _mm256_set1_epi32(cospi[27]);
-  __m256i cospi_p37 = _mm256_set1_epi32(cospi[37]);
-  __m256i cospi_p43 = _mm256_set1_epi32(cospi[43]);
-  __m256i cospi_p21 = _mm256_set1_epi32(cospi[21]);
-  __m256i cospi_p11 = _mm256_set1_epi32(cospi[11]);
-  __m256i cospi_p53 = _mm256_set1_epi32(cospi[53]);
-  __m256i cospi_p51 = _mm256_set1_epi32(cospi[51]);
-  __m256i cospi_p13 = _mm256_set1_epi32(cospi[13]);
-  __m256i cospi_p19 = _mm256_set1_epi32(cospi[19]);
-  __m256i cospi_p45 = _mm256_set1_epi32(cospi[45]);
-  __m256i cospi_p35 = _mm256_set1_epi32(cospi[35]);
-  __m256i cospi_p29 = _mm256_set1_epi32(cospi[29]);
-  __m256i cospi_p03 = _mm256_set1_epi32(cospi[3]);
-  __m256i cospi_p61 = _mm256_set1_epi32(cospi[61]);
-
-  // stage 1
-  __m256i x1[64];
-  btf_32_add_sub_out_avx2(&x1[0], &x1[63], input[0], input[63]);
-  btf_32_add_sub_out_avx2(&x1[1], &x1[62], input[1], input[62]);
-  btf_32_add_sub_out_avx2(&x1[2], &x1[61], input[2], input[61]);
-  btf_32_add_sub_out_avx2(&x1[3], &x1[60], input[3], input[60]);
-  btf_32_add_sub_out_avx2(&x1[4], &x1[59], input[4], input[59]);
-  btf_32_add_sub_out_avx2(&x1[5], &x1[58], input[5], input[58]);
-  btf_32_add_sub_out_avx2(&x1[6], &x1[57], input[6], input[57]);
-  btf_32_add_sub_out_avx2(&x1[7], &x1[56], input[7], input[56]);
-  btf_32_add_sub_out_avx2(&x1[8], &x1[55], input[8], input[55]);
-  btf_32_add_sub_out_avx2(&x1[9], &x1[54], input[9], input[54]);
-  btf_32_add_sub_out_avx2(&x1[10], &x1[53], input[10], input[53]);
-  btf_32_add_sub_out_avx2(&x1[11], &x1[52], input[11], input[52]);
-  btf_32_add_sub_out_avx2(&x1[12], &x1[51], input[12], input[51]);
-  btf_32_add_sub_out_avx2(&x1[13], &x1[50], input[13], input[50]);
-  btf_32_add_sub_out_avx2(&x1[14], &x1[49], input[14], input[49]);
-  btf_32_add_sub_out_avx2(&x1[15], &x1[48], input[15], input[48]);
-  btf_32_add_sub_out_avx2(&x1[16], &x1[47], input[16], input[47]);
-  btf_32_add_sub_out_avx2(&x1[17], &x1[46], input[17], input[46]);
-  btf_32_add_sub_out_avx2(&x1[18], &x1[45], input[18], input[45]);
-  btf_32_add_sub_out_avx2(&x1[19], &x1[44], input[19], input[44]);
-  btf_32_add_sub_out_avx2(&x1[20], &x1[43], input[20], input[43]);
-  btf_32_add_sub_out_avx2(&x1[21], &x1[42], input[21], input[42]);
-  btf_32_add_sub_out_avx2(&x1[22], &x1[41], input[22], input[41]);
-  btf_32_add_sub_out_avx2(&x1[23], &x1[40], input[23], input[40]);
-  btf_32_add_sub_out_avx2(&x1[24], &x1[39], input[24], input[39]);
-  btf_32_add_sub_out_avx2(&x1[25], &x1[38], input[25], input[38]);
-  btf_32_add_sub_out_avx2(&x1[26], &x1[37], input[26], input[37]);
-  btf_32_add_sub_out_avx2(&x1[27], &x1[36], input[27], input[36]);
-  btf_32_add_sub_out_avx2(&x1[28], &x1[35], input[28], input[35]);
-  btf_32_add_sub_out_avx2(&x1[29], &x1[34], input[29], input[34]);
-  btf_32_add_sub_out_avx2(&x1[30], &x1[33], input[30], input[33]);
-  btf_32_add_sub_out_avx2(&x1[31], &x1[32], input[31], input[32]);
-
-  // stage 2
-  btf_32_add_sub_avx2(&x1[0], &x1[31]);
-  btf_32_add_sub_avx2(&x1[1], &x1[30]);
-  btf_32_add_sub_avx2(&x1[2], &x1[29]);
-  btf_32_add_sub_avx2(&x1[3], &x1[28]);
-  btf_32_add_sub_avx2(&x1[4], &x1[27]);
-  btf_32_add_sub_avx2(&x1[5], &x1[26]);
-  btf_32_add_sub_avx2(&x1[6], &x1[25]);
-  btf_32_add_sub_avx2(&x1[7], &x1[24]);
-  btf_32_add_sub_avx2(&x1[8], &x1[23]);
-  btf_32_add_sub_avx2(&x1[9], &x1[22]);
-  btf_32_add_sub_avx2(&x1[10], &x1[21]);
-  btf_32_add_sub_avx2(&x1[11], &x1[20]);
-  btf_32_add_sub_avx2(&x1[12], &x1[19]);
-  btf_32_add_sub_avx2(&x1[13], &x1[18]);
-  btf_32_add_sub_avx2(&x1[14], &x1[17]);
-  btf_32_add_sub_avx2(&x1[15], &x1[16]);
-  btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[40], &x1[55], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[41], &x1[54], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[42], &x1[53], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[43], &x1[52], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[44], &x1[51], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[45], &x1[50], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[46], &x1[49], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[47], &x1[48], _r, cos_bit);
-
-  // stage 3
-  btf_32_add_sub_avx2(&x1[0], &x1[15]);
-  btf_32_add_sub_avx2(&x1[1], &x1[14]);
-  btf_32_add_sub_avx2(&x1[2], &x1[13]);
-  btf_32_add_sub_avx2(&x1[3], &x1[12]);
-  btf_32_add_sub_avx2(&x1[4], &x1[11]);
-  btf_32_add_sub_avx2(&x1[5], &x1[10]);
-  btf_32_add_sub_avx2(&x1[6], &x1[9]);
-  btf_32_add_sub_avx2(&x1[7], &x1[8]);
-  btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[20], &x1[27], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[21], &x1[26], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[22], &x1[25], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[23], &x1[24], _r, cos_bit);
-  btf_32_add_sub_avx2(&x1[32], &x1[47]);
-  btf_32_add_sub_avx2(&x1[33], &x1[46]);
-  btf_32_add_sub_avx2(&x1[34], &x1[45]);
-  btf_32_add_sub_avx2(&x1[35], &x1[44]);
-  btf_32_add_sub_avx2(&x1[36], &x1[43]);
-  btf_32_add_sub_avx2(&x1[37], &x1[42]);
-  btf_32_add_sub_avx2(&x1[38], &x1[41]);
-  btf_32_add_sub_avx2(&x1[39], &x1[40]);
-  btf_32_add_sub_avx2(&x1[63], &x1[48]);
-  btf_32_add_sub_avx2(&x1[62], &x1[49]);
-  btf_32_add_sub_avx2(&x1[61], &x1[50]);
-  btf_32_add_sub_avx2(&x1[60], &x1[51]);
-  btf_32_add_sub_avx2(&x1[59], &x1[52]);
-  btf_32_add_sub_avx2(&x1[58], &x1[53]);
-  btf_32_add_sub_avx2(&x1[57], &x1[54]);
-  btf_32_add_sub_avx2(&x1[56], &x1[55]);
-
-  // stage 4
-  btf_32_add_sub_avx2(&x1[0], &x1[7]);
-  btf_32_add_sub_avx2(&x1[1], &x1[6]);
-  btf_32_add_sub_avx2(&x1[2], &x1[5]);
-  btf_32_add_sub_avx2(&x1[3], &x1[4]);
-  btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[10], &x1[13], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[11], &x1[12], _r, cos_bit);
-  btf_32_add_sub_avx2(&x1[16], &x1[23]);
-  btf_32_add_sub_avx2(&x1[17], &x1[22]);
-  btf_32_add_sub_avx2(&x1[18], &x1[21]);
-  btf_32_add_sub_avx2(&x1[19], &x1[20]);
-  btf_32_add_sub_avx2(&x1[31], &x1[24]);
-  btf_32_add_sub_avx2(&x1[30], &x1[25]);
-  btf_32_add_sub_avx2(&x1[29], &x1[26]);
-  btf_32_add_sub_avx2(&x1[28], &x1[27]);
-  btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[36], &x1[59], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[37], &x1[58], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[38], &x1[57], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[39], &x1[56], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[40], &x1[55], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[41], &x1[54], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[42], &x1[53], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[43], &x1[52], _r, cos_bit);
-
-  // stage 5
-  btf_32_add_sub_avx2(&x1[0], &x1[3]);
-  btf_32_add_sub_avx2(&x1[1], &x1[2]);
-  btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[5], &x1[6], _r, cos_bit);
-  btf_32_add_sub_avx2(&x1[8], &x1[11]);
-  btf_32_add_sub_avx2(&x1[9], &x1[10]);
-  btf_32_add_sub_avx2(&x1[15], &x1[12]);
-  btf_32_add_sub_avx2(&x1[14], &x1[13]);
-  btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[18], &x1[29], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[19], &x1[28], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[20], &x1[27], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[21], &x1[26], _r, cos_bit);
-  btf_32_add_sub_avx2(&x1[32], &x1[39]);
-  btf_32_add_sub_avx2(&x1[33], &x1[38]);
-  btf_32_add_sub_avx2(&x1[34], &x1[37]);
-  btf_32_add_sub_avx2(&x1[35], &x1[36]);
-  btf_32_add_sub_avx2(&x1[47], &x1[40]);
-  btf_32_add_sub_avx2(&x1[46], &x1[41]);
-  btf_32_add_sub_avx2(&x1[45], &x1[42]);
-  btf_32_add_sub_avx2(&x1[44], &x1[43]);
-  btf_32_add_sub_avx2(&x1[48], &x1[55]);
-  btf_32_add_sub_avx2(&x1[49], &x1[54]);
-  btf_32_add_sub_avx2(&x1[50], &x1[53]);
-  btf_32_add_sub_avx2(&x1[51], &x1[52]);
-  btf_32_add_sub_avx2(&x1[63], &x1[56]);
-  btf_32_add_sub_avx2(&x1[62], &x1[57]);
-  btf_32_add_sub_avx2(&x1[61], &x1[58]);
-  btf_32_add_sub_avx2(&x1[60], &x1[59]);
-
-  // stage 6
-  btf_32_avx2_type0_new(cospi_p32, cospi_p32, &x1[0], &x1[1], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p48, cospi_p16, &x1[2], &x1[3], _r, cos_bit);
-  btf_32_add_sub_avx2(&x1[4], &x1[5]);
-  btf_32_add_sub_avx2(&x1[7], &x1[6]);
-  btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[9], &x1[14], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[10], &x1[13], _r, cos_bit);
-  btf_32_add_sub_avx2(&x1[16], &x1[19]);
-  btf_32_add_sub_avx2(&x1[17], &x1[18]);
-  btf_32_add_sub_avx2(&x1[23], &x1[20]);
-  btf_32_add_sub_avx2(&x1[22], &x1[21]);
-  btf_32_add_sub_avx2(&x1[24], &x1[27]);
-  btf_32_add_sub_avx2(&x1[25], &x1[26]);
-  btf_32_add_sub_avx2(&x1[31], &x1[28]);
-  btf_32_add_sub_avx2(&x1[30], &x1[29]);
-  btf_32_avx2_type0_new(cospi_m08, cospi_p56, &x1[34], &x1[61], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m08, cospi_p56, &x1[35], &x1[60], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m56, cospi_m08, &x1[36], &x1[59], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m56, cospi_m08, &x1[37], &x1[58], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m40, cospi_p24, &x1[42], &x1[53], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m40, cospi_p24, &x1[43], &x1[52], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m24, cospi_m40, &x1[44], &x1[51], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m24, cospi_m40, &x1[45], &x1[50], _r, cos_bit);
-
-  // stage 7
-  btf_32_avx2_type1_new(cospi_p56, cospi_p08, &x1[4], &x1[7], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p24, cospi_p40, &x1[5], &x1[6], _r, cos_bit);
-  btf_32_add_sub_avx2(&x1[8], &x1[9]);
-  btf_32_add_sub_avx2(&x1[11], &x1[10]);
-  btf_32_add_sub_avx2(&x1[12], &x1[13]);
-  btf_32_add_sub_avx2(&x1[15], &x1[14]);
-  btf_32_avx2_type0_new(cospi_m08, cospi_p56, &x1[17], &x1[30], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m56, cospi_m08, &x1[18], &x1[29], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m40, cospi_p24, &x1[21], &x1[26], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m24, cospi_m40, &x1[22], &x1[25], _r, cos_bit);
-  btf_32_add_sub_avx2(&x1[32], &x1[35]);
-  btf_32_add_sub_avx2(&x1[33], &x1[34]);
-  btf_32_add_sub_avx2(&x1[39], &x1[36]);
-  btf_32_add_sub_avx2(&x1[38], &x1[37]);
-  btf_32_add_sub_avx2(&x1[40], &x1[43]);
-  btf_32_add_sub_avx2(&x1[41], &x1[42]);
-  btf_32_add_sub_avx2(&x1[47], &x1[44]);
-  btf_32_add_sub_avx2(&x1[46], &x1[45]);
-  btf_32_add_sub_avx2(&x1[48], &x1[51]);
-  btf_32_add_sub_avx2(&x1[49], &x1[50]);
-  btf_32_add_sub_avx2(&x1[55], &x1[52]);
-  btf_32_add_sub_avx2(&x1[54], &x1[53]);
-  btf_32_add_sub_avx2(&x1[56], &x1[59]);
-  btf_32_add_sub_avx2(&x1[57], &x1[58]);
-  btf_32_add_sub_avx2(&x1[63], &x1[60]);
-  btf_32_add_sub_avx2(&x1[62], &x1[61]);
-
-  // stage 8
-  btf_32_avx2_type1_new(cospi_p60, cospi_p04, &x1[8], &x1[15], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p28, cospi_p36, &x1[9], &x1[14], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p44, cospi_p20, &x1[10], &x1[13], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p12, cospi_p52, &x1[11], &x1[12], _r, cos_bit);
-  btf_32_add_sub_avx2(&x1[16], &x1[17]);
-  btf_32_add_sub_avx2(&x1[19], &x1[18]);
-  btf_32_add_sub_avx2(&x1[20], &x1[21]);
-  btf_32_add_sub_avx2(&x1[23], &x1[22]);
-  btf_32_add_sub_avx2(&x1[24], &x1[25]);
-  btf_32_add_sub_avx2(&x1[27], &x1[26]);
-  btf_32_add_sub_avx2(&x1[28], &x1[29]);
-  btf_32_add_sub_avx2(&x1[31], &x1[30]);
-  btf_32_avx2_type0_new(cospi_m04, cospi_p60, &x1[33], &x1[62], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m60, cospi_m04, &x1[34], &x1[61], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m36, cospi_p28, &x1[37], &x1[58], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m28, cospi_m36, &x1[38], &x1[57], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m20, cospi_p44, &x1[41], &x1[54], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m44, cospi_m20, &x1[42], &x1[53], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m52, cospi_p12, &x1[45], &x1[50], _r, cos_bit);
-  btf_32_avx2_type0_new(cospi_m12, cospi_m52, &x1[46], &x1[49], _r, cos_bit);
-
-  // stage 9
-  btf_32_avx2_type1_new(cospi_p62, cospi_p02, &x1[16], &x1[31], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p30, cospi_p34, &x1[17], &x1[30], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p46, cospi_p18, &x1[18], &x1[29], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p14, cospi_p50, &x1[19], &x1[28], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p54, cospi_p10, &x1[20], &x1[27], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p22, cospi_p42, &x1[21], &x1[26], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p38, cospi_p26, &x1[22], &x1[25], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p06, cospi_p58, &x1[23], &x1[24], _r, cos_bit);
-  btf_32_add_sub_avx2(&x1[32], &x1[33]);
-  btf_32_add_sub_avx2(&x1[35], &x1[34]);
-  btf_32_add_sub_avx2(&x1[36], &x1[37]);
-  btf_32_add_sub_avx2(&x1[39], &x1[38]);
-  btf_32_add_sub_avx2(&x1[40], &x1[41]);
-  btf_32_add_sub_avx2(&x1[43], &x1[42]);
-  btf_32_add_sub_avx2(&x1[44], &x1[45]);
-  btf_32_add_sub_avx2(&x1[47], &x1[46]);
-  btf_32_add_sub_avx2(&x1[48], &x1[49]);
-  btf_32_add_sub_avx2(&x1[51], &x1[50]);
-  btf_32_add_sub_avx2(&x1[52], &x1[53]);
-  btf_32_add_sub_avx2(&x1[55], &x1[54]);
-  btf_32_add_sub_avx2(&x1[56], &x1[57]);
-  btf_32_add_sub_avx2(&x1[59], &x1[58]);
-  btf_32_add_sub_avx2(&x1[60], &x1[61]);
-  btf_32_add_sub_avx2(&x1[63], &x1[62]);
-
-  // stage 10
-  btf_32_avx2_type1_new(cospi_p63, cospi_p01, &x1[32], &x1[63], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p31, cospi_p33, &x1[33], &x1[62], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p47, cospi_p17, &x1[34], &x1[61], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p15, cospi_p49, &x1[35], &x1[60], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p55, cospi_p09, &x1[36], &x1[59], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p23, cospi_p41, &x1[37], &x1[58], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p39, cospi_p25, &x1[38], &x1[57], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p07, cospi_p57, &x1[39], &x1[56], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p59, cospi_p05, &x1[40], &x1[55], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p27, cospi_p37, &x1[41], &x1[54], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p43, cospi_p21, &x1[42], &x1[53], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p11, cospi_p53, &x1[43], &x1[52], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p51, cospi_p13, &x1[44], &x1[51], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p19, cospi_p45, &x1[45], &x1[50], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p35, cospi_p29, &x1[46], &x1[49], _r, cos_bit);
-  btf_32_avx2_type1_new(cospi_p03, cospi_p61, &x1[47], &x1[48], _r, cos_bit);
-
-  // stage 11
-  output[0] = x1[0];
-  output[1] = x1[32];
-  output[2] = x1[16];
-  output[3] = x1[48];
-  output[4] = x1[8];
-  output[5] = x1[40];
-  output[6] = x1[24];
-  output[7] = x1[56];
-  output[8] = x1[4];
-  output[9] = x1[36];
-  output[10] = x1[20];
-  output[11] = x1[52];
-  output[12] = x1[12];
-  output[13] = x1[44];
-  output[14] = x1[28];
-  output[15] = x1[60];
-  output[16] = x1[2];
-  output[17] = x1[34];
-  output[18] = x1[18];
-  output[19] = x1[50];
-  output[20] = x1[10];
-  output[21] = x1[42];
-  output[22] = x1[26];
-  output[23] = x1[58];
-  output[24] = x1[6];
-  output[25] = x1[38];
-  output[26] = x1[22];
-  output[27] = x1[54];
-  output[28] = x1[14];
-  output[29] = x1[46];
-  output[30] = x1[30];
-  output[31] = x1[62];
-  output[32] = x1[1];
-  output[33] = x1[33];
-  output[34] = x1[17];
-  output[35] = x1[49];
-  output[36] = x1[9];
-  output[37] = x1[41];
-  output[38] = x1[25];
-  output[39] = x1[57];
-  output[40] = x1[5];
-  output[41] = x1[37];
-  output[42] = x1[21];
-  output[43] = x1[53];
-  output[44] = x1[13];
-  output[45] = x1[45];
-  output[46] = x1[29];
-  output[47] = x1[61];
-  output[48] = x1[3];
-  output[49] = x1[35];
-  output[50] = x1[19];
-  output[51] = x1[51];
-  output[52] = x1[11];
-  output[53] = x1[43];
-  output[54] = x1[27];
-  output[55] = x1[59];
-  output[56] = x1[7];
-  output[57] = x1[39];
-  output[58] = x1[23];
-  output[59] = x1[55];
-  output[60] = x1[15];
-  output[61] = x1[47];
-  output[62] = x1[31];
-  output[63] = x1[63];
-}
-
-static INLINE void fadst16x16_new_avx2(const __m256i *input, __m256i *output,
-                                       int8_t cos_bit) {
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const __m256i __zero = _mm256_setzero_si256();
-  const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1));
-
-  __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]);
-  __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]);
-  __m256i cospi_p16_p48 = pair_set_w16_epi16(cospi[16], cospi[48]);
-  __m256i cospi_p48_m16 = pair_set_w16_epi16(cospi[48], -cospi[16]);
-  __m256i cospi_m48_p16 = pair_set_w16_epi16(-cospi[48], cospi[16]);
-  __m256i cospi_p08_p56 = pair_set_w16_epi16(cospi[8], cospi[56]);
-  __m256i cospi_p56_m08 = pair_set_w16_epi16(cospi[56], -cospi[8]);
-  __m256i cospi_p40_p24 = pair_set_w16_epi16(cospi[40], cospi[24]);
-  __m256i cospi_p24_m40 = pair_set_w16_epi16(cospi[24], -cospi[40]);
-  __m256i cospi_m56_p08 = pair_set_w16_epi16(-cospi[56], cospi[8]);
-  __m256i cospi_m24_p40 = pair_set_w16_epi16(-cospi[24], cospi[40]);
-  __m256i cospi_p02_p62 = pair_set_w16_epi16(cospi[2], cospi[62]);
-  __m256i cospi_p62_m02 = pair_set_w16_epi16(cospi[62], -cospi[2]);
-  __m256i cospi_p10_p54 = pair_set_w16_epi16(cospi[10], cospi[54]);
-  __m256i cospi_p54_m10 = pair_set_w16_epi16(cospi[54], -cospi[10]);
-  __m256i cospi_p18_p46 = pair_set_w16_epi16(cospi[18], cospi[46]);
-  __m256i cospi_p46_m18 = pair_set_w16_epi16(cospi[46], -cospi[18]);
-  __m256i cospi_p26_p38 = pair_set_w16_epi16(cospi[26], cospi[38]);
-  __m256i cospi_p38_m26 = pair_set_w16_epi16(cospi[38], -cospi[26]);
-  __m256i cospi_p34_p30 = pair_set_w16_epi16(cospi[34], cospi[30]);
-  __m256i cospi_p30_m34 = pair_set_w16_epi16(cospi[30], -cospi[34]);
-  __m256i cospi_p42_p22 = pair_set_w16_epi16(cospi[42], cospi[22]);
-  __m256i cospi_p22_m42 = pair_set_w16_epi16(cospi[22], -cospi[42]);
-  __m256i cospi_p50_p14 = pair_set_w16_epi16(cospi[50], cospi[14]);
-  __m256i cospi_p14_m50 = pair_set_w16_epi16(cospi[14], -cospi[50]);
-  __m256i cospi_p58_p06 = pair_set_w16_epi16(cospi[58], cospi[6]);
-  __m256i cospi_p06_m58 = pair_set_w16_epi16(cospi[6], -cospi[58]);
-
-  // stage 1
-  __m256i x1[16];
-  x1[0] = input[0];
-  x1[1] = _mm256_subs_epi16(__zero, input[15]);
-  x1[2] = _mm256_subs_epi16(__zero, input[7]);
-  x1[3] = input[8];
-  x1[4] = _mm256_subs_epi16(__zero, input[3]);
-  x1[5] = input[12];
-  x1[6] = input[4];
-  x1[7] = _mm256_subs_epi16(__zero, input[11]);
-  x1[8] = _mm256_subs_epi16(__zero, input[1]);
-  x1[9] = input[14];
-  x1[10] = input[6];
-  x1[11] = _mm256_subs_epi16(__zero, input[9]);
-  x1[12] = input[2];
-  x1[13] = _mm256_subs_epi16(__zero, input[13]);
-  x1[14] = _mm256_subs_epi16(__zero, input[5]);
-  x1[15] = input[10];
-
-  // stage 2
-  btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[2], &x1[3], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[6], &x1[7], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[10], &x1[11], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[14], &x1[15], _r, cos_bit);
-
-  // stage 3
-  btf_16_adds_subs_avx2(&x1[0], &x1[2]);
-  btf_16_adds_subs_avx2(&x1[1], &x1[3]);
-  btf_16_adds_subs_avx2(&x1[4], &x1[6]);
-  btf_16_adds_subs_avx2(&x1[5], &x1[7]);
-  btf_16_adds_subs_avx2(&x1[8], &x1[10]);
-  btf_16_adds_subs_avx2(&x1[9], &x1[11]);
-  btf_16_adds_subs_avx2(&x1[12], &x1[14]);
-  btf_16_adds_subs_avx2(&x1[13], &x1[15]);
-
-  // stage 4
-  btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, &x1[4], &x1[5], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m48_p16, cospi_p16_p48, &x1[6], &x1[7], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, &x1[12], &x1[13], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m48_p16, cospi_p16_p48, &x1[14], &x1[15], _r, cos_bit);
-
-  // stage 5
-  btf_16_adds_subs_avx2(&x1[0], &x1[4]);
-  btf_16_adds_subs_avx2(&x1[1], &x1[5]);
-  btf_16_adds_subs_avx2(&x1[2], &x1[6]);
-  btf_16_adds_subs_avx2(&x1[3], &x1[7]);
-  btf_16_adds_subs_avx2(&x1[8], &x1[12]);
-  btf_16_adds_subs_avx2(&x1[9], &x1[13]);
-  btf_16_adds_subs_avx2(&x1[10], &x1[14]);
-  btf_16_adds_subs_avx2(&x1[11], &x1[15]);
-
-  // stage 6
-  btf_16_w16_avx2(cospi_p08_p56, cospi_p56_m08, &x1[8], &x1[9], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p40_p24, cospi_p24_m40, &x1[10], &x1[11], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m56_p08, cospi_p08_p56, &x1[12], &x1[13], _r, cos_bit);
-  btf_16_w16_avx2(cospi_m24_p40, cospi_p40_p24, &x1[14], &x1[15], _r, cos_bit);
-
-  // stage 7
-  btf_16_adds_subs_avx2(&x1[0], &x1[8]);
-  btf_16_adds_subs_avx2(&x1[1], &x1[9]);
-  btf_16_adds_subs_avx2(&x1[2], &x1[10]);
-  btf_16_adds_subs_avx2(&x1[3], &x1[11]);
-  btf_16_adds_subs_avx2(&x1[4], &x1[12]);
-  btf_16_adds_subs_avx2(&x1[5], &x1[13]);
-  btf_16_adds_subs_avx2(&x1[6], &x1[14]);
-  btf_16_adds_subs_avx2(&x1[7], &x1[15]);
-
-  // stage 8
-  btf_16_w16_avx2(cospi_p02_p62, cospi_p62_m02, &x1[0], &x1[1], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p10_p54, cospi_p54_m10, &x1[2], &x1[3], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p18_p46, cospi_p46_m18, &x1[4], &x1[5], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p26_p38, cospi_p38_m26, &x1[6], &x1[7], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p34_p30, cospi_p30_m34, &x1[8], &x1[9], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p42_p22, cospi_p22_m42, &x1[10], &x1[11], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p50_p14, cospi_p14_m50, &x1[12], &x1[13], _r, cos_bit);
-  btf_16_w16_avx2(cospi_p58_p06, cospi_p06_m58, &x1[14], &x1[15], _r, cos_bit);
-
-  // stage 9
-  output[0] = x1[1];
-  output[1] = x1[14];
-  output[2] = x1[3];
-  output[3] = x1[12];
-  output[4] = x1[5];
-  output[5] = x1[10];
-  output[6] = x1[7];
-  output[7] = x1[8];
-  output[8] = x1[9];
-  output[9] = x1[6];
-  output[10] = x1[11];
-  output[11] = x1[4];
-  output[12] = x1[13];
-  output[13] = x1[2];
-  output[14] = x1[15];
-  output[15] = x1[0];
-}
-
-static INLINE __m256i scale_round_avx2(const __m256i a, const int scale) {
-  const __m256i scale__r = pair_set_w16_epi16(scale, 1 << (NewSqrt2Bits - 1));
-  const __m256i b = _mm256_madd_epi16(a, scale__r);
-  return _mm256_srai_epi32(b, NewSqrt2Bits);
-}
-
-static INLINE void fidentity16x16_new_avx2(const __m256i *input,
-                                           __m256i *output, int8_t cos_bit) {
-  (void)cos_bit;
-  const __m256i one = _mm256_set1_epi16(1);
-
-  for (int i = 0; i < 16; ++i) {
-    const __m256i a_lo = _mm256_unpacklo_epi16(input[i], one);
-    const __m256i a_hi = _mm256_unpackhi_epi16(input[i], one);
-    const __m256i b_lo = scale_round_avx2(a_lo, 2 * NewSqrt2);
-    const __m256i b_hi = scale_round_avx2(a_hi, 2 * NewSqrt2);
-    output[i] = _mm256_packs_epi32(b_lo, b_hi);
-  }
-}
-
-static INLINE void fidentity16x32_new_avx2(const __m256i *input,
-                                           __m256i *output, int8_t cos_bit) {
-  (void)cos_bit;
-  for (int i = 0; i < 32; ++i) {
-    output[i] = _mm256_slli_epi16(input[i], 2);
-  }
-}
-
-static INLINE void av1_round_shift_array_32_avx2(__m256i *input,
-                                                 __m256i *output,
-                                                 const int size,
-                                                 const int bit) {
-  if (bit > 0) {
-    int i;
-    for (i = 0; i < size; i++) {
-      output[i] = av1_round_shift_32_avx2(input[i], bit);
-    }
-  } else {
-    int i;
-    for (i = 0; i < size; i++) {
-      output[i] = _mm256_slli_epi32(input[i], -bit);
-    }
-  }
-}
-
-static INLINE void av1_round_shift_rect_array_32_avx2(__m256i *input,
-                                                      __m256i *output,
-                                                      const int size,
-                                                      const int bit) {
-  const __m256i sqrt2 = _mm256_set1_epi32(NewSqrt2);
-  if (bit > 0) {
-    int i;
-    for (i = 0; i < size; i++) {
-      const __m256i r0 = av1_round_shift_32_avx2(input[i], bit);
-      const __m256i r1 = _mm256_mullo_epi32(sqrt2, r0);
-      output[i] = av1_round_shift_32_avx2(r1, NewSqrt2Bits);
-    }
-  } else {
-    int i;
-    for (i = 0; i < size; i++) {
-      const __m256i r0 = _mm256_slli_epi32(input[i], -bit);
-      const __m256i r1 = _mm256_mullo_epi32(sqrt2, r0);
-      output[i] = av1_round_shift_32_avx2(r1, NewSqrt2Bits);
-    }
-  }
-}
-
-static INLINE void transpose_32_8x8_avx2(int stride, const __m256i *inputA,
-                                         __m256i *output) {
-  __m256i temp0 = _mm256_unpacklo_epi32(inputA[0], inputA[2]);
-  __m256i temp1 = _mm256_unpackhi_epi32(inputA[0], inputA[2]);
-  __m256i temp2 = _mm256_unpacklo_epi32(inputA[1], inputA[3]);
-  __m256i temp3 = _mm256_unpackhi_epi32(inputA[1], inputA[3]);
-  __m256i temp4 = _mm256_unpacklo_epi32(inputA[4], inputA[6]);
-  __m256i temp5 = _mm256_unpackhi_epi32(inputA[4], inputA[6]);
-  __m256i temp6 = _mm256_unpacklo_epi32(inputA[5], inputA[7]);
-  __m256i temp7 = _mm256_unpackhi_epi32(inputA[5], inputA[7]);
-
-  __m256i t0 = _mm256_unpacklo_epi32(temp0, temp2);
-  __m256i t1 = _mm256_unpackhi_epi32(temp0, temp2);
-  __m256i t2 = _mm256_unpacklo_epi32(temp1, temp3);
-  __m256i t3 = _mm256_unpackhi_epi32(temp1, temp3);
-  __m256i t4 = _mm256_unpacklo_epi32(temp4, temp6);
-  __m256i t5 = _mm256_unpackhi_epi32(temp4, temp6);
-  __m256i t6 = _mm256_unpacklo_epi32(temp5, temp7);
-  __m256i t7 = _mm256_unpackhi_epi32(temp5, temp7);
-
-  output[0 * stride] = _mm256_permute2x128_si256(t0, t4, 0x20);
-  output[1 * stride] = _mm256_permute2x128_si256(t1, t5, 0x20);
-  output[2 * stride] = _mm256_permute2x128_si256(t2, t6, 0x20);
-  output[3 * stride] = _mm256_permute2x128_si256(t3, t7, 0x20);
-  output[4 * stride] = _mm256_permute2x128_si256(t0, t4, 0x31);
-  output[5 * stride] = _mm256_permute2x128_si256(t1, t5, 0x31);
-  output[6 * stride] = _mm256_permute2x128_si256(t2, t6, 0x31);
-  output[7 * stride] = _mm256_permute2x128_si256(t3, t7, 0x31);
-}
-
-// Store 8 16 bit values. Sign extend the values.
-static INLINE void store_buffer_16bit_to_32bit_w16_avx2(const __m256i *const in,
-                                                        int32_t *out,
-                                                        const int stride,
-                                                        const int out_size) {
-  for (int i = 0; i < out_size; ++i) {
-    _mm256_store_si256((__m256i *)(out),
-                       _mm256_cvtepi16_epi32(_mm256_castsi256_si128(in[i])));
-    _mm256_store_si256(
-        (__m256i *)(out + 8),
-        _mm256_cvtepi16_epi32(_mm256_extracti128_si256(in[i], 1)));
-    out += stride;
-  }
-}
-
-static INLINE void store_rect_16bit_to_32bit_avx2(const __m256i a,
-                                                  int32_t *const b) {
-  const __m256i one = _mm256_set1_epi16(1);
-  const __m256i a_reoder = _mm256_permute4x64_epi64(a, 0xd8);
-  const __m256i a_lo = _mm256_unpacklo_epi16(a_reoder, one);
-  const __m256i a_hi = _mm256_unpackhi_epi16(a_reoder, one);
-  const __m256i b_lo = scale_round_avx2(a_lo, NewSqrt2);
-  const __m256i b_hi = scale_round_avx2(a_hi, NewSqrt2);
-  _mm256_store_si256((__m256i *)b, b_lo);
-  _mm256_store_si256((__m256i *)(b + 8), b_hi);
-}
-
-static INLINE void store_rect_buffer_16bit_to_32bit_w16_avx2(
-    const __m256i *const in, int32_t *const out, const int stride,
-    const int out_size) {
-  for (int i = 0; i < out_size; ++i) {
-    store_rect_16bit_to_32bit_avx2(in[i], out + i * stride);
-  }
-}
-
-static const transform_1d_avx2 col_txfm16x32_arr[TX_TYPES] = {
-  fdct16x32_new_avx2,       // DCT_DCT
-  NULL,                     // ADST_DCT
-  NULL,                     // DCT_ADST
-  NULL,                     // ADST_ADST
-  NULL,                     // FLIPADST_DCT
-  NULL,                     // DCT_FLIPADST
-  NULL,                     // FLIPADST_FLIPADST
-  NULL,                     // ADST_FLIPADST
-  NULL,                     // FLIPADST_ADST
-  fidentity16x32_new_avx2,  // IDTX
-  fdct16x32_new_avx2,       // V_DCT
-  fidentity16x32_new_avx2,  // H_DCT
-  NULL,                     // V_ADST
-  NULL,                     // H_ADST
-  NULL,                     // V_FLIPADST
-  NULL                      // H_FLIPADST
-};
-
-static const transform_1d_avx2 row_txfm16x32_arr[TX_TYPES] = {
-  fdct16x32_new_avx2,       // DCT_DCT
-  NULL,                     // ADST_DCT
-  NULL,                     // DCT_ADST
-  NULL,                     // ADST_ADST
-  NULL,                     // FLIPADST_DCT
-  NULL,                     // DCT_FLIPADST
-  NULL,                     // FLIPADST_FLIPADST
-  NULL,                     // ADST_FLIPADST
-  NULL,                     // FLIPADST_ADST
-  fidentity16x32_new_avx2,  // IDTX
-  fidentity16x32_new_avx2,  // V_DCT
-  fdct16x32_new_avx2,       // H_DCT
-  NULL,                     // V_ADST
-  NULL,                     // H_ADST
-  NULL,                     // V_FLIPADST
-  NULL                      // H_FLIPADST
-};
-
-static const transform_1d_avx2 col_txfm16x16_arr[TX_TYPES] = {
-  fdct16x16_new_avx2,       // DCT_DCT
-  fadst16x16_new_avx2,      // ADST_DCT
-  fdct16x16_new_avx2,       // DCT_ADST
-  fadst16x16_new_avx2,      // ADST_ADST
-  fadst16x16_new_avx2,      // FLIPADST_DCT
-  fdct16x16_new_avx2,       // DCT_FLIPADST
-  fadst16x16_new_avx2,      // FLIPADST_FLIPADST
-  fadst16x16_new_avx2,      // ADST_FLIPADST
-  fadst16x16_new_avx2,      // FLIPADST_ADST
-  fidentity16x16_new_avx2,  // IDTX
-  fdct16x16_new_avx2,       // V_DCT
-  fidentity16x16_new_avx2,  // H_DCT
-  fadst16x16_new_avx2,      // V_ADST
-  fidentity16x16_new_avx2,  // H_ADST
-  fadst16x16_new_avx2,      // V_FLIPADST
-  fidentity16x16_new_avx2   // H_FLIPADST
-};
-
-static const transform_1d_avx2 row_txfm16x16_arr[TX_TYPES] = {
-  fdct16x16_new_avx2,       // DCT_DCT
-  fdct16x16_new_avx2,       // ADST_DCT
-  fadst16x16_new_avx2,      // DCT_ADST
-  fadst16x16_new_avx2,      // ADST_ADST
-  fdct16x16_new_avx2,       // FLIPADST_DCT
-  fadst16x16_new_avx2,      // DCT_FLIPADST
-  fadst16x16_new_avx2,      // FLIPADST_FLIPADST
-  fadst16x16_new_avx2,      // ADST_FLIPADST
-  fadst16x16_new_avx2,      // FLIPADST_ADST
-  fidentity16x16_new_avx2,  // IDTX
-  fidentity16x16_new_avx2,  // V_DCT
-  fdct16x16_new_avx2,       // H_DCT
-  fidentity16x16_new_avx2,  // V_ADST
-  fadst16x16_new_avx2,      // H_ADST
-  fidentity16x16_new_avx2,  // V_FLIPADST
-  fadst16x16_new_avx2       // H_FLIPADST
-};
-
-static void lowbd_fwd_txfm2d_16x16_avx2(const int16_t *input, int32_t *output,
-                                        int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  const TX_SIZE tx_size = TX_16X16;
-  __m256i buf0[16], buf1[16];
-  const int8_t *shift = fwd_txfm_shift_ls[tx_size];
-  const int txw_idx = get_txw_idx(tx_size);
-  const int txh_idx = get_txh_idx(tx_size);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-  const transform_1d_avx2 col_txfm = col_txfm16x16_arr[tx_type];
-  const transform_1d_avx2 row_txfm = row_txfm16x16_arr[tx_type];
-  int ud_flip, lr_flip;
-
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-  const int32_t i = 0;
-  if (ud_flip) {
-    load_buffer_16bit_to_16bit_flip_avx2(input + 16 * i, stride, buf0, height);
-  } else {
-    load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height);
-  }
-  round_shift_16bit_w16_avx2(buf0, height, shift[0]);
-  col_txfm(buf0, buf0, cos_bit_col);
-  round_shift_16bit_w16_avx2(buf0, height, shift[1]);
-  transpose_16bit_16x16_avx2(buf0, buf1 + 0 * width + 16 * i);
-
-  __m256i *buf;
-  if (lr_flip) {
-    buf = buf0;
-    flip_buf_avx2(buf1 + width * i, buf, width);
-  } else {
-    buf = buf1 + width * i;
-  }
-  row_txfm(buf, buf, cos_bit_row);
-  round_shift_16bit_w16_avx2(buf, width, shift[2]);
-  transpose_16bit_16x16_avx2(buf, buf);
-  store_buffer_16bit_to_32bit_w16_avx2(buf, output + 16 * width * i, width, 16);
-}
-
-static void lowbd_fwd_txfm2d_32x32_avx2(const int16_t *input, int32_t *output,
-                                        int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  const TX_SIZE tx_size = TX_32X32;
-  __m256i buf0[32], buf1[128];
-  const int8_t *shift = fwd_txfm_shift_ls[tx_size];
-  const int txw_idx = get_txw_idx(tx_size);
-  const int txh_idx = get_txh_idx(tx_size);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-  const transform_1d_avx2 col_txfm = col_txfm16x32_arr[tx_type];
-  const transform_1d_avx2 row_txfm = row_txfm16x32_arr[tx_type];
-
-  int ud_flip, lr_flip;
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
-  for (int i = 0; i < 2; i++) {
-    if (ud_flip) {
-      load_buffer_16bit_to_16bit_flip_avx2(input + 16 * i, stride, buf0,
-                                           height);
-    } else {
-      load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height);
-    }
-    round_shift_16bit_w16_avx2(buf0, height, shift[0]);
-    col_txfm(buf0, buf0, cos_bit_col);
-    round_shift_16bit_w16_avx2(buf0, height, shift[1]);
-    transpose_16bit_16x16_avx2(buf0 + 0 * 16, buf1 + 0 * width + 16 * i);
-    transpose_16bit_16x16_avx2(buf0 + 1 * 16, buf1 + 1 * width + 16 * i);
-  }
-
-  for (int i = 0; i < 2; i++) {
-    __m256i *buf;
-    if (lr_flip) {
-      buf = buf0;
-      flip_buf_avx2(buf1 + width * i, buf, width);
-    } else {
-      buf = buf1 + width * i;
-    }
-    row_txfm(buf, buf, cos_bit_row);
-    round_shift_16bit_w16_avx2(buf, width, shift[2]);
-    transpose_16bit_16x16_avx2(buf, buf);
-    store_buffer_16bit_to_32bit_w16_avx2(buf, output + 16 * width * i, width,
-                                         16);
-    transpose_16bit_16x16_avx2(buf + 16, buf + 16);
-    store_buffer_16bit_to_32bit_w16_avx2(buf + 16, output + 16 * width * i + 16,
-                                         width, 16);
-  }
-}
-
-static void lowbd_fwd_txfm2d_64x64_avx2(const int16_t *input, int32_t *output,
-                                        int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  (void)tx_type;
-  assert(tx_type == DCT_DCT);
-  const TX_SIZE tx_size = TX_64X64;
-  __m256i buf0[64], buf1[256];
-  const int8_t *shift = fwd_txfm_shift_ls[tx_size];
-  const int txw_idx = get_txw_idx(tx_size);
-  const int txh_idx = get_txh_idx(tx_size);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-  const transform_1d_avx2 col_txfm = fdct16x64_new_avx2;
-  const int width_div16 = (width >> 4);
-  const int height_div16 = (height >> 4);
-
-  for (int i = 0; i < width_div16; i++) {
-    load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height);
-    round_shift_16bit_w16_avx2(buf0, height, shift[0]);
-    col_txfm(buf0, buf0, cos_bit_col);
-    round_shift_16bit_w16_avx2(buf0, height, shift[1]);
-    for (int j = 0; j < AOMMIN(2, height_div16); ++j) {
-      transpose_16bit_16x16_avx2(buf0 + j * 16, buf1 + j * width + 16 * i);
-    }
-  }
-
-  for (int i = 0; i < AOMMIN(2, height_div16); i++) {
-    __m256i bufA[64];
-    __m256i bufB[64];
-    __m128i *buf = (__m128i *)(buf1 + width * i);
-    for (int j = 0; j < width; ++j) {
-      bufA[j] = _mm256_cvtepi16_epi32(buf[j * 2]);
-      bufB[j] = _mm256_cvtepi16_epi32(buf[j * 2 + 1]);
-    }
-    av1_fdct64_new_avx2(bufA, bufA, cos_bit_row);
-    av1_fdct64_new_avx2(bufB, bufB, cos_bit_row);
-    av1_round_shift_array_32_avx2(bufA, bufA, 32, -shift[2]);
-    av1_round_shift_array_32_avx2(bufB, bufB, 32, -shift[2]);
-
-    int32_t *output8 = output + 16 * 32 * i;
-    for (int j = 0; j < 4; ++j) {
-      __m256i *out = (__m256i *)(output8 + 8 * j);
-      transpose_32_8x8_avx2(4, bufA + 8 * j, out);
-      transpose_32_8x8_avx2(4, bufB + 8 * j, out + 8 * 4);
-    }
-  }
-}
-
-static void lowbd_fwd_txfm2d_16x32_avx2(const int16_t *input, int32_t *output,
-                                        int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  const TX_SIZE tx_size = TX_16X32;
-  __m256i buf0[32], buf1[32];
-  const int8_t *shift = fwd_txfm_shift_ls[tx_size];
-  const int txw_idx = get_txw_idx(tx_size);
-  const int txh_idx = get_txh_idx(tx_size);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-  const transform_1d_avx2 col_txfm = col_txfm16x32_arr[tx_type];
-  const transform_1d_avx2 row_txfm = row_txfm16x16_arr[tx_type];
-
-  int ud_flip, lr_flip;
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
-  if (ud_flip) {
-    load_buffer_16bit_to_16bit_flip_avx2(input, stride, buf0, height);
-  } else {
-    load_buffer_16bit_to_16bit_avx2(input, stride, buf0, height);
-  }
-  round_shift_16bit_w16_avx2(buf0, height, shift[0]);
-  col_txfm(buf0, buf0, cos_bit_col);
-  round_shift_16bit_w16_avx2(buf0, height, shift[1]);
-  transpose_16bit_16x16_avx2(buf0, buf1);
-  transpose_16bit_16x16_avx2(buf0 + 16, buf1 + 16);
-
-  for (int i = 0; i < 2; i++) {
-    __m256i *buf;
-    if (lr_flip) {
-      buf = buf0;
-      flip_buf_avx2(buf1 + width * i, buf, width);
-    } else {
-      buf = buf1 + width * i;
-    }
-    row_txfm(buf, buf, cos_bit_row);
-    round_shift_16bit_w16_avx2(buf, width, shift[2]);
-    transpose_16bit_16x16_avx2(buf, buf);
-    store_rect_buffer_16bit_to_32bit_w16_avx2(buf, output + 16 * width * i,
-                                              width, 16);
-  }
-}
-
-static void lowbd_fwd_txfm2d_32x16_avx2(const int16_t *input, int32_t *output,
-                                        int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  __m256i buf0[32], buf1[64];
-  const int8_t *shift = fwd_txfm_shift_ls[TX_32X16];
-  const int txw_idx = get_txw_idx(TX_32X16);
-  const int txh_idx = get_txh_idx(TX_32X16);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = 32;
-  const int height = 16;
-  const transform_1d_avx2 col_txfm = col_txfm16x16_arr[tx_type];
-  const transform_1d_avx2 row_txfm = row_txfm16x32_arr[tx_type];
-
-  int ud_flip, lr_flip;
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
-  for (int i = 0; i < 2; i++) {
-    if (ud_flip) {
-      load_buffer_16bit_to_16bit_flip_avx2(input + 16 * i, stride, buf0,
-                                           height);
-    } else {
-      load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height);
-    }
-    round_shift_16bit_w16_avx2(buf0, height, shift[0]);
-    col_txfm(buf0, buf0, cos_bit_col);
-    round_shift_16bit_w16_avx2(buf0, height, shift[1]);
-    transpose_16bit_16x16_avx2(buf0, buf1 + 0 * width + 16 * i);
-  }
-
-  __m256i *buf;
-  if (lr_flip) {
-    buf = buf0;
-    flip_buf_avx2(buf1, buf, width);
-  } else {
-    buf = buf1;
-  }
-  row_txfm(buf, buf, cos_bit_row);
-  round_shift_16bit_w16_avx2(buf, width, shift[2]);
-  transpose_16bit_16x16_avx2(buf, buf);
-  store_rect_buffer_16bit_to_32bit_w16_avx2(buf, output, width, 16);
-
-  transpose_16bit_16x16_avx2(buf + 16, buf + 16);
-  store_rect_buffer_16bit_to_32bit_w16_avx2(buf + 16, output + 16, width, 16);
-}
-
-static void lowbd_fwd_txfm2d_64x32_avx2(const int16_t *input, int32_t *output,
-                                        int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  const TX_SIZE tx_size = TX_64X32;
-  __m256i buf0[64], buf1[256];
-  const int8_t *shift = fwd_txfm_shift_ls[tx_size];
-  const int txw_idx = get_txw_idx(tx_size);
-  const int txh_idx = get_txh_idx(tx_size);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-  const transform_1d_avx2 col_txfm = col_txfm16x32_arr[tx_type];
-  const int width_div16 = (width >> 4);
-  const int height_div16 = (height >> 4);
-
-  for (int i = 0; i < width_div16; i++) {
-    load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height);
-    round_shift_16bit_w16_avx2(buf0, height, shift[0]);
-    col_txfm(buf0, buf0, cos_bit_col);
-    round_shift_16bit_w16_avx2(buf0, height, shift[1]);
-    for (int j = 0; j < AOMMIN(4, height_div16); ++j) {
-      transpose_16bit_16x16_avx2(buf0 + j * 16, buf1 + j * width + 16 * i);
-    }
-  }
-  assert(tx_type == DCT_DCT);
-  for (int i = 0; i < AOMMIN(2, height_div16); i++) {
-    __m256i bufA[64];
-    __m256i bufB[64];
-    __m128i *buf = (__m128i *)(buf1 + width * i);
-    for (int j = 0; j < width; ++j) {
-      bufA[j] = _mm256_cvtepi16_epi32(buf[j * 2]);
-      bufB[j] = _mm256_cvtepi16_epi32(buf[j * 2 + 1]);
-    }
-    av1_fdct64_new_avx2(bufA, bufA, cos_bit_row);
-    av1_fdct64_new_avx2(bufB, bufB, cos_bit_row);
-    av1_round_shift_rect_array_32_avx2(bufA, bufA, 32, -shift[2]);
-    av1_round_shift_rect_array_32_avx2(bufB, bufB, 32, -shift[2]);
-
-    int32_t *output8 = output + 16 * 32 * i;
-    for (int j = 0; j < 4; ++j) {
-      __m256i *out = (__m256i *)(output8 + 8 * j);
-      transpose_32_8x8_avx2(4, bufA + 8 * j, out);
-      transpose_32_8x8_avx2(4, bufB + 8 * j, out + 8 * 4);
-    }
-  }
-}
-
-static void lowbd_fwd_txfm2d_32x64_avx2(const int16_t *input, int32_t *output,
-                                        int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  (void)tx_type;
-  assert(tx_type == DCT_DCT);
-  const TX_SIZE tx_size = TX_32X64;
-  __m256i buf0[64], buf1[256];
-  const int8_t *shift = fwd_txfm_shift_ls[tx_size];
-  const int txw_idx = get_txw_idx(tx_size);
-  const int txh_idx = get_txh_idx(tx_size);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-  const transform_1d_avx2 col_txfm = fdct16x64_new_avx2;
-  const int width_div16 = (width >> 4);
-  const int height_div16 = (height >> 4);
-
-  for (int i = 0; i < width_div16; i++) {
-    load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height);
-    round_shift_16bit_w16_avx2(buf0, height, shift[0]);
-    col_txfm(buf0, buf0, cos_bit_col);
-    round_shift_16bit_w16_avx2(buf0, height, shift[1]);
-    for (int j = 0; j < AOMMIN(2, height_div16); ++j) {
-      transpose_16bit_16x16_avx2(buf0 + j * 16, buf1 + j * width + 16 * i);
-    }
-  }
-
-  for (int i = 0; i < AOMMIN(2, height_div16); i++) {
-    __m256i bufA[32];
-    __m256i bufB[32];
-    __m128i *buf = (__m128i *)(buf1 + width * i);
-    for (int j = 0; j < width; ++j) {
-      bufA[j] = _mm256_cvtepi16_epi32(buf[j * 2]);
-      bufB[j] = _mm256_cvtepi16_epi32(buf[j * 2 + 1]);
-    }
-    av1_fdct32_new_avx2(bufA, bufA, cos_bit_row);
-    av1_fdct32_new_avx2(bufB, bufB, cos_bit_row);
-    av1_round_shift_rect_array_32_avx2(bufA, bufA, 32, -shift[2]);
-    av1_round_shift_rect_array_32_avx2(bufB, bufB, 32, -shift[2]);
-
-    int32_t *output8 = output + 16 * 32 * i;
-    for (int j = 0; j < 4; ++j) {
-      __m256i *out = (__m256i *)(output8 + 8 * j);
-      transpose_32_8x8_avx2(4, bufA + 8 * j, out);
-      transpose_32_8x8_avx2(4, bufB + 8 * j, out + 8 * 4);
-    }
-  }
-}
-
-static void lowbd_fwd_txfm2d_16x64_avx2(const int16_t *input, int32_t *output,
-                                        int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  (void)tx_type;
-  assert(tx_type == DCT_DCT);
-  const TX_SIZE tx_size = TX_16X64;
-  __m256i buf0[64], buf1[64];
-  const int8_t *shift = fwd_txfm_shift_ls[tx_size];
-  const int txw_idx = get_txw_idx(tx_size);
-  const int txh_idx = get_txh_idx(tx_size);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-  const transform_1d_avx2 col_txfm = fdct16x64_new_avx2;
-  const transform_1d_avx2 row_txfm = fdct16x16_new_avx2;
-  const int width_div16 = (width >> 4);
-  const int height_div16 = (height >> 4);
-
-  for (int i = 0; i < width_div16; i++) {
-    load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height);
-    round_shift_16bit_w16_avx2(buf0, height, shift[0]);
-    col_txfm(buf0, buf0, cos_bit_col);
-    round_shift_16bit_w16_avx2(buf0, height, shift[1]);
-    for (int j = 0; j < height_div16; ++j) {
-      transpose_16bit_16x16_avx2(buf0 + j * 16, buf1 + j * width + 16 * i);
-    }
-  }
-
-  for (int i = 0; i < AOMMIN(4, height_div16); i++) {
-    __m256i *buf = buf1 + width * i;
-    row_txfm(buf, buf, cos_bit_row);
-    round_shift_16bit_w16_avx2(buf, width, shift[2]);
-    int32_t *output16 = output + 16 * width * i;
-    for (int j = 0; j < width_div16; ++j) {
-      __m256i *buf16 = buf + 16 * j;
-      transpose_16bit_16x16_avx2(buf16, buf16);
-      store_buffer_16bit_to_32bit_w16_avx2(buf16, output16 + 16 * j, width, 16);
-    }
-  }
-  // Zero out the bottom 16x32 area.
-  memset(output + 16 * 32, 0, 16 * 32 * sizeof(*output));
-}
-
-static void lowbd_fwd_txfm2d_64x16_avx2(const int16_t *input, int32_t *output,
-                                        int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  (void)tx_type;
-  assert(tx_type == DCT_DCT);
-  const TX_SIZE tx_size = TX_64X16;
-  __m256i buf0[64], buf1[64];
-  const int8_t *shift = fwd_txfm_shift_ls[tx_size];
-  const int txw_idx = get_txw_idx(tx_size);
-  const int txh_idx = get_txh_idx(tx_size);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-  const transform_1d_avx2 col_txfm = fdct16x16_new_avx2;
-  const transform_1d_avx2 row_txfm = fdct16x64_new_avx2;
-  const int width_div16 = (width >> 4);
-  const int height_div16 = (height >> 4);
-
-  for (int i = 0; i < width_div16; i++) {
-    load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height);
-    round_shift_16bit_w16_avx2(buf0, height, shift[0]);
-    col_txfm(buf0, buf0, cos_bit_col);
-    round_shift_16bit_w16_avx2(buf0, height, shift[1]);
-    for (int j = 0; j < height_div16; ++j) {
-      transpose_16bit_16x16_avx2(buf0 + j * 16, buf1 + j * width + 16 * i);
-    }
-  }
-
-  for (int i = 0; i < height_div16; i++) {
-    __m256i *buf = buf1 + width * i;
-    row_txfm(buf, buf, cos_bit_row);
-    round_shift_16bit_w16_avx2(buf, width, shift[2]);
-    int32_t *output16 = output + 16 * 32 * i;
-    for (int j = 0; j < 2; ++j) {
-      __m256i *buf16 = buf + 16 * j;
-      transpose_16bit_16x16_avx2(buf16, buf16);
-      store_buffer_16bit_to_32bit_w16_avx2(buf16, output16 + 16 * j, 32, 16);
-    }
-  }
-}
-
-static FwdTxfm2dFunc fwd_txfm2d_func_ls[TX_SIZES_ALL] = {
-  av1_lowbd_fwd_txfm2d_4x4_sse2,   // 4x4 transform
-  av1_lowbd_fwd_txfm2d_8x8_sse2,   // 8x8 transform
-  lowbd_fwd_txfm2d_16x16_avx2,     // 16x16 transform
-  lowbd_fwd_txfm2d_32x32_avx2,     // 32x32 transform
-  lowbd_fwd_txfm2d_64x64_avx2,     // 64x64 transform
-  av1_lowbd_fwd_txfm2d_4x8_sse2,   // 4x8 transform
-  av1_lowbd_fwd_txfm2d_8x4_sse2,   // 8x4 transform
-  av1_lowbd_fwd_txfm2d_8x16_sse2,  // 8x16 transform
-  av1_lowbd_fwd_txfm2d_16x8_sse2,  // 16x8 transform
-  lowbd_fwd_txfm2d_16x32_avx2,     // 16x32 transform
-  lowbd_fwd_txfm2d_32x16_avx2,     // 32x16 transform
-  lowbd_fwd_txfm2d_32x64_avx2,     // 32x64 transform
-  lowbd_fwd_txfm2d_64x32_avx2,     // 64x32 transform
-  av1_lowbd_fwd_txfm2d_4x16_sse2,  // 4x16 transform
-  av1_lowbd_fwd_txfm2d_16x4_sse2,  // 16x4 transform
-  av1_lowbd_fwd_txfm2d_8x32_sse2,  // 8x32 transform
-  av1_lowbd_fwd_txfm2d_32x8_sse2,  // 32x8 transform
-  lowbd_fwd_txfm2d_16x64_avx2,     // 16x64 transform
-  lowbd_fwd_txfm2d_64x16_avx2,     // 64x16 transform
-};
-
-void av1_lowbd_fwd_txfm_avx2(const int16_t *src_diff, tran_low_t *coeff,
-                             int diff_stride, TxfmParam *txfm_param) {
-  FwdTxfm2dFunc fwd_txfm2d_func = fwd_txfm2d_func_ls[txfm_param->tx_size];
-  if ((fwd_txfm2d_func == NULL) ||
-      (txfm_param->lossless && txfm_param->tx_size == TX_4X4)) {
-    av1_lowbd_fwd_txfm_c(src_diff, coeff, diff_stride, txfm_param);
-  } else {
-    fwd_txfm2d_func(src_diff, coeff, diff_stride, txfm_param->tx_type,
-                    txfm_param->bd);
-  }
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_sse4.c b/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_sse4.c
deleted file mode 100644
index 8ec0256eb..000000000
--- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_sse4.c
+++ /dev/null
@@ -1,365 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/av1_rtcd.h"
-
-#include "av1/common/enums.h"
-#include "av1/common/av1_txfm.h"
-#include "av1/common/x86/av1_txfm_sse2.h"
-#include "av1/common/x86/highbd_txfm_utility_sse4.h"
-#include "av1/encoder/av1_fwd_txfm1d_cfg.h"
-#include "av1/encoder/x86/av1_txfm1d_sse4.h"
-#include "av1/encoder/x86/av1_fwd_txfm_sse2.h"
-
-static INLINE void int16_array_with_stride_to_int32_array_without_stride(
-    const int16_t *input, int stride, int32_t *output, int txfm1d_size) {
-  int r, c;
-  for (r = 0; r < txfm1d_size; r++) {
-    for (c = 0; c < txfm1d_size; c++) {
-      output[r * txfm1d_size + c] = (int32_t)input[r * stride + c];
-    }
-  }
-}
-
-typedef void (*TxfmFuncSSE2)(const __m128i *input, __m128i *output,
-                             const int8_t cos_bit, const int8_t *stage_range);
-
-static void fdct32_new_sse4_1(const __m128i *input, __m128i *output,
-                              const int8_t cos_bit, const int8_t *stage_range) {
-  const int txfm_size = 32;
-  const int num_per_128 = 4;
-  __m128i buf0[32];
-  __m128i buf1[32];
-  int col_num = txfm_size / num_per_128;
-  int col;
-  (void)stage_range;
-  for (col = 0; col < col_num; col++) {
-    int j;
-    for (j = 0; j < 32; ++j) {
-      buf0[j] = input[j * col_num + col];
-    }
-    av1_fdct32_new_sse4_1(buf0, buf1, cos_bit);
-    for (j = 0; j < 32; ++j) {
-      output[j * col_num + col] = buf1[j];
-    }
-  }
-}
-
-static void fdct64_new_sse4_1(const __m128i *input, __m128i *output,
-                              const int8_t cos_bit, const int8_t *stage_range) {
-  const int txfm_size = 64;
-  const int num_per_128 = 4;
-  int col_num = txfm_size / num_per_128;
-  (void)stage_range;
-  for (int col = 0; col < col_num; col++) {
-    av1_fdct64_new_sse4_1((input + col), (output + col), cos_bit, col_num,
-                          col_num);
-  }
-}
-
-static INLINE TxfmFuncSSE2 fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
-  switch (txfm_type) {
-    case TXFM_TYPE_DCT32: return fdct32_new_sse4_1; break;
-    case TXFM_TYPE_DCT64: return fdct64_new_sse4_1; break;
-    default: assert(0);
-  }
-  return NULL;
-}
-
-static INLINE void fwd_txfm2d_sse4_1(const int16_t *input, int32_t *output,
-                                     const int stride,
-                                     const TXFM_2D_FLIP_CFG *cfg,
-                                     int32_t *txfm_buf) {
-  // TODO(sarahparker) This does not currently support rectangular transforms
-  // and will break without splitting txfm_size out into row and col size.
-  // Rectangular transforms use c code only, so it should be ok for now.
-  // It will be corrected when there are sse implementations for rectangular
-  // transforms.
-  assert(cfg->tx_size < TX_SIZES);
-  const int txfm_size = tx_size_wide[cfg->tx_size];
-  const int8_t *shift = cfg->shift;
-  const int8_t *stage_range_col = cfg->stage_range_col;
-  const int8_t *stage_range_row = cfg->stage_range_row;
-  const int8_t cos_bit_col = cfg->cos_bit_col;
-  const int8_t cos_bit_row = cfg->cos_bit_row;
-  const TxfmFuncSSE2 txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col);
-  const TxfmFuncSSE2 txfm_func_row = fwd_txfm_type_to_func(cfg->txfm_type_row);
-
-  __m128i *buf_128 = (__m128i *)txfm_buf;
-  __m128i *out_128 = (__m128i *)output;
-  int num_per_128 = 4;
-  int txfm2d_size_128 = txfm_size * txfm_size / num_per_128;
-
-  int16_array_with_stride_to_int32_array_without_stride(input, stride, txfm_buf,
-                                                        txfm_size);
-  av1_round_shift_array_32_sse4_1(buf_128, out_128, txfm2d_size_128, -shift[0]);
-  txfm_func_col(out_128, buf_128, cos_bit_col, stage_range_col);
-  av1_round_shift_array_32_sse4_1(buf_128, out_128, txfm2d_size_128, -shift[1]);
-  transpose_32(txfm_size, out_128, buf_128);
-  txfm_func_row(buf_128, out_128, cos_bit_row, stage_range_row);
-  av1_round_shift_array_32_sse4_1(out_128, buf_128, txfm2d_size_128, -shift[2]);
-  transpose_32(txfm_size, buf_128, out_128);
-}
-
-static INLINE void fwd_txfm2d_64x64_sse4_1(const int16_t *input,
-                                           int32_t *output, const int stride,
-                                           const TXFM_2D_FLIP_CFG *cfg,
-                                           int32_t *txfm_buf) {
-  assert(cfg->tx_size < TX_SIZES);
-  const int txfm_size = tx_size_wide[cfg->tx_size];
-  const int8_t *shift = cfg->shift;
-  const int8_t *stage_range_col = cfg->stage_range_col;
-  const int8_t cos_bit_col = cfg->cos_bit_col;
-  const int8_t cos_bit_row = cfg->cos_bit_row;
-  const TxfmFuncSSE2 txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col);
-  __m128i *buf_128 = (__m128i *)txfm_buf;
-  __m128i *out_128 = (__m128i *)output;
-
-  const int num_per_128 = 4;
-  int txfm2d_size_128 = txfm_size * txfm_size / num_per_128;
-  int col_num = txfm_size / num_per_128;
-
-  int16_array_with_stride_to_int32_array_without_stride(input, stride, output,
-                                                        txfm_size);
-  /*col wise transform*/
-  txfm_func_col(out_128, buf_128, cos_bit_col, stage_range_col);
-  av1_round_shift_array_32_sse4_1(buf_128, out_128, txfm2d_size_128, -shift[1]);
-  transpose_32(txfm_size, out_128, buf_128);
-
-  /*row wise transform*/
-  for (int col = 0; col < (col_num >> 1); col++) {
-    av1_fdct64_new_sse4_1((buf_128 + col), (out_128 + col), cos_bit_row,
-                          col_num, (col_num >> 1));
-  }
-
-  txfm2d_size_128 = (col_num >> 1) * (txfm_size >> 1);
-  av1_round_shift_array_32_sse4_1(out_128, buf_128, txfm2d_size_128, -shift[2]);
-  transpose_32x32(buf_128, out_128);
-}
-
-void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output,
-                                 int stride, TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(16, int32_t, txfm_buf[1024]);
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_32X32, &cfg);
-  (void)bd;
-  fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
-}
-
-void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
-                                 int stride, TX_TYPE tx_type, int bd) {
-  DECLARE_ALIGNED(16, int32_t, txfm_buf[4096]);
-  TXFM_2D_FLIP_CFG cfg;
-  av1_get_fwd_txfm_cfg(tx_type, TX_64X64, &cfg);
-  (void)bd;
-  fwd_txfm2d_64x64_sse4_1(input, output, stride, &cfg, txfm_buf);
-}
-
-static INLINE void transpose_32_4x4x2(int stride, const __m128i *inputA,
-                                      const __m128i *inputB, __m128i *output) {
-  __m128i temp0 = _mm_unpacklo_epi32(inputA[0], inputA[2]);
-  __m128i temp1 = _mm_unpackhi_epi32(inputA[0], inputA[2]);
-  __m128i temp2 = _mm_unpacklo_epi32(inputA[1], inputA[3]);
-  __m128i temp3 = _mm_unpackhi_epi32(inputA[1], inputA[3]);
-
-  output[0 * stride] = _mm_unpacklo_epi32(temp0, temp2);
-  output[1 * stride] = _mm_unpackhi_epi32(temp0, temp2);
-  output[2 * stride] = _mm_unpacklo_epi32(temp1, temp3);
-  output[3 * stride] = _mm_unpackhi_epi32(temp1, temp3);
-
-  temp0 = _mm_unpacklo_epi32(inputB[0], inputB[2]);
-  temp1 = _mm_unpackhi_epi32(inputB[0], inputB[2]);
-  temp2 = _mm_unpacklo_epi32(inputB[1], inputB[3]);
-  temp3 = _mm_unpackhi_epi32(inputB[1], inputB[3]);
-
-  output[4 * stride] = _mm_unpacklo_epi32(temp0, temp2);
-  output[5 * stride] = _mm_unpackhi_epi32(temp0, temp2);
-  output[6 * stride] = _mm_unpacklo_epi32(temp1, temp3);
-  output[7 * stride] = _mm_unpackhi_epi32(temp1, temp3);
-}
-
-static void lowbd_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
-                                          int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  (void)tx_type;
-  assert(tx_type == DCT_DCT);
-  const TX_SIZE tx_size = TX_64X64;
-  __m128i buf0[64], buf1[512];
-  const int8_t *shift = fwd_txfm_shift_ls[tx_size];
-  const int txw_idx = get_txw_idx(tx_size);
-  const int txh_idx = get_txh_idx(tx_size);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-  const transform_1d_sse2 col_txfm = fdct8x64_new_sse2;
-  const int width_div8 = (width >> 3);
-  const int height_div8 = (height >> 3);
-
-  for (int i = 0; i < width_div8; i++) {
-    load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
-    round_shift_16bit(buf0, height, shift[0]);
-    col_txfm(buf0, buf0, cos_bit_col);
-    round_shift_16bit(buf0, height, shift[1]);
-    for (int j = 0; j < AOMMIN(4, height_div8); ++j) {
-      transpose_16bit_8x8(buf0 + j * 8, buf1 + j * width + 8 * i);
-    }
-  }
-  for (int i = 0; i < AOMMIN(4, height_div8); i++) {
-    __m128i bufA[64];
-    __m128i bufB[64];
-    __m128i *buf = buf1 + width * i;
-    for (int j = 0; j < width; ++j) {
-      bufA[j] = _mm_cvtepi16_epi32(buf[j]);
-      bufB[j] = _mm_cvtepi16_epi32(_mm_unpackhi_epi64(buf[j], buf[j]));
-    }
-    av1_fdct64_new_sse4_1(bufA, bufA, cos_bit_row, 1, 1);
-    av1_fdct64_new_sse4_1(bufB, bufB, cos_bit_row, 1, 1);
-    av1_round_shift_array_32_sse4_1(bufA, bufA, 32, -shift[2]);
-    av1_round_shift_array_32_sse4_1(bufB, bufB, 32, -shift[2]);
-
-    int32_t *output8 = output + 8 * 32 * i;
-    for (int j = 0; j < width_div8; ++j) {
-      __m128i *out = (__m128i *)(output8 + 4 * j);
-      transpose_32_4x4x2(8, bufA + 4 * j, bufB + 4 * j, out);
-    }
-  }
-}
-
-static void lowbd_fwd_txfm2d_64x32_sse4_1(const int16_t *input, int32_t *output,
-                                          int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  const TX_SIZE tx_size = TX_64X32;
-  __m128i buf0[64], buf1[256];
-  const int8_t *shift = fwd_txfm_shift_ls[tx_size];
-  const int txw_idx = get_txw_idx(tx_size);
-  const int txh_idx = get_txh_idx(tx_size);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-  const transform_1d_sse2 col_txfm = col_txfm8x32_arr[tx_type];
-  const int width_div8 = (width >> 3);
-  const int height_div8 = (height >> 3);
-
-  for (int i = 0; i < width_div8; i++) {
-    load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
-    round_shift_16bit(buf0, height, shift[0]);
-    col_txfm(buf0, buf0, cos_bit_col);
-    round_shift_16bit(buf0, height, shift[1]);
-    for (int j = 0; j < AOMMIN(4, height_div8); ++j) {
-      transpose_16bit_8x8(buf0 + j * 8, buf1 + j * width + 8 * i);
-    }
-  }
-  assert(tx_type == DCT_DCT);
-  for (int i = 0; i < AOMMIN(4, height_div8); i++) {
-    __m128i bufA[64];
-    __m128i bufB[64];
-    __m128i *buf = buf1 + width * i;
-    for (int j = 0; j < width; ++j) {
-      bufA[j] = _mm_cvtepi16_epi32(buf[j]);
-      bufB[j] = _mm_cvtepi16_epi32(_mm_unpackhi_epi64(buf[j], buf[j]));
-    }
-    av1_fdct64_new_sse4_1(bufA, bufA, cos_bit_row, 1, 1);
-    av1_fdct64_new_sse4_1(bufB, bufB, cos_bit_row, 1, 1);
-    av1_round_shift_rect_array_32_sse4_1(bufA, bufA, 32, -shift[2], NewSqrt2);
-    av1_round_shift_rect_array_32_sse4_1(bufB, bufB, 32, -shift[2], NewSqrt2);
-
-    int32_t *output8 = output + 8 * 32 * i;
-    for (int j = 0; j < width_div8; ++j) {
-      __m128i *out = (__m128i *)(output8 + 4 * j);
-      transpose_32_4x4x2(8, bufA + 4 * j, bufB + 4 * j, out);
-    }
-  }
-}
-
-static void lowbd_fwd_txfm2d_32x64_sse4_1(const int16_t *input, int32_t *output,
-                                          int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  (void)tx_type;
-  assert(tx_type == DCT_DCT);
-  const TX_SIZE tx_size = TX_32X64;
-  __m128i buf0[64], buf1[256];
-  const int8_t *shift = fwd_txfm_shift_ls[tx_size];
-  const int txw_idx = get_txw_idx(tx_size);
-  const int txh_idx = get_txh_idx(tx_size);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-  const transform_1d_sse2 col_txfm = fdct8x64_new_sse2;
-  const int width_div8 = (width >> 3);
-  const int height_div8 = (height >> 3);
-
-  for (int i = 0; i < width_div8; i++) {
-    load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
-    round_shift_16bit(buf0, height, shift[0]);
-    col_txfm(buf0, buf0, cos_bit_col);
-    round_shift_16bit(buf0, height, shift[1]);
-    for (int j = 0; j < AOMMIN(4, height_div8); ++j) {
-      transpose_16bit_8x8(buf0 + j * 8, buf1 + j * width + 8 * i);
-    }
-  }
-
-  for (int i = 0; i < AOMMIN(4, height_div8); i++) {
-    __m128i bufA[32];
-    __m128i bufB[32];
-    __m128i *buf = buf1 + width * i;
-    for (int j = 0; j < width; ++j) {
-      bufA[j] = _mm_cvtepi16_epi32(buf[j]);
-      bufB[j] = _mm_cvtepi16_epi32(_mm_unpackhi_epi64(buf[j], buf[j]));
-    }
-    av1_fdct32_new_sse4_1(bufA, bufA, cos_bit_row);
-    av1_fdct32_new_sse4_1(bufB, bufB, cos_bit_row);
-    av1_round_shift_rect_array_32_sse4_1(bufA, bufA, 32, -shift[2], NewSqrt2);
-    av1_round_shift_rect_array_32_sse4_1(bufB, bufB, 32, -shift[2], NewSqrt2);
-
-    int32_t *output8 = output + 8 * 32 * i;
-    for (int j = 0; j < (32 / 4); ++j) {
-      __m128i *out = (__m128i *)(output8 + 4 * j);
-      transpose_32_4x4x2(8, bufA + 4 * j, bufB + 4 * j, out);
-    }
-  }
-}
-
-static FwdTxfm2dFunc fwd_txfm2d_func_ls[TX_SIZES_ALL] = {
-  av1_lowbd_fwd_txfm2d_4x4_sse2,    // 4x4 transform
-  av1_lowbd_fwd_txfm2d_8x8_sse2,    // 8x8 transform
-  av1_lowbd_fwd_txfm2d_16x16_sse2,  // 16x16 transform
-  av1_lowbd_fwd_txfm2d_32x32_sse2,  // 32x32 transform
-  lowbd_fwd_txfm2d_64x64_sse4_1,    // 64x64 transform
-  av1_lowbd_fwd_txfm2d_4x8_sse2,    // 4x8 transform
-  av1_lowbd_fwd_txfm2d_8x4_sse2,    // 8x4 transform
-  av1_lowbd_fwd_txfm2d_8x16_sse2,   // 8x16 transform
-  av1_lowbd_fwd_txfm2d_16x8_sse2,   // 16x8 transform
-  av1_lowbd_fwd_txfm2d_16x32_sse2,  // 16x32 transform
-  av1_lowbd_fwd_txfm2d_32x16_sse2,  // 32x16 transform
-  lowbd_fwd_txfm2d_32x64_sse4_1,    // 32x64 transform
-  lowbd_fwd_txfm2d_64x32_sse4_1,    // 64x32 transform
-  av1_lowbd_fwd_txfm2d_4x16_sse2,   // 4x16 transform
-  av1_lowbd_fwd_txfm2d_16x4_sse2,   // 16x4 transform
-  av1_lowbd_fwd_txfm2d_8x32_sse2,   // 8x32 transform
-  av1_lowbd_fwd_txfm2d_32x8_sse2,   // 32x8 transform
-  av1_lowbd_fwd_txfm2d_16x64_sse2,  // 16x64 transform
-  av1_lowbd_fwd_txfm2d_64x16_sse2,  // 64x16 transform
-};
-
-void av1_lowbd_fwd_txfm_sse4_1(const int16_t *src_diff, tran_low_t *coeff,
-                               int diff_stride, TxfmParam *txfm_param) {
-  FwdTxfm2dFunc fwd_txfm2d_func = fwd_txfm2d_func_ls[txfm_param->tx_size];
-  if ((fwd_txfm2d_func == NULL) ||
-      (txfm_param->lossless && txfm_param->tx_size == TX_4X4)) {
-    av1_lowbd_fwd_txfm_c(src_diff, coeff, diff_stride, txfm_param);
-  } else {
-    fwd_txfm2d_func(src_diff, coeff, diff_stride, txfm_param->tx_type,
-                    txfm_param->bd);
-  }
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_avx2.h b/third_party/aom/av1/encoder/x86/av1_fwd_txfm_avx2.h
deleted file mode 100644
index 38707137c..000000000
--- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_avx2.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_AVX2_H_
-#define AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_AVX2_H_
-#include <immintrin.h>
-
-static INLINE __m256i av1_round_shift_32_avx2(__m256i vec, int bit) {
-  __m256i tmp, round;
-  round = _mm256_set1_epi32(1 << (bit - 1));
-  tmp = _mm256_add_epi32(vec, round);
-  return _mm256_srai_epi32(tmp, bit);
-}
-
-// out0 = in0*w0 + in1*w1
-// out1 = -in1*w0 + in0*w1
-static INLINE void btf_32_avx2_type0(const int32_t w0, const int32_t w1,
-                                     __m256i *in0, __m256i *in1,
-                                     const __m256i _r, const int32_t cos_bit) {
-  __m256i _in0 = *in0;
-  __m256i _in1 = *in1;
-  const __m256i ww0 = _mm256_set1_epi32(w0);
-  const __m256i ww1 = _mm256_set1_epi32(w1);
-  const __m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0);
-  const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1);
-  __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1);
-  temp0 = _mm256_add_epi32(temp0, _r);
-  *in0 = _mm256_srai_epi32(temp0, cos_bit);
-  const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1);
-  const __m256i in1_w0 = _mm256_mullo_epi32(_in1, ww0);
-  __m256i temp1 = _mm256_sub_epi32(in0_w1, in1_w0);
-  temp1 = _mm256_add_epi32(temp1, _r);
-  *in1 = _mm256_srai_epi32(temp1, cos_bit);
-}
-
-static INLINE void btf_32_avx2_type1(const int32_t w0, const int32_t w1,
-                                     __m256i *in0, __m256i *in1,
-                                     const __m256i _r, const int32_t cos_bit) {
-  __m256i _in0 = *in0;
-  __m256i _in1 = *in1;
-  const __m256i ww0 = _mm256_set1_epi32(w0);
-  const __m256i ww1 = _mm256_set1_epi32(w1);
-  const __m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0);
-  const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1);
-  __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1);
-  temp0 = _mm256_add_epi32(temp0, _r);
-  *in0 = _mm256_srai_epi32(temp0, cos_bit);
-  const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1);
-  const __m256i in1_w0 = _mm256_mullo_epi32(_in1, ww0);
-  __m256i temp1 = _mm256_sub_epi32(in1_w0, in0_w1);
-  temp1 = _mm256_add_epi32(temp1, _r);
-  *in1 = _mm256_srai_epi32(temp1, cos_bit);
-}
-
-// out0 = in0*w0 + in1*w1
-// out1 = -in1*w0 + in0*w1
-static INLINE void btf_32_avx2_type0_new(const __m256i ww0, const __m256i ww1,
-                                         __m256i *in0, __m256i *in1,
-                                         const __m256i _r,
-                                         const int32_t cos_bit) {
-  __m256i _in0 = *in0;
-  __m256i _in1 = *in1;
-  const __m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0);
-  const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1);
-  __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1);
-  temp0 = _mm256_add_epi32(temp0, _r);
-  *in0 = _mm256_srai_epi32(temp0, cos_bit);
-  const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1);
-  const __m256i in1_w0 = _mm256_mullo_epi32(_in1, ww0);
-  __m256i temp1 = _mm256_sub_epi32(in0_w1, in1_w0);
-  temp1 = _mm256_add_epi32(temp1, _r);
-  *in1 = _mm256_srai_epi32(temp1, cos_bit);
-}
-
-// out0 = in0*w0 + in1*w1
-// out1 = in1*w0 - in0*w1
-static INLINE void btf_32_avx2_type1_new(const __m256i ww0, const __m256i ww1,
-                                         __m256i *in0, __m256i *in1,
-                                         const __m256i _r,
-                                         const int32_t cos_bit) {
-  __m256i _in0 = *in0;
-  __m256i _in1 = *in1;
-  const __m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0);
-  const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1);
-  __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1);
-  temp0 = _mm256_add_epi32(temp0, _r);
-  *in0 = _mm256_srai_epi32(temp0, cos_bit);
-  const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1);
-  const __m256i in1_w0 = _mm256_mullo_epi32(_in1, ww0);
-  __m256i temp1 = _mm256_sub_epi32(in1_w0, in0_w1);
-  temp1 = _mm256_add_epi32(temp1, _r);
-  *in1 = _mm256_srai_epi32(temp1, cos_bit);
-}
-
-#endif  // AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_AVX2_H_
diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.c b/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.c
deleted file mode 100644
index 6aae7ce1e..000000000
--- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.c
+++ /dev/null
@@ -1,2889 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/common/x86/av1_txfm_sse2.h"
-#include "av1/encoder/av1_fwd_txfm1d_cfg.h"
-#include "av1/encoder/x86/av1_fwd_txfm_sse2.h"
-
-// TODO(linfengz): refine fdct4x8 and fadst4x8 optimization (if possible).
-
-static void fdct4x4_new_sse2(const __m128i *input, __m128i *output,
-                             int8_t cos_bit) {
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
-  const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
-  const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
-  const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
-  const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-  __m128i u[4], v[4];
-
-  u[0] = _mm_unpacklo_epi16(input[0], input[1]);
-  u[1] = _mm_unpacklo_epi16(input[3], input[2]);
-
-  v[0] = _mm_add_epi16(u[0], u[1]);
-  v[1] = _mm_sub_epi16(u[0], u[1]);
-
-  u[0] = _mm_madd_epi16(v[0], cospi_p32_p32);  // 0
-  u[1] = _mm_madd_epi16(v[0], cospi_p32_m32);  // 2
-  u[2] = _mm_madd_epi16(v[1], cospi_p16_p48);  // 1
-  u[3] = _mm_madd_epi16(v[1], cospi_p48_m16);  // 3
-
-  v[0] = _mm_add_epi32(u[0], __rounding);
-  v[1] = _mm_add_epi32(u[1], __rounding);
-  v[2] = _mm_add_epi32(u[2], __rounding);
-  v[3] = _mm_add_epi32(u[3], __rounding);
-  u[0] = _mm_srai_epi32(v[0], cos_bit);
-  u[1] = _mm_srai_epi32(v[1], cos_bit);
-  u[2] = _mm_srai_epi32(v[2], cos_bit);
-  u[3] = _mm_srai_epi32(v[3], cos_bit);
-
-  output[0] = _mm_packs_epi32(u[0], u[1]);
-  output[1] = _mm_packs_epi32(u[2], u[3]);
-  output[2] = _mm_srli_si128(output[0], 8);
-  output[3] = _mm_srli_si128(output[1], 8);
-}
-
-static void fdct8x4_new_sse2(const __m128i *input, __m128i *output,
-                             int8_t cos_bit) {
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
-  __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
-  __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
-  __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
-  __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
-
-  // stage 1
-  __m128i x1[4];
-  x1[0] = _mm_adds_epi16(input[0], input[3]);
-  x1[3] = _mm_subs_epi16(input[0], input[3]);
-  x1[1] = _mm_adds_epi16(input[1], input[2]);
-  x1[2] = _mm_subs_epi16(input[1], input[2]);
-
-  // stage 2
-  __m128i x2[4];
-  btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[0], x1[1], x2[0], x2[1]);
-  btf_16_sse2(cospi_p48_p16, cospi_m16_p48, x1[2], x1[3], x2[2], x2[3]);
-
-  // stage 3
-  output[0] = x2[0];
-  output[1] = x2[2];
-  output[2] = x2[1];
-  output[3] = x2[3];
-}
-
-static void fdct4x8_new_sse2(const __m128i *input, __m128i *output,
-                             int8_t cos_bit) {
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
-  __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
-  __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
-  __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
-  __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
-  __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
-  __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
-  __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
-  __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
-  __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
-
-  // stage 1
-  __m128i x1[8];
-  x1[0] = _mm_adds_epi16(input[0], input[7]);
-  x1[7] = _mm_subs_epi16(input[0], input[7]);
-  x1[1] = _mm_adds_epi16(input[1], input[6]);
-  x1[6] = _mm_subs_epi16(input[1], input[6]);
-  x1[2] = _mm_adds_epi16(input[2], input[5]);
-  x1[5] = _mm_subs_epi16(input[2], input[5]);
-  x1[3] = _mm_adds_epi16(input[3], input[4]);
-  x1[4] = _mm_subs_epi16(input[3], input[4]);
-
-  // stage 2
-  __m128i x2[8];
-  x2[0] = _mm_adds_epi16(x1[0], x1[3]);
-  x2[3] = _mm_subs_epi16(x1[0], x1[3]);
-  x2[1] = _mm_adds_epi16(x1[1], x1[2]);
-  x2[2] = _mm_subs_epi16(x1[1], x1[2]);
-  x2[4] = x1[4];
-  btf_16_w4_sse2(&cospi_m32_p32, &cospi_p32_p32, __rounding, cos_bit, &x1[5],
-                 &x1[6], &x2[5], &x2[6]);
-  x2[7] = x1[7];
-
-  // stage 3
-  __m128i x3[8];
-  btf_16_w4_sse2(&cospi_p32_p32, &cospi_p32_m32, __rounding, cos_bit, &x2[0],
-                 &x2[1], &x3[0], &x3[1]);
-  btf_16_w4_sse2(&cospi_p48_p16, &cospi_m16_p48, __rounding, cos_bit, &x2[2],
-                 &x2[3], &x3[2], &x3[3]);
-  x3[4] = _mm_adds_epi16(x2[4], x2[5]);
-  x3[5] = _mm_subs_epi16(x2[4], x2[5]);
-  x3[6] = _mm_subs_epi16(x2[7], x2[6]);
-  x3[7] = _mm_adds_epi16(x2[7], x2[6]);
-
-  // stage 4
-  __m128i x4[8];
-  x4[0] = x3[0];
-  x4[1] = x3[1];
-  x4[2] = x3[2];
-  x4[3] = x3[3];
-  btf_16_w4_sse2(&cospi_p56_p08, &cospi_m08_p56, __rounding, cos_bit, &x3[4],
-                 &x3[7], &x4[4], &x4[7]);
-  btf_16_w4_sse2(&cospi_p24_p40, &cospi_m40_p24, __rounding, cos_bit, &x3[5],
-                 &x3[6], &x4[5], &x4[6]);
-
-  // stage 5
-  output[0] = x4[0];
-  output[1] = x4[4];
-  output[2] = x4[2];
-  output[3] = x4[6];
-  output[4] = x4[1];
-  output[5] = x4[5];
-  output[6] = x4[3];
-  output[7] = x4[7];
-}
-
-static void fdct8x8_new_sse2(const __m128i *input, __m128i *output,
-                             int8_t cos_bit) {
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
-  __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
-  __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
-  __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
-  __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
-  __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
-  __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
-  __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
-  __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
-  __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
-
-  // stage 1
-  __m128i x1[8];
-  x1[0] = _mm_adds_epi16(input[0], input[7]);
-  x1[7] = _mm_subs_epi16(input[0], input[7]);
-  x1[1] = _mm_adds_epi16(input[1], input[6]);
-  x1[6] = _mm_subs_epi16(input[1], input[6]);
-  x1[2] = _mm_adds_epi16(input[2], input[5]);
-  x1[5] = _mm_subs_epi16(input[2], input[5]);
-  x1[3] = _mm_adds_epi16(input[3], input[4]);
-  x1[4] = _mm_subs_epi16(input[3], input[4]);
-
-  // stage 2
-  __m128i x2[8];
-  x2[0] = _mm_adds_epi16(x1[0], x1[3]);
-  x2[3] = _mm_subs_epi16(x1[0], x1[3]);
-  x2[1] = _mm_adds_epi16(x1[1], x1[2]);
-  x2[2] = _mm_subs_epi16(x1[1], x1[2]);
-  x2[4] = x1[4];
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[5], x1[6], x2[5], x2[6]);
-  x2[7] = x1[7];
-
-  // stage 3
-  __m128i x3[8];
-  btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x2[0], x2[1], x3[0], x3[1]);
-  btf_16_sse2(cospi_p48_p16, cospi_m16_p48, x2[2], x2[3], x3[2], x3[3]);
-  x3[4] = _mm_adds_epi16(x2[4], x2[5]);
-  x3[5] = _mm_subs_epi16(x2[4], x2[5]);
-  x3[6] = _mm_subs_epi16(x2[7], x2[6]);
-  x3[7] = _mm_adds_epi16(x2[7], x2[6]);
-
-  // stage 4
-  __m128i x4[8];
-  x4[0] = x3[0];
-  x4[1] = x3[1];
-  x4[2] = x3[2];
-  x4[3] = x3[3];
-  btf_16_sse2(cospi_p56_p08, cospi_m08_p56, x3[4], x3[7], x4[4], x4[7]);
-  btf_16_sse2(cospi_p24_p40, cospi_m40_p24, x3[5], x3[6], x4[5], x4[6]);
-
-  // stage 5
-  output[0] = x4[0];
-  output[1] = x4[4];
-  output[2] = x4[2];
-  output[3] = x4[6];
-  output[4] = x4[1];
-  output[5] = x4[5];
-  output[6] = x4[3];
-  output[7] = x4[7];
-}
-
-static void fdct8x16_new_sse2(const __m128i *input, __m128i *output,
-                              int8_t cos_bit) {
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
-  __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
-  __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
-  __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
-  __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
-  __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
-  __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
-  __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
-  __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
-  __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
-  __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
-  __m128i cospi_p60_p04 = pair_set_epi16(cospi[60], cospi[4]);
-  __m128i cospi_m04_p60 = pair_set_epi16(-cospi[4], cospi[60]);
-  __m128i cospi_p28_p36 = pair_set_epi16(cospi[28], cospi[36]);
-  __m128i cospi_m36_p28 = pair_set_epi16(-cospi[36], cospi[28]);
-  __m128i cospi_p44_p20 = pair_set_epi16(cospi[44], cospi[20]);
-  __m128i cospi_m20_p44 = pair_set_epi16(-cospi[20], cospi[44]);
-  __m128i cospi_p12_p52 = pair_set_epi16(cospi[12], cospi[52]);
-  __m128i cospi_m52_p12 = pair_set_epi16(-cospi[52], cospi[12]);
-
-  // stage 1
-  __m128i x1[16];
-  x1[0] = _mm_adds_epi16(input[0], input[15]);
-  x1[15] = _mm_subs_epi16(input[0], input[15]);
-  x1[1] = _mm_adds_epi16(input[1], input[14]);
-  x1[14] = _mm_subs_epi16(input[1], input[14]);
-  x1[2] = _mm_adds_epi16(input[2], input[13]);
-  x1[13] = _mm_subs_epi16(input[2], input[13]);
-  x1[3] = _mm_adds_epi16(input[3], input[12]);
-  x1[12] = _mm_subs_epi16(input[3], input[12]);
-  x1[4] = _mm_adds_epi16(input[4], input[11]);
-  x1[11] = _mm_subs_epi16(input[4], input[11]);
-  x1[5] = _mm_adds_epi16(input[5], input[10]);
-  x1[10] = _mm_subs_epi16(input[5], input[10]);
-  x1[6] = _mm_adds_epi16(input[6], input[9]);
-  x1[9] = _mm_subs_epi16(input[6], input[9]);
-  x1[7] = _mm_adds_epi16(input[7], input[8]);
-  x1[8] = _mm_subs_epi16(input[7], input[8]);
-
-  // stage 2
-  __m128i x2[16];
-  x2[0] = _mm_adds_epi16(x1[0], x1[7]);
-  x2[7] = _mm_subs_epi16(x1[0], x1[7]);
-  x2[1] = _mm_adds_epi16(x1[1], x1[6]);
-  x2[6] = _mm_subs_epi16(x1[1], x1[6]);
-  x2[2] = _mm_adds_epi16(x1[2], x1[5]);
-  x2[5] = _mm_subs_epi16(x1[2], x1[5]);
-  x2[3] = _mm_adds_epi16(x1[3], x1[4]);
-  x2[4] = _mm_subs_epi16(x1[3], x1[4]);
-  x2[8] = x1[8];
-  x2[9] = x1[9];
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[10], x1[13], x2[10], x2[13]);
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[11], x1[12], x2[11], x2[12]);
-  x2[14] = x1[14];
-  x2[15] = x1[15];
-
-  // stage 3
-  __m128i x3[16];
-  x3[0] = _mm_adds_epi16(x2[0], x2[3]);
-  x3[3] = _mm_subs_epi16(x2[0], x2[3]);
-  x3[1] = _mm_adds_epi16(x2[1], x2[2]);
-  x3[2] = _mm_subs_epi16(x2[1], x2[2]);
-  x3[4] = x2[4];
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[5], x2[6], x3[5], x3[6]);
-  x3[7] = x2[7];
-  x3[8] = _mm_adds_epi16(x2[8], x2[11]);
-  x3[11] = _mm_subs_epi16(x2[8], x2[11]);
-  x3[9] = _mm_adds_epi16(x2[9], x2[10]);
-  x3[10] = _mm_subs_epi16(x2[9], x2[10]);
-  x3[12] = _mm_subs_epi16(x2[15], x2[12]);
-  x3[15] = _mm_adds_epi16(x2[15], x2[12]);
-  x3[13] = _mm_subs_epi16(x2[14], x2[13]);
-  x3[14] = _mm_adds_epi16(x2[14], x2[13]);
-
-  // stage 4
-  __m128i x4[16];
-  btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x3[0], x3[1], x4[0], x4[1]);
-  btf_16_sse2(cospi_p48_p16, cospi_m16_p48, x3[2], x3[3], x4[2], x4[3]);
-  x4[4] = _mm_adds_epi16(x3[4], x3[5]);
-  x4[5] = _mm_subs_epi16(x3[4], x3[5]);
-  x4[6] = _mm_subs_epi16(x3[7], x3[6]);
-  x4[7] = _mm_adds_epi16(x3[7], x3[6]);
-  x4[8] = x3[8];
-  btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[9], x3[14], x4[9], x4[14]);
-  btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[10], x3[13], x4[10], x4[13]);
-  x4[11] = x3[11];
-  x4[12] = x3[12];
-  x4[15] = x3[15];
-
-  // stage 5
-  __m128i x5[16];
-  x5[0] = x4[0];
-  x5[1] = x4[1];
-  x5[2] = x4[2];
-  x5[3] = x4[3];
-  btf_16_sse2(cospi_p56_p08, cospi_m08_p56, x4[4], x4[7], x5[4], x5[7]);
-  btf_16_sse2(cospi_p24_p40, cospi_m40_p24, x4[5], x4[6], x5[5], x5[6]);
-  x5[8] = _mm_adds_epi16(x4[8], x4[9]);
-  x5[9] = _mm_subs_epi16(x4[8], x4[9]);
-  x5[10] = _mm_subs_epi16(x4[11], x4[10]);
-  x5[11] = _mm_adds_epi16(x4[11], x4[10]);
-  x5[12] = _mm_adds_epi16(x4[12], x4[13]);
-  x5[13] = _mm_subs_epi16(x4[12], x4[13]);
-  x5[14] = _mm_subs_epi16(x4[15], x4[14]);
-  x5[15] = _mm_adds_epi16(x4[15], x4[14]);
-
-  // stage 6
-  __m128i x6[16];
-  x6[0] = x5[0];
-  x6[1] = x5[1];
-  x6[2] = x5[2];
-  x6[3] = x5[3];
-  x6[4] = x5[4];
-  x6[5] = x5[5];
-  x6[6] = x5[6];
-  x6[7] = x5[7];
-  btf_16_sse2(cospi_p60_p04, cospi_m04_p60, x5[8], x5[15], x6[8], x6[15]);
-  btf_16_sse2(cospi_p28_p36, cospi_m36_p28, x5[9], x5[14], x6[9], x6[14]);
-  btf_16_sse2(cospi_p44_p20, cospi_m20_p44, x5[10], x5[13], x6[10], x6[13]);
-  btf_16_sse2(cospi_p12_p52, cospi_m52_p12, x5[11], x5[12], x6[11], x6[12]);
-
-  // stage 7
-  output[0] = x6[0];
-  output[1] = x6[8];
-  output[2] = x6[4];
-  output[3] = x6[12];
-  output[4] = x6[2];
-  output[5] = x6[10];
-  output[6] = x6[6];
-  output[7] = x6[14];
-  output[8] = x6[1];
-  output[9] = x6[9];
-  output[10] = x6[5];
-  output[11] = x6[13];
-  output[12] = x6[3];
-  output[13] = x6[11];
-  output[14] = x6[7];
-  output[15] = x6[15];
-}
-
-void fdct8x32_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
-  __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
-  __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
-  __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
-  __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
-  __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
-  __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
-  __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
-  __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
-  __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
-  __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
-  __m128i cospi_m56_m08 = pair_set_epi16(-cospi[56], -cospi[8]);
-  __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]);
-  __m128i cospi_p60_p04 = pair_set_epi16(cospi[60], cospi[4]);
-  __m128i cospi_m04_p60 = pair_set_epi16(-cospi[4], cospi[60]);
-  __m128i cospi_p28_p36 = pair_set_epi16(cospi[28], cospi[36]);
-  __m128i cospi_m36_p28 = pair_set_epi16(-cospi[36], cospi[28]);
-  __m128i cospi_p44_p20 = pair_set_epi16(cospi[44], cospi[20]);
-  __m128i cospi_m20_p44 = pair_set_epi16(-cospi[20], cospi[44]);
-  __m128i cospi_p12_p52 = pair_set_epi16(cospi[12], cospi[52]);
-  __m128i cospi_m52_p12 = pair_set_epi16(-cospi[52], cospi[12]);
-  __m128i cospi_p62_p02 = pair_set_epi16(cospi[62], cospi[2]);
-  __m128i cospi_m02_p62 = pair_set_epi16(-cospi[2], cospi[62]);
-  __m128i cospi_p30_p34 = pair_set_epi16(cospi[30], cospi[34]);
-  __m128i cospi_m34_p30 = pair_set_epi16(-cospi[34], cospi[30]);
-  __m128i cospi_p46_p18 = pair_set_epi16(cospi[46], cospi[18]);
-  __m128i cospi_m18_p46 = pair_set_epi16(-cospi[18], cospi[46]);
-  __m128i cospi_p14_p50 = pair_set_epi16(cospi[14], cospi[50]);
-  __m128i cospi_m50_p14 = pair_set_epi16(-cospi[50], cospi[14]);
-  __m128i cospi_p54_p10 = pair_set_epi16(cospi[54], cospi[10]);
-  __m128i cospi_m10_p54 = pair_set_epi16(-cospi[10], cospi[54]);
-  __m128i cospi_p22_p42 = pair_set_epi16(cospi[22], cospi[42]);
-  __m128i cospi_m42_p22 = pair_set_epi16(-cospi[42], cospi[22]);
-  __m128i cospi_p38_p26 = pair_set_epi16(cospi[38], cospi[26]);
-  __m128i cospi_m26_p38 = pair_set_epi16(-cospi[26], cospi[38]);
-  __m128i cospi_p06_p58 = pair_set_epi16(cospi[6], cospi[58]);
-  __m128i cospi_m58_p06 = pair_set_epi16(-cospi[58], cospi[6]);
-
-  // stage 1
-  __m128i x1[32];
-  x1[0] = _mm_adds_epi16(input[0], input[31]);
-  x1[31] = _mm_subs_epi16(input[0], input[31]);
-  x1[1] = _mm_adds_epi16(input[1], input[30]);
-  x1[30] = _mm_subs_epi16(input[1], input[30]);
-  x1[2] = _mm_adds_epi16(input[2], input[29]);
-  x1[29] = _mm_subs_epi16(input[2], input[29]);
-  x1[3] = _mm_adds_epi16(input[3], input[28]);
-  x1[28] = _mm_subs_epi16(input[3], input[28]);
-  x1[4] = _mm_adds_epi16(input[4], input[27]);
-  x1[27] = _mm_subs_epi16(input[4], input[27]);
-  x1[5] = _mm_adds_epi16(input[5], input[26]);
-  x1[26] = _mm_subs_epi16(input[5], input[26]);
-  x1[6] = _mm_adds_epi16(input[6], input[25]);
-  x1[25] = _mm_subs_epi16(input[6], input[25]);
-  x1[7] = _mm_adds_epi16(input[7], input[24]);
-  x1[24] = _mm_subs_epi16(input[7], input[24]);
-  x1[8] = _mm_adds_epi16(input[8], input[23]);
-  x1[23] = _mm_subs_epi16(input[8], input[23]);
-  x1[9] = _mm_adds_epi16(input[9], input[22]);
-  x1[22] = _mm_subs_epi16(input[9], input[22]);
-  x1[10] = _mm_adds_epi16(input[10], input[21]);
-  x1[21] = _mm_subs_epi16(input[10], input[21]);
-  x1[11] = _mm_adds_epi16(input[11], input[20]);
-  x1[20] = _mm_subs_epi16(input[11], input[20]);
-  x1[12] = _mm_adds_epi16(input[12], input[19]);
-  x1[19] = _mm_subs_epi16(input[12], input[19]);
-  x1[13] = _mm_adds_epi16(input[13], input[18]);
-  x1[18] = _mm_subs_epi16(input[13], input[18]);
-  x1[14] = _mm_adds_epi16(input[14], input[17]);
-  x1[17] = _mm_subs_epi16(input[14], input[17]);
-  x1[15] = _mm_adds_epi16(input[15], input[16]);
-  x1[16] = _mm_subs_epi16(input[15], input[16]);
-
-  // stage 2
-  __m128i x2[32];
-  x2[0] = _mm_adds_epi16(x1[0], x1[15]);
-  x2[15] = _mm_subs_epi16(x1[0], x1[15]);
-  x2[1] = _mm_adds_epi16(x1[1], x1[14]);
-  x2[14] = _mm_subs_epi16(x1[1], x1[14]);
-  x2[2] = _mm_adds_epi16(x1[2], x1[13]);
-  x2[13] = _mm_subs_epi16(x1[2], x1[13]);
-  x2[3] = _mm_adds_epi16(x1[3], x1[12]);
-  x2[12] = _mm_subs_epi16(x1[3], x1[12]);
-  x2[4] = _mm_adds_epi16(x1[4], x1[11]);
-  x2[11] = _mm_subs_epi16(x1[4], x1[11]);
-  x2[5] = _mm_adds_epi16(x1[5], x1[10]);
-  x2[10] = _mm_subs_epi16(x1[5], x1[10]);
-  x2[6] = _mm_adds_epi16(x1[6], x1[9]);
-  x2[9] = _mm_subs_epi16(x1[6], x1[9]);
-  x2[7] = _mm_adds_epi16(x1[7], x1[8]);
-  x2[8] = _mm_subs_epi16(x1[7], x1[8]);
-  x2[16] = x1[16];
-  x2[17] = x1[17];
-  x2[18] = x1[18];
-  x2[19] = x1[19];
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[20], x1[27], x2[20], x2[27]);
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[21], x1[26], x2[21], x2[26]);
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[22], x1[25], x2[22], x2[25]);
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[23], x1[24], x2[23], x2[24]);
-  x2[28] = x1[28];
-  x2[29] = x1[29];
-  x2[30] = x1[30];
-  x2[31] = x1[31];
-
-  // stage 3
-  __m128i x3[32];
-  x3[0] = _mm_adds_epi16(x2[0], x2[7]);
-  x3[7] = _mm_subs_epi16(x2[0], x2[7]);
-  x3[1] = _mm_adds_epi16(x2[1], x2[6]);
-  x3[6] = _mm_subs_epi16(x2[1], x2[6]);
-  x3[2] = _mm_adds_epi16(x2[2], x2[5]);
-  x3[5] = _mm_subs_epi16(x2[2], x2[5]);
-  x3[3] = _mm_adds_epi16(x2[3], x2[4]);
-  x3[4] = _mm_subs_epi16(x2[3], x2[4]);
-  x3[8] = x2[8];
-  x3[9] = x2[9];
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[10], x2[13], x3[10], x3[13]);
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[11], x2[12], x3[11], x3[12]);
-  x3[14] = x2[14];
-  x3[15] = x2[15];
-  x3[16] = _mm_adds_epi16(x2[16], x2[23]);
-  x3[23] = _mm_subs_epi16(x2[16], x2[23]);
-  x3[17] = _mm_adds_epi16(x2[17], x2[22]);
-  x3[22] = _mm_subs_epi16(x2[17], x2[22]);
-  x3[18] = _mm_adds_epi16(x2[18], x2[21]);
-  x3[21] = _mm_subs_epi16(x2[18], x2[21]);
-  x3[19] = _mm_adds_epi16(x2[19], x2[20]);
-  x3[20] = _mm_subs_epi16(x2[19], x2[20]);
-  x3[24] = _mm_subs_epi16(x2[31], x2[24]);
-  x3[31] = _mm_adds_epi16(x2[31], x2[24]);
-  x3[25] = _mm_subs_epi16(x2[30], x2[25]);
-  x3[30] = _mm_adds_epi16(x2[30], x2[25]);
-  x3[26] = _mm_subs_epi16(x2[29], x2[26]);
-  x3[29] = _mm_adds_epi16(x2[29], x2[26]);
-  x3[27] = _mm_subs_epi16(x2[28], x2[27]);
-  x3[28] = _mm_adds_epi16(x2[28], x2[27]);
-
-  // stage 4
-  __m128i x4[32];
-  x4[0] = _mm_adds_epi16(x3[0], x3[3]);
-  x4[3] = _mm_subs_epi16(x3[0], x3[3]);
-  x4[1] = _mm_adds_epi16(x3[1], x3[2]);
-  x4[2] = _mm_subs_epi16(x3[1], x3[2]);
-  x4[4] = x3[4];
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x3[5], x3[6], x4[5], x4[6]);
-  x4[7] = x3[7];
-  x4[8] = _mm_adds_epi16(x3[8], x3[11]);
-  x4[11] = _mm_subs_epi16(x3[8], x3[11]);
-  x4[9] = _mm_adds_epi16(x3[9], x3[10]);
-  x4[10] = _mm_subs_epi16(x3[9], x3[10]);
-  x4[12] = _mm_subs_epi16(x3[15], x3[12]);
-  x4[15] = _mm_adds_epi16(x3[15], x3[12]);
-  x4[13] = _mm_subs_epi16(x3[14], x3[13]);
-  x4[14] = _mm_adds_epi16(x3[14], x3[13]);
-  x4[16] = x3[16];
-  x4[17] = x3[17];
-  btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[18], x3[29], x4[18], x4[29]);
-  btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[19], x3[28], x4[19], x4[28]);
-  btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[20], x3[27], x4[20], x4[27]);
-  btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[21], x3[26], x4[21], x4[26]);
-  x4[22] = x3[22];
-  x4[23] = x3[23];
-  x4[24] = x3[24];
-  x4[25] = x3[25];
-  x4[30] = x3[30];
-  x4[31] = x3[31];
-
-  // stage 5
-  __m128i x5[32];
-  btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x4[0], x4[1], x5[0], x5[1]);
-  btf_16_sse2(cospi_p48_p16, cospi_m16_p48, x4[2], x4[3], x5[2], x5[3]);
-  x5[4] = _mm_adds_epi16(x4[4], x4[5]);
-  x5[5] = _mm_subs_epi16(x4[4], x4[5]);
-  x5[6] = _mm_subs_epi16(x4[7], x4[6]);
-  x5[7] = _mm_adds_epi16(x4[7], x4[6]);
-  x5[8] = x4[8];
-  btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x4[9], x4[14], x5[9], x5[14]);
-  btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x4[10], x4[13], x5[10], x5[13]);
-  x5[11] = x4[11];
-  x5[12] = x4[12];
-  x5[15] = x4[15];
-  x5[16] = _mm_adds_epi16(x4[16], x4[19]);
-  x5[19] = _mm_subs_epi16(x4[16], x4[19]);
-  x5[17] = _mm_adds_epi16(x4[17], x4[18]);
-  x5[18] = _mm_subs_epi16(x4[17], x4[18]);
-  x5[20] = _mm_subs_epi16(x4[23], x4[20]);
-  x5[23] = _mm_adds_epi16(x4[23], x4[20]);
-  x5[21] = _mm_subs_epi16(x4[22], x4[21]);
-  x5[22] = _mm_adds_epi16(x4[22], x4[21]);
-  x5[24] = _mm_adds_epi16(x4[24], x4[27]);
-  x5[27] = _mm_subs_epi16(x4[24], x4[27]);
-  x5[25] = _mm_adds_epi16(x4[25], x4[26]);
-  x5[26] = _mm_subs_epi16(x4[25], x4[26]);
-  x5[28] = _mm_subs_epi16(x4[31], x4[28]);
-  x5[31] = _mm_adds_epi16(x4[31], x4[28]);
-  x5[29] = _mm_subs_epi16(x4[30], x4[29]);
-  x5[30] = _mm_adds_epi16(x4[30], x4[29]);
-
-  // stage 6
-  __m128i x6[32];
-  x6[0] = x5[0];
-  x6[1] = x5[1];
-  x6[2] = x5[2];
-  x6[3] = x5[3];
-  btf_16_sse2(cospi_p56_p08, cospi_m08_p56, x5[4], x5[7], x6[4], x6[7]);
-  btf_16_sse2(cospi_p24_p40, cospi_m40_p24, x5[5], x5[6], x6[5], x6[6]);
-  x6[8] = _mm_adds_epi16(x5[8], x5[9]);
-  x6[9] = _mm_subs_epi16(x5[8], x5[9]);
-  x6[10] = _mm_subs_epi16(x5[11], x5[10]);
-  x6[11] = _mm_adds_epi16(x5[11], x5[10]);
-  x6[12] = _mm_adds_epi16(x5[12], x5[13]);
-  x6[13] = _mm_subs_epi16(x5[12], x5[13]);
-  x6[14] = _mm_subs_epi16(x5[15], x5[14]);
-  x6[15] = _mm_adds_epi16(x5[15], x5[14]);
-  x6[16] = x5[16];
-  btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x5[17], x5[30], x6[17], x6[30]);
-  btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x5[18], x5[29], x6[18], x6[29]);
-  x6[19] = x5[19];
-  x6[20] = x5[20];
-  btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x5[21], x5[26], x6[21], x6[26]);
-  btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x5[22], x5[25], x6[22], x6[25]);
-  x6[23] = x5[23];
-  x6[24] = x5[24];
-  x6[27] = x5[27];
-  x6[28] = x5[28];
-  x6[31] = x5[31];
-
-  // stage 7
-  __m128i x7[32];
-  x7[0] = x6[0];
-  x7[1] = x6[1];
-  x7[2] = x6[2];
-  x7[3] = x6[3];
-  x7[4] = x6[4];
-  x7[5] = x6[5];
-  x7[6] = x6[6];
-  x7[7] = x6[7];
-  btf_16_sse2(cospi_p60_p04, cospi_m04_p60, x6[8], x6[15], x7[8], x7[15]);
-  btf_16_sse2(cospi_p28_p36, cospi_m36_p28, x6[9], x6[14], x7[9], x7[14]);
-  btf_16_sse2(cospi_p44_p20, cospi_m20_p44, x6[10], x6[13], x7[10], x7[13]);
-  btf_16_sse2(cospi_p12_p52, cospi_m52_p12, x6[11], x6[12], x7[11], x7[12]);
-  x7[16] = _mm_adds_epi16(x6[16], x6[17]);
-  x7[17] = _mm_subs_epi16(x6[16], x6[17]);
-  x7[18] = _mm_subs_epi16(x6[19], x6[18]);
-  x7[19] = _mm_adds_epi16(x6[19], x6[18]);
-  x7[20] = _mm_adds_epi16(x6[20], x6[21]);
-  x7[21] = _mm_subs_epi16(x6[20], x6[21]);
-  x7[22] = _mm_subs_epi16(x6[23], x6[22]);
-  x7[23] = _mm_adds_epi16(x6[23], x6[22]);
-  x7[24] = _mm_adds_epi16(x6[24], x6[25]);
-  x7[25] = _mm_subs_epi16(x6[24], x6[25]);
-  x7[26] = _mm_subs_epi16(x6[27], x6[26]);
-  x7[27] = _mm_adds_epi16(x6[27], x6[26]);
-  x7[28] = _mm_adds_epi16(x6[28], x6[29]);
-  x7[29] = _mm_subs_epi16(x6[28], x6[29]);
-  x7[30] = _mm_subs_epi16(x6[31], x6[30]);
-  x7[31] = _mm_adds_epi16(x6[31], x6[30]);
-
-  // stage 8
-  __m128i x8[32];
-  x8[0] = x7[0];
-  x8[1] = x7[1];
-  x8[2] = x7[2];
-  x8[3] = x7[3];
-  x8[4] = x7[4];
-  x8[5] = x7[5];
-  x8[6] = x7[6];
-  x8[7] = x7[7];
-  x8[8] = x7[8];
-  x8[9] = x7[9];
-  x8[10] = x7[10];
-  x8[11] = x7[11];
-  x8[12] = x7[12];
-  x8[13] = x7[13];
-  x8[14] = x7[14];
-  x8[15] = x7[15];
-  btf_16_sse2(cospi_p62_p02, cospi_m02_p62, x7[16], x7[31], x8[16], x8[31]);
-  btf_16_sse2(cospi_p30_p34, cospi_m34_p30, x7[17], x7[30], x8[17], x8[30]);
-  btf_16_sse2(cospi_p46_p18, cospi_m18_p46, x7[18], x7[29], x8[18], x8[29]);
-  btf_16_sse2(cospi_p14_p50, cospi_m50_p14, x7[19], x7[28], x8[19], x8[28]);
-  btf_16_sse2(cospi_p54_p10, cospi_m10_p54, x7[20], x7[27], x8[20], x8[27]);
-  btf_16_sse2(cospi_p22_p42, cospi_m42_p22, x7[21], x7[26], x8[21], x8[26]);
-  btf_16_sse2(cospi_p38_p26, cospi_m26_p38, x7[22], x7[25], x8[22], x8[25]);
-  btf_16_sse2(cospi_p06_p58, cospi_m58_p06, x7[23], x7[24], x8[23], x8[24]);
-
-  // stage 9
-  output[0] = x8[0];
-  output[1] = x8[16];
-  output[2] = x8[8];
-  output[3] = x8[24];
-  output[4] = x8[4];
-  output[5] = x8[20];
-  output[6] = x8[12];
-  output[7] = x8[28];
-  output[8] = x8[2];
-  output[9] = x8[18];
-  output[10] = x8[10];
-  output[11] = x8[26];
-  output[12] = x8[6];
-  output[13] = x8[22];
-  output[14] = x8[14];
-  output[15] = x8[30];
-  output[16] = x8[1];
-  output[17] = x8[17];
-  output[18] = x8[9];
-  output[19] = x8[25];
-  output[20] = x8[5];
-  output[21] = x8[21];
-  output[22] = x8[13];
-  output[23] = x8[29];
-  output[24] = x8[3];
-  output[25] = x8[19];
-  output[26] = x8[11];
-  output[27] = x8[27];
-  output[28] = x8[7];
-  output[29] = x8[23];
-  output[30] = x8[15];
-  output[31] = x8[31];
-}
-
-void fdct8x64_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) {
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
-  __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]);
-  __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
-  __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]);
-  __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]);
-  __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]);
-  __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
-  __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]);
-  __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]);
-  __m128i cospi_m56_m08 = pair_set_epi16(-cospi[56], -cospi[8]);
-  __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]);
-  __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]);
-  __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]);
-  __m128i cospi_p60_p04 = pair_set_epi16(cospi[60], cospi[4]);
-  __m128i cospi_m04_p60 = pair_set_epi16(-cospi[4], cospi[60]);
-  __m128i cospi_p28_p36 = pair_set_epi16(cospi[28], cospi[36]);
-  __m128i cospi_m36_p28 = pair_set_epi16(-cospi[36], cospi[28]);
-  __m128i cospi_p44_p20 = pair_set_epi16(cospi[44], cospi[20]);
-  __m128i cospi_m20_p44 = pair_set_epi16(-cospi[20], cospi[44]);
-  __m128i cospi_p12_p52 = pair_set_epi16(cospi[12], cospi[52]);
-  __m128i cospi_m52_p12 = pair_set_epi16(-cospi[52], cospi[12]);
-  __m128i cospi_m60_m04 = pair_set_epi16(-cospi[60], -cospi[4]);
-  __m128i cospi_m28_m36 = pair_set_epi16(-cospi[28], -cospi[36]);
-  __m128i cospi_m44_m20 = pair_set_epi16(-cospi[44], -cospi[20]);
-  __m128i cospi_m12_m52 = pair_set_epi16(-cospi[12], -cospi[52]);
-  __m128i cospi_p62_p02 = pair_set_epi16(cospi[62], cospi[2]);
-  __m128i cospi_m02_p62 = pair_set_epi16(-cospi[2], cospi[62]);
-  __m128i cospi_p30_p34 = pair_set_epi16(cospi[30], cospi[34]);
-  __m128i cospi_m34_p30 = pair_set_epi16(-cospi[34], cospi[30]);
-  __m128i cospi_p46_p18 = pair_set_epi16(cospi[46], cospi[18]);
-  __m128i cospi_m18_p46 = pair_set_epi16(-cospi[18], cospi[46]);
-  __m128i cospi_p14_p50 = pair_set_epi16(cospi[14], cospi[50]);
-  __m128i cospi_m50_p14 = pair_set_epi16(-cospi[50], cospi[14]);
-  __m128i cospi_p54_p10 = pair_set_epi16(cospi[54], cospi[10]);
-  __m128i cospi_m10_p54 = pair_set_epi16(-cospi[10], cospi[54]);
-  __m128i cospi_p22_p42 = pair_set_epi16(cospi[22], cospi[42]);
-  __m128i cospi_m42_p22 = pair_set_epi16(-cospi[42], cospi[22]);
-  __m128i cospi_p38_p26 = pair_set_epi16(cospi[38], cospi[26]);
-  __m128i cospi_m26_p38 = pair_set_epi16(-cospi[26], cospi[38]);
-  __m128i cospi_p06_p58 = pair_set_epi16(cospi[6], cospi[58]);
-  __m128i cospi_m58_p06 = pair_set_epi16(-cospi[58], cospi[6]);
-  __m128i cospi_p63_p01 = pair_set_epi16(cospi[63], cospi[1]);
-  __m128i cospi_m01_p63 = pair_set_epi16(-cospi[1], cospi[63]);
-  __m128i cospi_p31_p33 = pair_set_epi16(cospi[31], cospi[33]);
-  __m128i cospi_m33_p31 = pair_set_epi16(-cospi[33], cospi[31]);
-  __m128i cospi_p47_p17 = pair_set_epi16(cospi[47], cospi[17]);
-  __m128i cospi_m17_p47 = pair_set_epi16(-cospi[17], cospi[47]);
-  __m128i cospi_p15_p49 = pair_set_epi16(cospi[15], cospi[49]);
-  __m128i cospi_m49_p15 = pair_set_epi16(-cospi[49], cospi[15]);
-  __m128i cospi_p55_p09 = pair_set_epi16(cospi[55], cospi[9]);
-  __m128i cospi_m09_p55 = pair_set_epi16(-cospi[9], cospi[55]);
-  __m128i cospi_p23_p41 = pair_set_epi16(cospi[23], cospi[41]);
-  __m128i cospi_m41_p23 = pair_set_epi16(-cospi[41], cospi[23]);
-  __m128i cospi_p39_p25 = pair_set_epi16(cospi[39], cospi[25]);
-  __m128i cospi_m25_p39 = pair_set_epi16(-cospi[25], cospi[39]);
-  __m128i cospi_p07_p57 = pair_set_epi16(cospi[7], cospi[57]);
-  __m128i cospi_m57_p07 = pair_set_epi16(-cospi[57], cospi[7]);
-  __m128i cospi_p59_p05 = pair_set_epi16(cospi[59], cospi[5]);
-  __m128i cospi_m05_p59 = pair_set_epi16(-cospi[5], cospi[59]);
-  __m128i cospi_p27_p37 = pair_set_epi16(cospi[27], cospi[37]);
-  __m128i cospi_m37_p27 = pair_set_epi16(-cospi[37], cospi[27]);
-  __m128i cospi_p43_p21 = pair_set_epi16(cospi[43], cospi[21]);
-  __m128i cospi_m21_p43 = pair_set_epi16(-cospi[21], cospi[43]);
-  __m128i cospi_p11_p53 = pair_set_epi16(cospi[11], cospi[53]);
-  __m128i cospi_m53_p11 = pair_set_epi16(-cospi[53], cospi[11]);
-  __m128i cospi_p51_p13 = pair_set_epi16(cospi[51], cospi[13]);
-  __m128i cospi_m13_p51 = pair_set_epi16(-cospi[13], cospi[51]);
-  __m128i cospi_p19_p45 = pair_set_epi16(cospi[19], cospi[45]);
-  __m128i cospi_m45_p19 = pair_set_epi16(-cospi[45], cospi[19]);
-  __m128i cospi_p35_p29 = pair_set_epi16(cospi[35], cospi[29]);
-  __m128i cospi_m29_p35 = pair_set_epi16(-cospi[29], cospi[35]);
-  __m128i cospi_p03_p61 = pair_set_epi16(cospi[3], cospi[61]);
-  __m128i cospi_m61_p03 = pair_set_epi16(-cospi[61], cospi[3]);
-
-  // stage 1
-  __m128i x1[64];
-  x1[0] = _mm_adds_epi16(input[0], input[63]);
-  x1[63] = _mm_subs_epi16(input[0], input[63]);
-  x1[1] = _mm_adds_epi16(input[1], input[62]);
-  x1[62] = _mm_subs_epi16(input[1], input[62]);
-  x1[2] = _mm_adds_epi16(input[2], input[61]);
-  x1[61] = _mm_subs_epi16(input[2], input[61]);
-  x1[3] = _mm_adds_epi16(input[3], input[60]);
-  x1[60] = _mm_subs_epi16(input[3], input[60]);
-  x1[4] = _mm_adds_epi16(input[4], input[59]);
-  x1[59] = _mm_subs_epi16(input[4], input[59]);
-  x1[5] = _mm_adds_epi16(input[5], input[58]);
-  x1[58] = _mm_subs_epi16(input[5], input[58]);
-  x1[6] = _mm_adds_epi16(input[6], input[57]);
-  x1[57] = _mm_subs_epi16(input[6], input[57]);
-  x1[7] = _mm_adds_epi16(input[7], input[56]);
-  x1[56] = _mm_subs_epi16(input[7], input[56]);
-  x1[8] = _mm_adds_epi16(input[8], input[55]);
-  x1[55] = _mm_subs_epi16(input[8], input[55]);
-  x1[9] = _mm_adds_epi16(input[9], input[54]);
-  x1[54] = _mm_subs_epi16(input[9], input[54]);
-  x1[10] = _mm_adds_epi16(input[10], input[53]);
-  x1[53] = _mm_subs_epi16(input[10], input[53]);
-  x1[11] = _mm_adds_epi16(input[11], input[52]);
-  x1[52] = _mm_subs_epi16(input[11], input[52]);
-  x1[12] = _mm_adds_epi16(input[12], input[51]);
-  x1[51] = _mm_subs_epi16(input[12], input[51]);
-  x1[13] = _mm_adds_epi16(input[13], input[50]);
-  x1[50] = _mm_subs_epi16(input[13], input[50]);
-  x1[14] = _mm_adds_epi16(input[14], input[49]);
-  x1[49] = _mm_subs_epi16(input[14], input[49]);
-  x1[15] = _mm_adds_epi16(input[15], input[48]);
-  x1[48] = _mm_subs_epi16(input[15], input[48]);
-  x1[16] = _mm_adds_epi16(input[16], input[47]);
-  x1[47] = _mm_subs_epi16(input[16], input[47]);
-  x1[17] = _mm_adds_epi16(input[17], input[46]);
-  x1[46] = _mm_subs_epi16(input[17], input[46]);
-  x1[18] = _mm_adds_epi16(input[18], input[45]);
-  x1[45] = _mm_subs_epi16(input[18], input[45]);
-  x1[19] = _mm_adds_epi16(input[19], input[44]);
-  x1[44] = _mm_subs_epi16(input[19], input[44]);
-  x1[20] = _mm_adds_epi16(input[20], input[43]);
-  x1[43] = _mm_subs_epi16(input[20], input[43]);
-  x1[21] = _mm_adds_epi16(input[21], input[42]);
-  x1[42] = _mm_subs_epi16(input[21], input[42]);
-  x1[22] = _mm_adds_epi16(input[22], input[41]);
-  x1[41] = _mm_subs_epi16(input[22], input[41]);
-  x1[23] = _mm_adds_epi16(input[23], input[40]);
-  x1[40] = _mm_subs_epi16(input[23], input[40]);
-  x1[24] = _mm_adds_epi16(input[24], input[39]);
-  x1[39] = _mm_subs_epi16(input[24], input[39]);
-  x1[25] = _mm_adds_epi16(input[25], input[38]);
-  x1[38] = _mm_subs_epi16(input[25], input[38]);
-  x1[26] = _mm_adds_epi16(input[26], input[37]);
-  x1[37] = _mm_subs_epi16(input[26], input[37]);
-  x1[27] = _mm_adds_epi16(input[27], input[36]);
-  x1[36] = _mm_subs_epi16(input[27], input[36]);
-  x1[28] = _mm_adds_epi16(input[28], input[35]);
-  x1[35] = _mm_subs_epi16(input[28], input[35]);
-  x1[29] = _mm_adds_epi16(input[29], input[34]);
-  x1[34] = _mm_subs_epi16(input[29], input[34]);
-  x1[30] = _mm_adds_epi16(input[30], input[33]);
-  x1[33] = _mm_subs_epi16(input[30], input[33]);
-  x1[31] = _mm_adds_epi16(input[31], input[32]);
-  x1[32] = _mm_subs_epi16(input[31], input[32]);
-
-  // stage 2
-  __m128i x2[64];
-  x2[0] = _mm_adds_epi16(x1[0], x1[31]);
-  x2[31] = _mm_subs_epi16(x1[0], x1[31]);
-  x2[1] = _mm_adds_epi16(x1[1], x1[30]);
-  x2[30] = _mm_subs_epi16(x1[1], x1[30]);
-  x2[2] = _mm_adds_epi16(x1[2], x1[29]);
-  x2[29] = _mm_subs_epi16(x1[2], x1[29]);
-  x2[3] = _mm_adds_epi16(x1[3], x1[28]);
-  x2[28] = _mm_subs_epi16(x1[3], x1[28]);
-  x2[4] = _mm_adds_epi16(x1[4], x1[27]);
-  x2[27] = _mm_subs_epi16(x1[4], x1[27]);
-  x2[5] = _mm_adds_epi16(x1[5], x1[26]);
-  x2[26] = _mm_subs_epi16(x1[5], x1[26]);
-  x2[6] = _mm_adds_epi16(x1[6], x1[25]);
-  x2[25] = _mm_subs_epi16(x1[6], x1[25]);
-  x2[7] = _mm_adds_epi16(x1[7], x1[24]);
-  x2[24] = _mm_subs_epi16(x1[7], x1[24]);
-  x2[8] = _mm_adds_epi16(x1[8], x1[23]);
-  x2[23] = _mm_subs_epi16(x1[8], x1[23]);
-  x2[9] = _mm_adds_epi16(x1[9], x1[22]);
-  x2[22] = _mm_subs_epi16(x1[9], x1[22]);
-  x2[10] = _mm_adds_epi16(x1[10], x1[21]);
-  x2[21] = _mm_subs_epi16(x1[10], x1[21]);
-  x2[11] = _mm_adds_epi16(x1[11], x1[20]);
-  x2[20] = _mm_subs_epi16(x1[11], x1[20]);
-  x2[12] = _mm_adds_epi16(x1[12], x1[19]);
-  x2[19] = _mm_subs_epi16(x1[12], x1[19]);
-  x2[13] = _mm_adds_epi16(x1[13], x1[18]);
-  x2[18] = _mm_subs_epi16(x1[13], x1[18]);
-  x2[14] = _mm_adds_epi16(x1[14], x1[17]);
-  x2[17] = _mm_subs_epi16(x1[14], x1[17]);
-  x2[15] = _mm_adds_epi16(x1[15], x1[16]);
-  x2[16] = _mm_subs_epi16(x1[15], x1[16]);
-  x2[32] = x1[32];
-  x2[33] = x1[33];
-  x2[34] = x1[34];
-  x2[35] = x1[35];
-  x2[36] = x1[36];
-  x2[37] = x1[37];
-  x2[38] = x1[38];
-  x2[39] = x1[39];
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[40], x1[55], x2[40], x2[55]);
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[41], x1[54], x2[41], x2[54]);
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[42], x1[53], x2[42], x2[53]);
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[43], x1[52], x2[43], x2[52]);
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[44], x1[51], x2[44], x2[51]);
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[45], x1[50], x2[45], x2[50]);
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[46], x1[49], x2[46], x2[49]);
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[47], x1[48], x2[47], x2[48]);
-  x2[56] = x1[56];
-  x2[57] = x1[57];
-  x2[58] = x1[58];
-  x2[59] = x1[59];
-  x2[60] = x1[60];
-  x2[61] = x1[61];
-  x2[62] = x1[62];
-  x2[63] = x1[63];
-
-  // stage 3
-  __m128i x3[64];
-  x3[0] = _mm_adds_epi16(x2[0], x2[15]);
-  x3[15] = _mm_subs_epi16(x2[0], x2[15]);
-  x3[1] = _mm_adds_epi16(x2[1], x2[14]);
-  x3[14] = _mm_subs_epi16(x2[1], x2[14]);
-  x3[2] = _mm_adds_epi16(x2[2], x2[13]);
-  x3[13] = _mm_subs_epi16(x2[2], x2[13]);
-  x3[3] = _mm_adds_epi16(x2[3], x2[12]);
-  x3[12] = _mm_subs_epi16(x2[3], x2[12]);
-  x3[4] = _mm_adds_epi16(x2[4], x2[11]);
-  x3[11] = _mm_subs_epi16(x2[4], x2[11]);
-  x3[5] = _mm_adds_epi16(x2[5], x2[10]);
-  x3[10] = _mm_subs_epi16(x2[5], x2[10]);
-  x3[6] = _mm_adds_epi16(x2[6], x2[9]);
-  x3[9] = _mm_subs_epi16(x2[6], x2[9]);
-  x3[7] = _mm_adds_epi16(x2[7], x2[8]);
-  x3[8] = _mm_subs_epi16(x2[7], x2[8]);
-  x3[16] = x2[16];
-  x3[17] = x2[17];
-  x3[18] = x2[18];
-  x3[19] = x2[19];
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[20], x2[27], x3[20], x3[27]);
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[21], x2[26], x3[21], x3[26]);
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[22], x2[25], x3[22], x3[25]);
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[23], x2[24], x3[23], x3[24]);
-  x3[28] = x2[28];
-  x3[29] = x2[29];
-  x3[30] = x2[30];
-  x3[31] = x2[31];
-  x3[32] = _mm_adds_epi16(x2[32], x2[47]);
-  x3[47] = _mm_subs_epi16(x2[32], x2[47]);
-  x3[33] = _mm_adds_epi16(x2[33], x2[46]);
-  x3[46] = _mm_subs_epi16(x2[33], x2[46]);
-  x3[34] = _mm_adds_epi16(x2[34], x2[45]);
-  x3[45] = _mm_subs_epi16(x2[34], x2[45]);
-  x3[35] = _mm_adds_epi16(x2[35], x2[44]);
-  x3[44] = _mm_subs_epi16(x2[35], x2[44]);
-  x3[36] = _mm_adds_epi16(x2[36], x2[43]);
-  x3[43] = _mm_subs_epi16(x2[36], x2[43]);
-  x3[37] = _mm_adds_epi16(x2[37], x2[42]);
-  x3[42] = _mm_subs_epi16(x2[37], x2[42]);
-  x3[38] = _mm_adds_epi16(x2[38], x2[41]);
-  x3[41] = _mm_subs_epi16(x2[38], x2[41]);
-  x3[39] = _mm_adds_epi16(x2[39], x2[40]);
-  x3[40] = _mm_subs_epi16(x2[39], x2[40]);
-  x3[48] = _mm_subs_epi16(x2[63], x2[48]);
-  x3[63] = _mm_adds_epi16(x2[63], x2[48]);
-  x3[49] = _mm_subs_epi16(x2[62], x2[49]);
-  x3[62] = _mm_adds_epi16(x2[62], x2[49]);
-  x3[50] = _mm_subs_epi16(x2[61], x2[50]);
-  x3[61] = _mm_adds_epi16(x2[61], x2[50]);
-  x3[51] = _mm_subs_epi16(x2[60], x2[51]);
-  x3[60] = _mm_adds_epi16(x2[60], x2[51]);
-  x3[52] = _mm_subs_epi16(x2[59], x2[52]);
-  x3[59] = _mm_adds_epi16(x2[59], x2[52]);
-  x3[53] = _mm_subs_epi16(x2[58], x2[53]);
-  x3[58] = _mm_adds_epi16(x2[58], x2[53]);
-  x3[54] = _mm_subs_epi16(x2[57], x2[54]);
-  x3[57] = _mm_adds_epi16(x2[57], x2[54]);
-  x3[55] = _mm_subs_epi16(x2[56], x2[55]);
-  x3[56] = _mm_adds_epi16(x2[56], x2[55]);
-
-  // stage 4
-  __m128i x4[64];
-  x4[0] = _mm_adds_epi16(x3[0], x3[7]);
-  x4[7] = _mm_subs_epi16(x3[0], x3[7]);
-  x4[1] = _mm_adds_epi16(x3[1], x3[6]);
-  x4[6] = _mm_subs_epi16(x3[1], x3[6]);
-  x4[2] = _mm_adds_epi16(x3[2], x3[5]);
-  x4[5] = _mm_subs_epi16(x3[2], x3[5]);
-  x4[3] = _mm_adds_epi16(x3[3], x3[4]);
-  x4[4] = _mm_subs_epi16(x3[3], x3[4]);
-  x4[8] = x3[8];
-  x4[9] = x3[9];
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x3[10], x3[13], x4[10], x4[13]);
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x3[11], x3[12], x4[11], x4[12]);
-  x4[14] = x3[14];
-  x4[15] = x3[15];
-  x4[16] = _mm_adds_epi16(x3[16], x3[23]);
-  x4[23] = _mm_subs_epi16(x3[16], x3[23]);
-  x4[17] = _mm_adds_epi16(x3[17], x3[22]);
-  x4[22] = _mm_subs_epi16(x3[17], x3[22]);
-  x4[18] = _mm_adds_epi16(x3[18], x3[21]);
-  x4[21] = _mm_subs_epi16(x3[18], x3[21]);
-  x4[19] = _mm_adds_epi16(x3[19], x3[20]);
-  x4[20] = _mm_subs_epi16(x3[19], x3[20]);
-  x4[24] = _mm_subs_epi16(x3[31], x3[24]);
-  x4[31] = _mm_adds_epi16(x3[31], x3[24]);
-  x4[25] = _mm_subs_epi16(x3[30], x3[25]);
-  x4[30] = _mm_adds_epi16(x3[30], x3[25]);
-  x4[26] = _mm_subs_epi16(x3[29], x3[26]);
-  x4[29] = _mm_adds_epi16(x3[29], x3[26]);
-  x4[27] = _mm_subs_epi16(x3[28], x3[27]);
-  x4[28] = _mm_adds_epi16(x3[28], x3[27]);
-  x4[32] = x3[32];
-  x4[33] = x3[33];
-  x4[34] = x3[34];
-  x4[35] = x3[35];
-  btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[36], x3[59], x4[36], x4[59]);
-  btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[37], x3[58], x4[37], x4[58]);
-  btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[38], x3[57], x4[38], x4[57]);
-  btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[39], x3[56], x4[39], x4[56]);
-  btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[40], x3[55], x4[40], x4[55]);
-  btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[41], x3[54], x4[41], x4[54]);
-  btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[42], x3[53], x4[42], x4[53]);
-  btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[43], x3[52], x4[43], x4[52]);
-  x4[44] = x3[44];
-  x4[45] = x3[45];
-  x4[46] = x3[46];
-  x4[47] = x3[47];
-  x4[48] = x3[48];
-  x4[49] = x3[49];
-  x4[50] = x3[50];
-  x4[51] = x3[51];
-  x4[60] = x3[60];
-  x4[61] = x3[61];
-  x4[62] = x3[62];
-  x4[63] = x3[63];
-
-  // stage 5
-  __m128i x5[64];
-  x5[0] = _mm_adds_epi16(x4[0], x4[3]);
-  x5[3] = _mm_subs_epi16(x4[0], x4[3]);
-  x5[1] = _mm_adds_epi16(x4[1], x4[2]);
-  x5[2] = _mm_subs_epi16(x4[1], x4[2]);
-  x5[4] = x4[4];
-  btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x4[5], x4[6], x5[5], x5[6]);
-  x5[7] = x4[7];
-  x5[8] = _mm_adds_epi16(x4[8], x4[11]);
-  x5[11] = _mm_subs_epi16(x4[8], x4[11]);
-  x5[9] = _mm_adds_epi16(x4[9], x4[10]);
-  x5[10] = _mm_subs_epi16(x4[9], x4[10]);
-  x5[12] = _mm_subs_epi16(x4[15], x4[12]);
-  x5[15] = _mm_adds_epi16(x4[15], x4[12]);
-  x5[13] = _mm_subs_epi16(x4[14], x4[13]);
-  x5[14] = _mm_adds_epi16(x4[14], x4[13]);
-  x5[16] = x4[16];
-  x5[17] = x4[17];
-  btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x4[18], x4[29], x5[18], x5[29]);
-  btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x4[19], x4[28], x5[19], x5[28]);
-  btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x4[20], x4[27], x5[20], x5[27]);
-  btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x4[21], x4[26], x5[21], x5[26]);
-  x5[22] = x4[22];
-  x5[23] = x4[23];
-  x5[24] = x4[24];
-  x5[25] = x4[25];
-  x5[30] = x4[30];
-  x5[31] = x4[31];
-  x5[32] = _mm_adds_epi16(x4[32], x4[39]);
-  x5[39] = _mm_subs_epi16(x4[32], x4[39]);
-  x5[33] = _mm_adds_epi16(x4[33], x4[38]);
-  x5[38] = _mm_subs_epi16(x4[33], x4[38]);
-  x5[34] = _mm_adds_epi16(x4[34], x4[37]);
-  x5[37] = _mm_subs_epi16(x4[34], x4[37]);
-  x5[35] = _mm_adds_epi16(x4[35], x4[36]);
-  x5[36] = _mm_subs_epi16(x4[35], x4[36]);
-  x5[40] = _mm_subs_epi16(x4[47], x4[40]);
-  x5[47] = _mm_adds_epi16(x4[47], x4[40]);
-  x5[41] = _mm_subs_epi16(x4[46], x4[41]);
-  x5[46] = _mm_adds_epi16(x4[46], x4[41]);
-  x5[42] = _mm_subs_epi16(x4[45], x4[42]);
-  x5[45] = _mm_adds_epi16(x4[45], x4[42]);
-  x5[43] = _mm_subs_epi16(x4[44], x4[43]);
-  x5[44] = _mm_adds_epi16(x4[44], x4[43]);
-  x5[48] = _mm_adds_epi16(x4[48], x4[55]);
-  x5[55] = _mm_subs_epi16(x4[48], x4[55]);
-  x5[49] = _mm_adds_epi16(x4[49], x4[54]);
-  x5[54] = _mm_subs_epi16(x4[49], x4[54]);
-  x5[50] = _mm_adds_epi16(x4[50], x4[53]);
-  x5[53] = _mm_subs_epi16(x4[50], x4[53]);
-  x5[51] = _mm_adds_epi16(x4[51], x4[52]);
-  x5[52] = _mm_subs_epi16(x4[51], x4[52]);
-  x5[56] = _mm_subs_epi16(x4[63], x4[56]);
-  x5[63] = _mm_adds_epi16(x4[63], x4[56]);
-  x5[57] = _mm_subs_epi16(x4[62], x4[57]);
-  x5[62] = _mm_adds_epi16(x4[62], x4[57]);
-  x5[58] = _mm_subs_epi16(x4[61], x4[58]);
-  x5[61] = _mm_adds_epi16(x4[61], x4[58]);
-  x5[59] = _mm_subs_epi16(x4[60], x4[59]);
-  x5[60] = _mm_adds_epi16(x4[60], x4[59]);
-
-  // stage 6
-  __m128i x6[64];
-  btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x5[0], x5[1], x6[0], x6[1]);
-  btf_16_sse2(cospi_p48_p16, cospi_m16_p48, x5[2], x5[3], x6[2], x6[3]);
-  x6[4] = _mm_adds_epi16(x5[4], x5[5]);
-  x6[5] = _mm_subs_epi16(x5[4], x5[5]);
-  x6[6] = _mm_subs_epi16(x5[7], x5[6]);
-  x6[7] = _mm_adds_epi16(x5[7], x5[6]);
-  x6[8] = x5[8];
-  btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x5[9], x5[14], x6[9], x6[14]);
-  btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x5[10], x5[13], x6[10], x6[13]);
-  x6[11] = x5[11];
-  x6[12] = x5[12];
-  x6[15] = x5[15];
-  x6[16] = _mm_adds_epi16(x5[16], x5[19]);
-  x6[19] = _mm_subs_epi16(x5[16], x5[19]);
-  x6[17] = _mm_adds_epi16(x5[17], x5[18]);
-  x6[18] = _mm_subs_epi16(x5[17], x5[18]);
-  x6[20] = _mm_subs_epi16(x5[23], x5[20]);
-  x6[23] = _mm_adds_epi16(x5[23], x5[20]);
-  x6[21] = _mm_subs_epi16(x5[22], x5[21]);
-  x6[22] = _mm_adds_epi16(x5[22], x5[21]);
-  x6[24] = _mm_adds_epi16(x5[24], x5[27]);
-  x6[27] = _mm_subs_epi16(x5[24], x5[27]);
-  x6[25] = _mm_adds_epi16(x5[25], x5[26]);
-  x6[26] = _mm_subs_epi16(x5[25], x5[26]);
-  x6[28] = _mm_subs_epi16(x5[31], x5[28]);
-  x6[31] = _mm_adds_epi16(x5[31], x5[28]);
-  x6[29] = _mm_subs_epi16(x5[30], x5[29]);
-  x6[30] = _mm_adds_epi16(x5[30], x5[29]);
-  x6[32] = x5[32];
-  x6[33] = x5[33];
-  btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x5[34], x5[61], x6[34], x6[61]);
-  btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x5[35], x5[60], x6[35], x6[60]);
-  btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x5[36], x5[59], x6[36], x6[59]);
-  btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x5[37], x5[58], x6[37], x6[58]);
-  x6[38] = x5[38];
-  x6[39] = x5[39];
-  x6[40] = x5[40];
-  x6[41] = x5[41];
-  btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x5[42], x5[53], x6[42], x6[53]);
-  btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x5[43], x5[52], x6[43], x6[52]);
-  btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x5[44], x5[51], x6[44], x6[51]);
-  btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x5[45], x5[50], x6[45], x6[50]);
-  x6[46] = x5[46];
-  x6[47] = x5[47];
-  x6[48] = x5[48];
-  x6[49] = x5[49];
-  x6[54] = x5[54];
-  x6[55] = x5[55];
-  x6[56] = x5[56];
-  x6[57] = x5[57];
-  x6[62] = x5[62];
-  x6[63] = x5[63];
-
-  // stage 7
-  __m128i x7[64];
-  x7[0] = x6[0];
-  x7[1] = x6[1];
-  x7[2] = x6[2];
-  x7[3] = x6[3];
-  btf_16_sse2(cospi_p56_p08, cospi_m08_p56, x6[4], x6[7], x7[4], x7[7]);
-  btf_16_sse2(cospi_p24_p40, cospi_m40_p24, x6[5], x6[6], x7[5], x7[6]);
-  x7[8] = _mm_adds_epi16(x6[8], x6[9]);
-  x7[9] = _mm_subs_epi16(x6[8], x6[9]);
-  x7[10] = _mm_subs_epi16(x6[11], x6[10]);
-  x7[11] = _mm_adds_epi16(x6[11], x6[10]);
-  x7[12] = _mm_adds_epi16(x6[12], x6[13]);
-  x7[13] = _mm_subs_epi16(x6[12], x6[13]);
-  x7[14] = _mm_subs_epi16(x6[15], x6[14]);
-  x7[15] = _mm_adds_epi16(x6[15], x6[14]);
-  x7[16] = x6[16];
-  btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x6[17], x6[30], x7[17], x7[30]);
-  btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x6[18], x6[29], x7[18], x7[29]);
-  x7[19] = x6[19];
-  x7[20] = x6[20];
-  btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x6[21], x6[26], x7[21], x7[26]);
-  btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x6[22], x6[25], x7[22], x7[25]);
-  x7[23] = x6[23];
-  x7[24] = x6[24];
-  x7[27] = x6[27];
-  x7[28] = x6[28];
-  x7[31] = x6[31];
-  x7[32] = _mm_adds_epi16(x6[32], x6[35]);
-  x7[35] = _mm_subs_epi16(x6[32], x6[35]);
-  x7[33] = _mm_adds_epi16(x6[33], x6[34]);
-  x7[34] = _mm_subs_epi16(x6[33], x6[34]);
-  x7[36] = _mm_subs_epi16(x6[39], x6[36]);
-  x7[39] = _mm_adds_epi16(x6[39], x6[36]);
-  x7[37] = _mm_subs_epi16(x6[38], x6[37]);
-  x7[38] = _mm_adds_epi16(x6[38], x6[37]);
-  x7[40] = _mm_adds_epi16(x6[40], x6[43]);
-  x7[43] = _mm_subs_epi16(x6[40], x6[43]);
-  x7[41] = _mm_adds_epi16(x6[41], x6[42]);
-  x7[42] = _mm_subs_epi16(x6[41], x6[42]);
-  x7[44] = _mm_subs_epi16(x6[47], x6[44]);
-  x7[47] = _mm_adds_epi16(x6[47], x6[44]);
-  x7[45] = _mm_subs_epi16(x6[46], x6[45]);
-  x7[46] = _mm_adds_epi16(x6[46], x6[45]);
-  x7[48] = _mm_adds_epi16(x6[48], x6[51]);
-  x7[51] = _mm_subs_epi16(x6[48], x6[51]);
-  x7[49] = _mm_adds_epi16(x6[49], x6[50]);
-  x7[50] = _mm_subs_epi16(x6[49], x6[50]);
-  x7[52] = _mm_subs_epi16(x6[55], x6[52]);
-  x7[55] = _mm_adds_epi16(x6[55], x6[52]);
-  x7[53] = _mm_subs_epi16(x6[54], x6[53]);
-  x7[54] = _mm_adds_epi16(x6[54], x6[53]);
-  x7[56] = _mm_adds_epi16(x6[56], x6[59]);
-  x7[59] = _mm_subs_epi16(x6[56], x6[59]);
-  x7[57] = _mm_adds_epi16(x6[57], x6[58]);
-  x7[58] = _mm_subs_epi16(x6[57], x6[58]);
-  x7[60] = _mm_subs_epi16(x6[63], x6[60]);
-  x7[63] = _mm_adds_epi16(x6[63], x6[60]);
-  x7[61] = _mm_subs_epi16(x6[62], x6[61]);
-  x7[62] = _mm_adds_epi16(x6[62], x6[61]);
-
-  // stage 8
-  __m128i x8[64];
-  x8[0] = x7[0];
-  x8[1] = x7[1];
-  x8[2] = x7[2];
-  x8[3] = x7[3];
-  x8[4] = x7[4];
-  x8[5] = x7[5];
-  x8[6] = x7[6];
-  x8[7] = x7[7];
-  btf_16_sse2(cospi_p60_p04, cospi_m04_p60, x7[8], x7[15], x8[8], x8[15]);
-  btf_16_sse2(cospi_p28_p36, cospi_m36_p28, x7[9], x7[14], x8[9], x8[14]);
-  btf_16_sse2(cospi_p44_p20, cospi_m20_p44, x7[10], x7[13], x8[10], x8[13]);
-  btf_16_sse2(cospi_p12_p52, cospi_m52_p12, x7[11], x7[12], x8[11], x8[12]);
-  x8[16] = _mm_adds_epi16(x7[16], x7[17]);
-  x8[17] = _mm_subs_epi16(x7[16], x7[17]);
-  x8[18] = _mm_subs_epi16(x7[19], x7[18]);
-  x8[19] = _mm_adds_epi16(x7[19], x7[18]);
-  x8[20] = _mm_adds_epi16(x7[20], x7[21]);
-  x8[21] = _mm_subs_epi16(x7[20], x7[21]);
-  x8[22] = _mm_subs_epi16(x7[23], x7[22]);
-  x8[23] = _mm_adds_epi16(x7[23], x7[22]);
-  x8[24] = _mm_adds_epi16(x7[24], x7[25]);
-  x8[25] = _mm_subs_epi16(x7[24], x7[25]);
-  x8[26] = _mm_subs_epi16(x7[27], x7[26]);
-  x8[27] = _mm_adds_epi16(x7[27], x7[26]);
-  x8[28] = _mm_adds_epi16(x7[28], x7[29]);
-  x8[29] = _mm_subs_epi16(x7[28], x7[29]);
-  x8[30] = _mm_subs_epi16(x7[31], x7[30]);
-  x8[31] = _mm_adds_epi16(x7[31], x7[30]);
-  x8[32] = x7[32];
-  btf_16_sse2(cospi_m04_p60, cospi_p60_p04, x7[33], x7[62], x8[33], x8[62]);
-  btf_16_sse2(cospi_m60_m04, cospi_m04_p60, x7[34], x7[61], x8[34], x8[61]);
-  x8[35] = x7[35];
-  x8[36] = x7[36];
-  btf_16_sse2(cospi_m36_p28, cospi_p28_p36, x7[37], x7[58], x8[37], x8[58]);
-  btf_16_sse2(cospi_m28_m36, cospi_m36_p28, x7[38], x7[57], x8[38], x8[57]);
-  x8[39] = x7[39];
-  x8[40] = x7[40];
-  btf_16_sse2(cospi_m20_p44, cospi_p44_p20, x7[41], x7[54], x8[41], x8[54]);
-  btf_16_sse2(cospi_m44_m20, cospi_m20_p44, x7[42], x7[53], x8[42], x8[53]);
-  x8[43] = x7[43];
-  x8[44] = x7[44];
-  btf_16_sse2(cospi_m52_p12, cospi_p12_p52, x7[45], x7[50], x8[45], x8[50]);
-  btf_16_sse2(cospi_m12_m52, cospi_m52_p12, x7[46], x7[49], x8[46], x8[49]);
-  x8[47] = x7[47];
-  x8[48] = x7[48];
-  x8[51] = x7[51];
-  x8[52] = x7[52];
-  x8[55] = x7[55];
-  x8[56] = x7[56];
-  x8[59] = x7[59];
-  x8[60] = x7[60];
-  x8[63] = x7[63];
-
-  // stage 9
-  __m128i x9[64];
-  x9[0] = x8[0];
-  x9[1] = x8[1];
-  x9[2] = x8[2];
-  x9[3] = x8[3];
-  x9[4] = x8[4];
-  x9[5] = x8[5];
-  x9[6] = x8[6];
-  x9[7] = x8[7];
-  x9[8] = x8[8];
-  x9[9] = x8[9];
-  x9[10] = x8[10];
-  x9[11] = x8[11];
-  x9[12] = x8[12];
-  x9[13] = x8[13];
-  x9[14] = x8[14];
-  x9[15] = x8[15];
-  btf_16_sse2(cospi_p62_p02, cospi_m02_p62, x8[16], x8[31], x9[16], x9[31]);
-  btf_16_sse2(cospi_p30_p34, cospi_m34_p30, x8[17], x8[30], x9[17], x9[30]);
-  btf_16_sse2(cospi_p46_p18, cospi_m18_p46, x8[18], x8[29], x9[18], x9[29]);
-  btf_16_sse2(cospi_p14_p50, cospi_m50_p14, x8[19], x8[28], x9[19], x9[28]);
-  btf_16_sse2(cospi_p54_p10, cospi_m10_p54, x8[20], x8[27], x9[20], x9[27]);
-  btf_16_sse2(cospi_p22_p42, cospi_m42_p22, x8[21], x8[26], x9[21], x9[26]);
-  btf_16_sse2(cospi_p38_p26, cospi_m26_p38, x8[22], x8[25], x9[22], x9[25]);
-  btf_16_sse2(cospi_p06_p58, cospi_m58_p06, x8[23], x8[24], x9[23], x9[24]);
-  x9[32] = _mm_adds_epi16(x8[32], x8[33]);
-  x9[33] = _mm_subs_epi16(x8[32], x8[33]);
-  x9[34] = _mm_subs_epi16(x8[35], x8[34]);
-  x9[35] = _mm_adds_epi16(x8[35], x8[34]);
-  x9[36] = _mm_adds_epi16(x8[36], x8[37]);
-  x9[37] = _mm_subs_epi16(x8[36], x8[37]);
-  x9[38] = _mm_subs_epi16(x8[39], x8[38]);
-  x9[39] = _mm_adds_epi16(x8[39], x8[38]);
-  x9[40] = _mm_adds_epi16(x8[40], x8[41]);
-  x9[41] = _mm_subs_epi16(x8[40], x8[41]);
-  x9[42] = _mm_subs_epi16(x8[43], x8[42]);
-  x9[43] = _mm_adds_epi16(x8[43], x8[42]);
-  x9[44] = _mm_adds_epi16(x8[44], x8[45]);
-  x9[45] = _mm_subs_epi16(x8[44], x8[45]);
-  x9[46] = _mm_subs_epi16(x8[47], x8[46]);
-  x9[47] = _mm_adds_epi16(x8[47], x8[46]);
-  x9[48] = _mm_adds_epi16(x8[48], x8[49]);
-  x9[49] = _mm_subs_epi16(x8[48], x8[49]);
-  x9[50] = _mm_subs_epi16(x8[51], x8[50]);
-  x9[51] = _mm_adds_epi16(x8[51], x8[50]);
-  x9[52] = _mm_adds_epi16(x8[52], x8[53]);
-  x9[53] = _mm_subs_epi16(x8[52], x8[53]);
-  x9[54] = _mm_subs_epi16(x8[55], x8[54]);
-  x9[55] = _mm_adds_epi16(x8[55], x8[54]);
-  x9[56] = _mm_adds_epi16(x8[56], x8[57]);
-  x9[57] = _mm_subs_epi16(x8[56], x8[57]);
-  x9[58] = _mm_subs_epi16(x8[59], x8[58]);
-  x9[59] = _mm_adds_epi16(x8[59], x8[58]);
-  x9[60] = _mm_adds_epi16(x8[60], x8[61]);
-  x9[61] = _mm_subs_epi16(x8[60], x8[61]);
-  x9[62] = _mm_subs_epi16(x8[63], x8[62]);
-  x9[63] = _mm_adds_epi16(x8[63], x8[62]);
-
-  // stage 10
-  __m128i x10[64];
-  x10[0] = x9[0];
-  x10[1] = x9[1];
-  x10[2] = x9[2];
-  x10[3] = x9[3];
-  x10[4] = x9[4];
-  x10[5] = x9[5];
-  x10[6] = x9[6];
-  x10[7] = x9[7];
-  x10[8] = x9[8];
-  x10[9] = x9[9];
-  x10[10] = x9[10];
-  x10[11] = x9[11];
-  x10[12] = x9[12];
-  x10[13] = x9[13];
-  x10[14] = x9[14];
-  x10[15] = x9[15];
-  x10[16] = x9[16];
-  x10[17] = x9[17];
-  x10[18] = x9[18];
-  x10[19] = x9[19];
-  x10[20] = x9[20];
-  x10[21] = x9[21];
-  x10[22] = x9[22];
-  x10[23] = x9[23];
-  x10[24] = x9[24];
-  x10[25] = x9[25];
-  x10[26] = x9[26];
-  x10[27] = x9[27];
-  x10[28] = x9[28];
-  x10[29] = x9[29];
-  x10[30] = x9[30];
-  x10[31] = x9[31];
-  btf_16_sse2(cospi_p63_p01, cospi_m01_p63, x9[32], x9[63], x10[32], x10[63]);
-  btf_16_sse2(cospi_p31_p33, cospi_m33_p31, x9[33], x9[62], x10[33], x10[62]);
-  btf_16_sse2(cospi_p47_p17, cospi_m17_p47, x9[34], x9[61], x10[34], x10[61]);
-  btf_16_sse2(cospi_p15_p49, cospi_m49_p15, x9[35], x9[60], x10[35], x10[60]);
-  btf_16_sse2(cospi_p55_p09, cospi_m09_p55, x9[36], x9[59], x10[36], x10[59]);
-  btf_16_sse2(cospi_p23_p41, cospi_m41_p23, x9[37], x9[58], x10[37], x10[58]);
-  btf_16_sse2(cospi_p39_p25, cospi_m25_p39, x9[38], x9[57], x10[38], x10[57]);
-  btf_16_sse2(cospi_p07_p57, cospi_m57_p07, x9[39], x9[56], x10[39], x10[56]);
-  btf_16_sse2(cospi_p59_p05, cospi_m05_p59, x9[40], x9[55], x10[40], x10[55]);
-  btf_16_sse2(cospi_p27_p37, cospi_m37_p27, x9[41], x9[54], x10[41], x10[54]);
-  btf_16_sse2(cospi_p43_p21, cospi_m21_p43, x9[42], x9[53], x10[42], x10[53]);
-  btf_16_sse2(cospi_p11_p53, cospi_m53_p11, x9[43], x9[52], x10[43], x10[52]);
-  btf_16_sse2(cospi_p51_p13, cospi_m13_p51, x9[44], x9[51], x10[44], x10[51]);
-  btf_16_sse2(cospi_p19_p45, cospi_m45_p19, x9[45], x9[50], x10[45], x10[50]);
-  btf_16_sse2(cospi_p35_p29, cospi_m29_p35, x9[46], x9[49], x10[46], x10[49]);
-  btf_16_sse2(cospi_p03_p61, cospi_m61_p03, x9[47], x9[48], x10[47], x10[48]);
-
-  // stage 11
-  output[0] = x10[0];
-  output[1] = x10[32];
-  output[2] = x10[16];
-  output[3] = x10[48];
-  output[4] = x10[8];
-  output[5] = x10[40];
-  output[6] = x10[24];
-  output[7] = x10[56];
-  output[8] = x10[4];
-  output[9] = x10[36];
-  output[10] = x10[20];
-  output[11] = x10[52];
-  output[12] = x10[12];
-  output[13] = x10[44];
-  output[14] = x10[28];
-  output[15] = x10[60];
-  output[16] = x10[2];
-  output[17] = x10[34];
-  output[18] = x10[18];
-  output[19] = x10[50];
-  output[20] = x10[10];
-  output[21] = x10[42];
-  output[22] = x10[26];
-  output[23] = x10[58];
-  output[24] = x10[6];
-  output[25] = x10[38];
-  output[26] = x10[22];
-  output[27] = x10[54];
-  output[28] = x10[14];
-  output[29] = x10[46];
-  output[30] = x10[30];
-  output[31] = x10[62];
-  output[32] = x10[1];
-  output[33] = x10[33];
-  output[34] = x10[17];
-  output[35] = x10[49];
-  output[36] = x10[9];
-  output[37] = x10[41];
-  output[38] = x10[25];
-  output[39] = x10[57];
-  output[40] = x10[5];
-  output[41] = x10[37];
-  output[42] = x10[21];
-  output[43] = x10[53];
-  output[44] = x10[13];
-  output[45] = x10[45];
-  output[46] = x10[29];
-  output[47] = x10[61];
-  output[48] = x10[3];
-  output[49] = x10[35];
-  output[50] = x10[19];
-  output[51] = x10[51];
-  output[52] = x10[11];
-  output[53] = x10[43];
-  output[54] = x10[27];
-  output[55] = x10[59];
-  output[56] = x10[7];
-  output[57] = x10[39];
-  output[58] = x10[23];
-  output[59] = x10[55];
-  output[60] = x10[15];
-  output[61] = x10[47];
-  output[62] = x10[31];
-  output[63] = x10[63];
-}
-
-static void fadst4x4_new_sse2(const __m128i *input, __m128i *output,
-                              int8_t cos_bit) {
-  const int32_t *sinpi = sinpi_arr(cos_bit);
-  const __m128i sinpi_p01_p02 = pair_set_epi16(sinpi[1], sinpi[2]);
-  const __m128i sinpi_p04_m01 = pair_set_epi16(sinpi[4], -sinpi[1]);
-  const __m128i sinpi_p03_p04 = pair_set_epi16(sinpi[3], sinpi[4]);
-  const __m128i sinpi_m03_p02 = pair_set_epi16(-sinpi[3], sinpi[2]);
-  const __m128i sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi[3]);
-  const __m128i __zero = _mm_set1_epi16(0);
-  const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-  const __m128i in7 = _mm_add_epi16(input[0], input[1]);
-  __m128i u[8], v[8];
-
-  u[0] = _mm_unpacklo_epi16(input[0], input[1]);
-  u[1] = _mm_unpacklo_epi16(input[2], input[3]);
-  u[2] = _mm_unpacklo_epi16(in7, __zero);
-  u[3] = _mm_unpacklo_epi16(input[2], __zero);
-  u[4] = _mm_unpacklo_epi16(input[3], __zero);
-
-  v[0] = _mm_madd_epi16(u[0], sinpi_p01_p02);  // s0 + s2
-  v[1] = _mm_madd_epi16(u[1], sinpi_p03_p04);  // s4 + s5
-  v[2] = _mm_madd_epi16(u[2], sinpi_p03_p03);  // x1
-  v[3] = _mm_madd_epi16(u[0], sinpi_p04_m01);  // s1 - s3
-  v[4] = _mm_madd_epi16(u[1], sinpi_m03_p02);  // -s4 + s6
-  v[5] = _mm_madd_epi16(u[3], sinpi_p03_p03);  // s4
-  v[6] = _mm_madd_epi16(u[4], sinpi_p03_p03);
-
-  u[0] = _mm_add_epi32(v[0], v[1]);
-  u[1] = _mm_sub_epi32(v[2], v[6]);
-  u[2] = _mm_add_epi32(v[3], v[4]);
-  u[3] = _mm_sub_epi32(u[2], u[0]);
-  u[4] = _mm_slli_epi32(v[5], 2);
-  u[5] = _mm_sub_epi32(u[4], v[5]);
-  u[6] = _mm_add_epi32(u[3], u[5]);
-
-  v[0] = _mm_add_epi32(u[0], __rounding);
-  v[1] = _mm_add_epi32(u[1], __rounding);
-  v[2] = _mm_add_epi32(u[2], __rounding);
-  v[3] = _mm_add_epi32(u[6], __rounding);
-
-  u[0] = _mm_srai_epi32(v[0], cos_bit);
-  u[1] = _mm_srai_epi32(v[1], cos_bit);
-  u[2] = _mm_srai_epi32(v[2], cos_bit);
-  u[3] = _mm_srai_epi32(v[3], cos_bit);
-
-  output[0] = _mm_packs_epi32(u[0], u[2]);
-  output[1] = _mm_packs_epi32(u[1], u[3]);
-  output[2] = _mm_srli_si128(output[0], 8);
-  output[3] = _mm_srli_si128(output[1], 8);
-}
-
-static void fadst4x8_new_sse2(const __m128i *input, __m128i *output,
-                              int8_t cos_bit) {
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const __m128i __zero = _mm_setzero_si128();
-  const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
-  __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
-  __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
-  __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
-  __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
-  __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]);
-  __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]);
-  __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]);
-  __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]);
-  __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]);
-  __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]);
-  __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]);
-  __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]);
-  __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]);
-
-  // stage 1
-  __m128i x1[8];
-  x1[0] = input[0];
-  x1[1] = _mm_subs_epi16(__zero, input[7]);
-  x1[2] = _mm_subs_epi16(__zero, input[3]);
-  x1[3] = input[4];
-  x1[4] = _mm_subs_epi16(__zero, input[1]);
-  x1[5] = input[6];
-  x1[6] = input[2];
-  x1[7] = _mm_subs_epi16(__zero, input[5]);
-
-  // stage 2
-  __m128i x2[8];
-  x2[0] = x1[0];
-  x2[1] = x1[1];
-  btf_16_w4_sse2(&cospi_p32_p32, &cospi_p32_m32, __rounding, cos_bit, &x1[2],
-                 &x1[3], &x2[2], &x2[3]);
-  x2[4] = x1[4];
-  x2[5] = x1[5];
-  btf_16_w4_sse2(&cospi_p32_p32, &cospi_p32_m32, __rounding, cos_bit, &x1[6],
-                 &x1[7], &x2[6], &x2[7]);
-
-  // stage 3
-  __m128i x3[8];
-  x3[0] = _mm_adds_epi16(x2[0], x2[2]);
-  x3[2] = _mm_subs_epi16(x2[0], x2[2]);
-  x3[1] = _mm_adds_epi16(x2[1], x2[3]);
-  x3[3] = _mm_subs_epi16(x2[1], x2[3]);
-  x3[4] = _mm_adds_epi16(x2[4], x2[6]);
-  x3[6] = _mm_subs_epi16(x2[4], x2[6]);
-  x3[5] = _mm_adds_epi16(x2[5], x2[7]);
-  x3[7] = _mm_subs_epi16(x2[5], x2[7]);
-
-  // stage 4
-  __m128i x4[8];
-  x4[0] = x3[0];
-  x4[1] = x3[1];
-  x4[2] = x3[2];
-  x4[3] = x3[3];
-  btf_16_w4_sse2(&cospi_p16_p48, &cospi_p48_m16, __rounding, cos_bit, &x3[4],
-                 &x3[5], &x4[4], &x4[5]);
-  btf_16_w4_sse2(&cospi_m48_p16, &cospi_p16_p48, __rounding, cos_bit, &x3[6],
-                 &x3[7], &x4[6], &x4[7]);
-
-  // stage 5
-  __m128i x5[8];
-  x5[0] = _mm_adds_epi16(x4[0], x4[4]);
-  x5[4] = _mm_subs_epi16(x4[0], x4[4]);
-  x5[1] = _mm_adds_epi16(x4[1], x4[5]);
-  x5[5] = _mm_subs_epi16(x4[1], x4[5]);
-  x5[2] = _mm_adds_epi16(x4[2], x4[6]);
-  x5[6] = _mm_subs_epi16(x4[2], x4[6]);
-  x5[3] = _mm_adds_epi16(x4[3], x4[7]);
-  x5[7] = _mm_subs_epi16(x4[3], x4[7]);
-
-  // stage 6
-  __m128i x6[8];
-  btf_16_w4_sse2(&cospi_p04_p60, &cospi_p60_m04, __rounding, cos_bit, &x5[0],
-                 &x5[1], &x6[0], &x6[1]);
-  btf_16_w4_sse2(&cospi_p20_p44, &cospi_p44_m20, __rounding, cos_bit, &x5[2],
-                 &x5[3], &x6[2], &x6[3]);
-  btf_16_w4_sse2(&cospi_p36_p28, &cospi_p28_m36, __rounding, cos_bit, &x5[4],
-                 &x5[5], &x6[4], &x6[5]);
-  btf_16_w4_sse2(&cospi_p52_p12, &cospi_p12_m52, __rounding, cos_bit, &x5[6],
-                 &x5[7], &x6[6], &x6[7]);
-
-  // stage 7
-  output[0] = x6[1];
-  output[1] = x6[6];
-  output[2] = x6[3];
-  output[3] = x6[4];
-  output[4] = x6[5];
-  output[5] = x6[2];
-  output[6] = x6[7];
-  output[7] = x6[0];
-}
-
-static void fadst8x4_new_sse2(const __m128i *input, __m128i *output,
-                              int8_t cos_bit) {
-  const int32_t *sinpi = sinpi_arr(cos_bit);
-  const __m128i sinpi_p01_p02 = pair_set_epi16(sinpi[1], sinpi[2]);
-  const __m128i sinpi_p04_m01 = pair_set_epi16(sinpi[4], -sinpi[1]);
-  const __m128i sinpi_p03_p04 = pair_set_epi16(sinpi[3], sinpi[4]);
-  const __m128i sinpi_m03_p02 = pair_set_epi16(-sinpi[3], sinpi[2]);
-  const __m128i sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi[3]);
-  const __m128i __zero = _mm_set1_epi16(0);
-  const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-  const __m128i in7 = _mm_add_epi16(input[0], input[1]);
-  __m128i u_lo[8], u_hi[8], v_lo[8], v_hi[8];
-
-  u_lo[0] = _mm_unpacklo_epi16(input[0], input[1]);
-  u_hi[0] = _mm_unpackhi_epi16(input[0], input[1]);
-  u_lo[1] = _mm_unpacklo_epi16(input[2], input[3]);
-  u_hi[1] = _mm_unpackhi_epi16(input[2], input[3]);
-  u_lo[2] = _mm_unpacklo_epi16(in7, __zero);
-  u_hi[2] = _mm_unpackhi_epi16(in7, __zero);
-  u_lo[3] = _mm_unpacklo_epi16(input[2], __zero);
-  u_hi[3] = _mm_unpackhi_epi16(input[2], __zero);
-  u_lo[4] = _mm_unpacklo_epi16(input[3], __zero);
-  u_hi[4] = _mm_unpackhi_epi16(input[3], __zero);
-
-  v_lo[0] = _mm_madd_epi16(u_lo[0], sinpi_p01_p02);  // s0 + s2
-  v_hi[0] = _mm_madd_epi16(u_hi[0], sinpi_p01_p02);  // s0 + s2
-  v_lo[1] = _mm_madd_epi16(u_lo[1], sinpi_p03_p04);  // s4 + s5
-  v_hi[1] = _mm_madd_epi16(u_hi[1], sinpi_p03_p04);  // s4 + s5
-  v_lo[2] = _mm_madd_epi16(u_lo[2], sinpi_p03_p03);  // x1
-  v_hi[2] = _mm_madd_epi16(u_hi[2], sinpi_p03_p03);  // x1
-  v_lo[3] = _mm_madd_epi16(u_lo[0], sinpi_p04_m01);  // s1 - s3
-  v_hi[3] = _mm_madd_epi16(u_hi[0], sinpi_p04_m01);  // s1 - s3
-  v_lo[4] = _mm_madd_epi16(u_lo[1], sinpi_m03_p02);  // -s4 + s6
-  v_hi[4] = _mm_madd_epi16(u_hi[1], sinpi_m03_p02);  // -s4 + s6
-  v_lo[5] = _mm_madd_epi16(u_lo[3], sinpi_p03_p03);  // s4
-  v_hi[5] = _mm_madd_epi16(u_hi[3], sinpi_p03_p03);  // s4
-  v_lo[6] = _mm_madd_epi16(u_lo[4], sinpi_p03_p03);
-  v_hi[6] = _mm_madd_epi16(u_hi[4], sinpi_p03_p03);
-
-  u_lo[0] = _mm_add_epi32(v_lo[0], v_lo[1]);
-  u_hi[0] = _mm_add_epi32(v_hi[0], v_hi[1]);
-  u_lo[1] = _mm_sub_epi32(v_lo[2], v_lo[6]);
-  u_hi[1] = _mm_sub_epi32(v_hi[2], v_hi[6]);
-  u_lo[2] = _mm_add_epi32(v_lo[3], v_lo[4]);
-  u_hi[2] = _mm_add_epi32(v_hi[3], v_hi[4]);
-  u_lo[3] = _mm_sub_epi32(u_lo[2], u_lo[0]);
-  u_hi[3] = _mm_sub_epi32(u_hi[2], u_hi[0]);
-  u_lo[4] = _mm_slli_epi32(v_lo[5], 2);
-  u_hi[4] = _mm_slli_epi32(v_hi[5], 2);
-  u_lo[5] = _mm_sub_epi32(u_lo[4], v_lo[5]);
-  u_hi[5] = _mm_sub_epi32(u_hi[4], v_hi[5]);
-  u_lo[6] = _mm_add_epi32(u_lo[3], u_lo[5]);
-  u_hi[6] = _mm_add_epi32(u_hi[3], u_hi[5]);
-
-  v_lo[0] = _mm_add_epi32(u_lo[0], __rounding);
-  v_hi[0] = _mm_add_epi32(u_hi[0], __rounding);
-  v_lo[1] = _mm_add_epi32(u_lo[1], __rounding);
-  v_hi[1] = _mm_add_epi32(u_hi[1], __rounding);
-  v_lo[2] = _mm_add_epi32(u_lo[2], __rounding);
-  v_hi[2] = _mm_add_epi32(u_hi[2], __rounding);
-  v_lo[3] = _mm_add_epi32(u_lo[6], __rounding);
-  v_hi[3] = _mm_add_epi32(u_hi[6], __rounding);
-
-  u_lo[0] = _mm_srai_epi32(v_lo[0], cos_bit);
-  u_hi[0] = _mm_srai_epi32(v_hi[0], cos_bit);
-  u_lo[1] = _mm_srai_epi32(v_lo[1], cos_bit);
-  u_hi[1] = _mm_srai_epi32(v_hi[1], cos_bit);
-  u_lo[2] = _mm_srai_epi32(v_lo[2], cos_bit);
-  u_hi[2] = _mm_srai_epi32(v_hi[2], cos_bit);
-  u_lo[3] = _mm_srai_epi32(v_lo[3], cos_bit);
-  u_hi[3] = _mm_srai_epi32(v_hi[3], cos_bit);
-
-  output[0] = _mm_packs_epi32(u_lo[0], u_hi[0]);
-  output[1] = _mm_packs_epi32(u_lo[1], u_hi[1]);
-  output[2] = _mm_packs_epi32(u_lo[2], u_hi[2]);
-  output[3] = _mm_packs_epi32(u_lo[3], u_hi[3]);
-}
-
-static void fadst8x8_new_sse2(const __m128i *input, __m128i *output,
-                              int8_t cos_bit) {
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const __m128i __zero = _mm_setzero_si128();
-  const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
-  __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
-  __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
-  __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
-  __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
-  __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]);
-  __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]);
-  __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]);
-  __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]);
-  __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]);
-  __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]);
-  __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]);
-  __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]);
-  __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]);
-
-  // stage 1
-  __m128i x1[8];
-  x1[0] = input[0];
-  x1[1] = _mm_subs_epi16(__zero, input[7]);
-  x1[2] = _mm_subs_epi16(__zero, input[3]);
-  x1[3] = input[4];
-  x1[4] = _mm_subs_epi16(__zero, input[1]);
-  x1[5] = input[6];
-  x1[6] = input[2];
-  x1[7] = _mm_subs_epi16(__zero, input[5]);
-
-  // stage 2
-  __m128i x2[8];
-  x2[0] = x1[0];
-  x2[1] = x1[1];
-  btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[2], x1[3], x2[2], x2[3]);
-  x2[4] = x1[4];
-  x2[5] = x1[5];
-  btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[6], x1[7], x2[6], x2[7]);
-
-  // stage 3
-  __m128i x3[8];
-  x3[0] = _mm_adds_epi16(x2[0], x2[2]);
-  x3[2] = _mm_subs_epi16(x2[0], x2[2]);
-  x3[1] = _mm_adds_epi16(x2[1], x2[3]);
-  x3[3] = _mm_subs_epi16(x2[1], x2[3]);
-  x3[4] = _mm_adds_epi16(x2[4], x2[6]);
-  x3[6] = _mm_subs_epi16(x2[4], x2[6]);
-  x3[5] = _mm_adds_epi16(x2[5], x2[7]);
-  x3[7] = _mm_subs_epi16(x2[5], x2[7]);
-
-  // stage 4
-  __m128i x4[8];
-  x4[0] = x3[0];
-  x4[1] = x3[1];
-  x4[2] = x3[2];
-  x4[3] = x3[3];
-  btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x3[4], x3[5], x4[4], x4[5]);
-  btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x3[6], x3[7], x4[6], x4[7]);
-
-  // stage 5
-  __m128i x5[8];
-  x5[0] = _mm_adds_epi16(x4[0], x4[4]);
-  x5[4] = _mm_subs_epi16(x4[0], x4[4]);
-  x5[1] = _mm_adds_epi16(x4[1], x4[5]);
-  x5[5] = _mm_subs_epi16(x4[1], x4[5]);
-  x5[2] = _mm_adds_epi16(x4[2], x4[6]);
-  x5[6] = _mm_subs_epi16(x4[2], x4[6]);
-  x5[3] = _mm_adds_epi16(x4[3], x4[7]);
-  x5[7] = _mm_subs_epi16(x4[3], x4[7]);
-
-  // stage 6
-  __m128i x6[8];
-  btf_16_sse2(cospi_p04_p60, cospi_p60_m04, x5[0], x5[1], x6[0], x6[1]);
-  btf_16_sse2(cospi_p20_p44, cospi_p44_m20, x5[2], x5[3], x6[2], x6[3]);
-  btf_16_sse2(cospi_p36_p28, cospi_p28_m36, x5[4], x5[5], x6[4], x6[5]);
-  btf_16_sse2(cospi_p52_p12, cospi_p12_m52, x5[6], x5[7], x6[6], x6[7]);
-
-  // stage 7
-  output[0] = x6[1];
-  output[1] = x6[6];
-  output[2] = x6[3];
-  output[3] = x6[4];
-  output[4] = x6[5];
-  output[5] = x6[2];
-  output[6] = x6[7];
-  output[7] = x6[0];
-}
-
-static void fadst8x16_new_sse2(const __m128i *input, __m128i *output,
-                               int8_t cos_bit) {
-  const int32_t *cospi = cospi_arr(cos_bit);
-  const __m128i __zero = _mm_setzero_si128();
-  const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
-
-  __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]);
-  __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]);
-  __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]);
-  __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]);
-  __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]);
-  __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]);
-  __m128i cospi_p56_m08 = pair_set_epi16(cospi[56], -cospi[8]);
-  __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]);
-  __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]);
-  __m128i cospi_m56_p08 = pair_set_epi16(-cospi[56], cospi[8]);
-  __m128i cospi_m24_p40 = pair_set_epi16(-cospi[24], cospi[40]);
-  __m128i cospi_p02_p62 = pair_set_epi16(cospi[2], cospi[62]);
-  __m128i cospi_p62_m02 = pair_set_epi16(cospi[62], -cospi[2]);
-  __m128i cospi_p10_p54 = pair_set_epi16(cospi[10], cospi[54]);
-  __m128i cospi_p54_m10 = pair_set_epi16(cospi[54], -cospi[10]);
-  __m128i cospi_p18_p46 = pair_set_epi16(cospi[18], cospi[46]);
-  __m128i cospi_p46_m18 = pair_set_epi16(cospi[46], -cospi[18]);
-  __m128i cospi_p26_p38 = pair_set_epi16(cospi[26], cospi[38]);
-  __m128i cospi_p38_m26 = pair_set_epi16(cospi[38], -cospi[26]);
-  __m128i cospi_p34_p30 = pair_set_epi16(cospi[34], cospi[30]);
-  __m128i cospi_p30_m34 = pair_set_epi16(cospi[30], -cospi[34]);
-  __m128i cospi_p42_p22 = pair_set_epi16(cospi[42], cospi[22]);
-  __m128i cospi_p22_m42 = pair_set_epi16(cospi[22], -cospi[42]);
-  __m128i cospi_p50_p14 = pair_set_epi16(cospi[50], cospi[14]);
-  __m128i cospi_p14_m50 = pair_set_epi16(cospi[14], -cospi[50]);
-  __m128i cospi_p58_p06 = pair_set_epi16(cospi[58], cospi[6]);
-  __m128i cospi_p06_m58 = pair_set_epi16(cospi[6], -cospi[58]);
-
-  // stage 1
-  __m128i x1[16];
-  x1[0] = input[0];
-  x1[1] = _mm_subs_epi16(__zero, input[15]);
-  x1[2] = _mm_subs_epi16(__zero, input[7]);
-  x1[3] = input[8];
-  x1[4] = _mm_subs_epi16(__zero, input[3]);
-  x1[5] = input[12];
-  x1[6] = input[4];
-  x1[7] = _mm_subs_epi16(__zero, input[11]);
-  x1[8] = _mm_subs_epi16(__zero, input[1]);
-  x1[9] = input[14];
-  x1[10] = input[6];
-  x1[11] = _mm_subs_epi16(__zero, input[9]);
-  x1[12] = input[2];
-  x1[13] = _mm_subs_epi16(__zero, input[13]);
-  x1[14] = _mm_subs_epi16(__zero, input[5]);
-  x1[15] = input[10];
-
-  // stage 2
-  __m128i x2[16];
-  x2[0] = x1[0];
-  x2[1] = x1[1];
-  btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[2], x1[3], x2[2], x2[3]);
-  x2[4] = x1[4];
-  x2[5] = x1[5];
-  btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[6], x1[7], x2[6], x2[7]);
-  x2[8] = x1[8];
-  x2[9] = x1[9];
-  btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[10], x1[11], x2[10], x2[11]);
-  x2[12] = x1[12];
-  x2[13] = x1[13];
-  btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[14], x1[15], x2[14], x2[15]);
-
-  // stage 3
-  __m128i x3[16];
-  x3[0] = _mm_adds_epi16(x2[0], x2[2]);
-  x3[2] = _mm_subs_epi16(x2[0], x2[2]);
-  x3[1] = _mm_adds_epi16(x2[1], x2[3]);
-  x3[3] = _mm_subs_epi16(x2[1], x2[3]);
-  x3[4] = _mm_adds_epi16(x2[4], x2[6]);
-  x3[6] = _mm_subs_epi16(x2[4], x2[6]);
-  x3[5] = _mm_adds_epi16(x2[5], x2[7]);
-  x3[7] = _mm_subs_epi16(x2[5], x2[7]);
-  x3[8] = _mm_adds_epi16(x2[8], x2[10]);
-  x3[10] = _mm_subs_epi16(x2[8], x2[10]);
-  x3[9] = _mm_adds_epi16(x2[9], x2[11]);
-  x3[11] = _mm_subs_epi16(x2[9], x2[11]);
-  x3[12] = _mm_adds_epi16(x2[12], x2[14]);
-  x3[14] = _mm_subs_epi16(x2[12], x2[14]);
-  x3[13] = _mm_adds_epi16(x2[13], x2[15]);
-  x3[15] = _mm_subs_epi16(x2[13], x2[15]);
-
-  // stage 4
-  __m128i x4[16];
-  x4[0] = x3[0];
-  x4[1] = x3[1];
-  x4[2] = x3[2];
-  x4[3] = x3[3];
-  btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x3[4], x3[5], x4[4], x4[5]);
-  btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x3[6], x3[7], x4[6], x4[7]);
-  x4[8] = x3[8];
-  x4[9] = x3[9];
-  x4[10] = x3[10];
-  x4[11] = x3[11];
-  btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x3[12], x3[13], x4[12], x4[13]);
-  btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x3[14], x3[15], x4[14], x4[15]);
-
-  // stage 5
-  __m128i x5[16];
-  x5[0] = _mm_adds_epi16(x4[0], x4[4]);
-  x5[4] = _mm_subs_epi16(x4[0], x4[4]);
-  x5[1] = _mm_adds_epi16(x4[1], x4[5]);
-  x5[5] = _mm_subs_epi16(x4[1], x4[5]);
-  x5[2] = _mm_adds_epi16(x4[2], x4[6]);
-  x5[6] = _mm_subs_epi16(x4[2], x4[6]);
-  x5[3] = _mm_adds_epi16(x4[3], x4[7]);
-  x5[7] = _mm_subs_epi16(x4[3], x4[7]);
-  x5[8] = _mm_adds_epi16(x4[8], x4[12]);
-  x5[12] = _mm_subs_epi16(x4[8], x4[12]);
-  x5[9] = _mm_adds_epi16(x4[9], x4[13]);
-  x5[13] = _mm_subs_epi16(x4[9], x4[13]);
-  x5[10] = _mm_adds_epi16(x4[10], x4[14]);
-  x5[14] = _mm_subs_epi16(x4[10], x4[14]);
-  x5[11] = _mm_adds_epi16(x4[11], x4[15]);
-  x5[15] = _mm_subs_epi16(x4[11], x4[15]);
-
-  // stage 6
-  __m128i x6[16];
-  x6[0] = x5[0];
-  x6[1] = x5[1];
-  x6[2] = x5[2];
-  x6[3] = x5[3];
-  x6[4] = x5[4];
-  x6[5] = x5[5];
-  x6[6] = x5[6];
-  x6[7] = x5[7];
-  btf_16_sse2(cospi_p08_p56, cospi_p56_m08, x5[8], x5[9], x6[8], x6[9]);
-  btf_16_sse2(cospi_p40_p24, cospi_p24_m40, x5[10], x5[11], x6[10], x6[11]);
-  btf_16_sse2(cospi_m56_p08, cospi_p08_p56, x5[12], x5[13], x6[12], x6[13]);
-  btf_16_sse2(cospi_m24_p40, cospi_p40_p24, x5[14], x5[15], x6[14], x6[15]);
-
-  // stage 7
-  __m128i x7[16];
-  x7[0] = _mm_adds_epi16(x6[0], x6[8]);
-  x7[8] = _mm_subs_epi16(x6[0], x6[8]);
-  x7[1] = _mm_adds_epi16(x6[1], x6[9]);
-  x7[9] = _mm_subs_epi16(x6[1], x6[9]);
-  x7[2] = _mm_adds_epi16(x6[2], x6[10]);
-  x7[10] = _mm_subs_epi16(x6[2], x6[10]);
-  x7[3] = _mm_adds_epi16(x6[3], x6[11]);
-  x7[11] = _mm_subs_epi16(x6[3], x6[11]);
-  x7[4] = _mm_adds_epi16(x6[4], x6[12]);
-  x7[12] = _mm_subs_epi16(x6[4], x6[12]);
-  x7[5] = _mm_adds_epi16(x6[5], x6[13]);
-  x7[13] = _mm_subs_epi16(x6[5], x6[13]);
-  x7[6] = _mm_adds_epi16(x6[6], x6[14]);
-  x7[14] = _mm_subs_epi16(x6[6], x6[14]);
-  x7[7] = _mm_adds_epi16(x6[7], x6[15]);
-  x7[15] = _mm_subs_epi16(x6[7], x6[15]);
-
-  // stage 8
-  __m128i x8[16];
-  btf_16_sse2(cospi_p02_p62, cospi_p62_m02, x7[0], x7[1], x8[0], x8[1]);
-  btf_16_sse2(cospi_p10_p54, cospi_p54_m10, x7[2], x7[3], x8[2], x8[3]);
-  btf_16_sse2(cospi_p18_p46, cospi_p46_m18, x7[4], x7[5], x8[4], x8[5]);
-  btf_16_sse2(cospi_p26_p38, cospi_p38_m26, x7[6], x7[7], x8[6], x8[7]);
-  btf_16_sse2(cospi_p34_p30, cospi_p30_m34, x7[8], x7[9], x8[8], x8[9]);
-  btf_16_sse2(cospi_p42_p22, cospi_p22_m42, x7[10], x7[11], x8[10], x8[11]);
-  btf_16_sse2(cospi_p50_p14, cospi_p14_m50, x7[12], x7[13], x8[12], x8[13]);
-  btf_16_sse2(cospi_p58_p06, cospi_p06_m58, x7[14], x7[15], x8[14], x8[15]);
-
-  // stage 9
-  output[0] = x8[1];
-  output[1] = x8[14];
-  output[2] = x8[3];
-  output[3] = x8[12];
-  output[4] = x8[5];
-  output[5] = x8[10];
-  output[6] = x8[7];
-  output[7] = x8[8];
-  output[8] = x8[9];
-  output[9] = x8[6];
-  output[10] = x8[11];
-  output[11] = x8[4];
-  output[12] = x8[13];
-  output[13] = x8[2];
-  output[14] = x8[15];
-  output[15] = x8[0];
-}
-
-static const transform_1d_sse2 col_txfm4x4_arr[TX_TYPES] = {
-  fdct4x4_new_sse2,       // DCT_DCT
-  fadst4x4_new_sse2,      // ADST_DCT
-  fdct4x4_new_sse2,       // DCT_ADST
-  fadst4x4_new_sse2,      // ADST_ADST
-  fadst4x4_new_sse2,      // FLIPADST_DCT
-  fdct4x4_new_sse2,       // DCT_FLIPADST
-  fadst4x4_new_sse2,      // FLIPADST_FLIPADST
-  fadst4x4_new_sse2,      // ADST_FLIPADST
-  fadst4x4_new_sse2,      // FLIPADST_ADST
-  fidentity4x4_new_sse2,  // IDTX
-  fdct4x4_new_sse2,       // V_DCT
-  fidentity4x4_new_sse2,  // H_DCT
-  fadst4x4_new_sse2,      // V_ADST
-  fidentity4x4_new_sse2,  // H_ADST
-  fadst4x4_new_sse2,      // V_FLIPADST
-  fidentity4x4_new_sse2   // H_FLIPADST
-};
-
-static const transform_1d_sse2 row_txfm4x4_arr[TX_TYPES] = {
-  fdct4x4_new_sse2,       // DCT_DCT
-  fdct4x4_new_sse2,       // ADST_DCT
-  fadst4x4_new_sse2,      // DCT_ADST
-  fadst4x4_new_sse2,      // ADST_ADST
-  fdct4x4_new_sse2,       // FLIPADST_DCT
-  fadst4x4_new_sse2,      // DCT_FLIPADST
-  fadst4x4_new_sse2,      // FLIPADST_FLIPADST
-  fadst4x4_new_sse2,      // ADST_FLIPADST
-  fadst4x4_new_sse2,      // FLIPADST_ADST
-  fidentity4x4_new_sse2,  // IDTX
-  fidentity4x4_new_sse2,  // V_DCT
-  fdct4x4_new_sse2,       // H_DCT
-  fidentity4x4_new_sse2,  // V_ADST
-  fadst4x4_new_sse2,      // H_ADST
-  fidentity4x4_new_sse2,  // V_FLIPADST
-  fadst4x4_new_sse2       // H_FLIPADST
-};
-
-static const transform_1d_sse2 col_txfm4x8_arr[TX_TYPES] = {
-  fdct4x8_new_sse2,       // DCT_DCT
-  fadst4x8_new_sse2,      // ADST_DCT
-  fdct4x8_new_sse2,       // DCT_ADST
-  fadst4x8_new_sse2,      // ADST_ADST
-  fadst4x8_new_sse2,      // FLIPADST_DCT
-  fdct4x8_new_sse2,       // DCT_FLIPADST
-  fadst4x8_new_sse2,      // FLIPADST_FLIPADST
-  fadst4x8_new_sse2,      // ADST_FLIPADST
-  fadst4x8_new_sse2,      // FLIPADST_ADST
-  fidentity8x8_new_sse2,  // IDTX
-  fdct4x8_new_sse2,       // V_DCT
-  fidentity8x8_new_sse2,  // H_DCT
-  fadst4x8_new_sse2,      // V_ADST
-  fidentity8x8_new_sse2,  // H_ADST
-  fadst4x8_new_sse2,      // V_FLIPADST
-  fidentity8x8_new_sse2   // H_FLIPADST
-};
-
-static const transform_1d_sse2 row_txfm8x4_arr[TX_TYPES] = {
-  fdct8x4_new_sse2,       // DCT_DCT
-  fdct8x4_new_sse2,       // ADST_DCT
-  fadst8x4_new_sse2,      // DCT_ADST
-  fadst8x4_new_sse2,      // ADST_ADST
-  fdct8x4_new_sse2,       // FLIPADST_DCT
-  fadst8x4_new_sse2,      // DCT_FLIPADST
-  fadst8x4_new_sse2,      // FLIPADST_FLIPADST
-  fadst8x4_new_sse2,      // ADST_FLIPADST
-  fadst8x4_new_sse2,      // FLIPADST_ADST
-  fidentity8x4_new_sse2,  // IDTX
-  fidentity8x4_new_sse2,  // V_DCT
-  fdct8x4_new_sse2,       // H_DCT
-  fidentity8x4_new_sse2,  // V_ADST
-  fadst8x4_new_sse2,      // H_ADST
-  fidentity8x4_new_sse2,  // V_FLIPADST
-  fadst8x4_new_sse2       // H_FLIPADST
-};
-
-static const transform_1d_sse2 col_txfm8x4_arr[TX_TYPES] = {
-  fdct8x4_new_sse2,       // DCT_DCT
-  fadst8x4_new_sse2,      // ADST_DCT
-  fdct8x4_new_sse2,       // DCT_ADST
-  fadst8x4_new_sse2,      // ADST_ADST
-  fadst8x4_new_sse2,      // FLIPADST_DCT
-  fdct8x4_new_sse2,       // DCT_FLIPADST
-  fadst8x4_new_sse2,      // FLIPADST_FLIPADST
-  fadst8x4_new_sse2,      // ADST_FLIPADST
-  fadst8x4_new_sse2,      // FLIPADST_ADST
-  fidentity8x4_new_sse2,  // IDTX
-  fdct8x4_new_sse2,       // V_DCT
-  fidentity8x4_new_sse2,  // H_DCT
-  fadst8x4_new_sse2,      // V_ADST
-  fidentity8x4_new_sse2,  // H_ADST
-  fadst8x4_new_sse2,      // V_FLIPADST
-  fidentity8x4_new_sse2   // H_FLIPADST
-};
-
-static const transform_1d_sse2 row_txfm4x8_arr[TX_TYPES] = {
-  fdct4x8_new_sse2,       // DCT_DCT
-  fdct4x8_new_sse2,       // ADST_DCT
-  fadst4x8_new_sse2,      // DCT_ADST
-  fadst4x8_new_sse2,      // ADST_ADST
-  fdct4x8_new_sse2,       // FLIPADST_DCT
-  fadst4x8_new_sse2,      // DCT_FLIPADST
-  fadst4x8_new_sse2,      // FLIPADST_FLIPADST
-  fadst4x8_new_sse2,      // ADST_FLIPADST
-  fadst4x8_new_sse2,      // FLIPADST_ADST
-  fidentity8x8_new_sse2,  // IDTX
-  fidentity8x8_new_sse2,  // V_DCT
-  fdct4x8_new_sse2,       // H_DCT
-  fidentity8x8_new_sse2,  // V_ADST
-  fadst4x8_new_sse2,      // H_ADST
-  fidentity8x8_new_sse2,  // V_FLIPADST
-  fadst4x8_new_sse2       // H_FLIPADST
-};
-
-static const transform_1d_sse2 col_txfm8x8_arr[TX_TYPES] = {
-  fdct8x8_new_sse2,       // DCT_DCT
-  fadst8x8_new_sse2,      // ADST_DCT
-  fdct8x8_new_sse2,       // DCT_ADST
-  fadst8x8_new_sse2,      // ADST_ADST
-  fadst8x8_new_sse2,      // FLIPADST_DCT
-  fdct8x8_new_sse2,       // DCT_FLIPADST
-  fadst8x8_new_sse2,      // FLIPADST_FLIPADST
-  fadst8x8_new_sse2,      // ADST_FLIPADST
-  fadst8x8_new_sse2,      // FLIPADST_ADST
-  fidentity8x8_new_sse2,  // IDTX
-  fdct8x8_new_sse2,       // V_DCT
-  fidentity8x8_new_sse2,  // H_DCT
-  fadst8x8_new_sse2,      // V_ADST
-  fidentity8x8_new_sse2,  // H_ADST
-  fadst8x8_new_sse2,      // V_FLIPADST
-  fidentity8x8_new_sse2,  // H_FLIPADST
-};
-
-static const transform_1d_sse2 row_txfm8x8_arr[TX_TYPES] = {
-  fdct8x8_new_sse2,       // DCT_DCT
-  fdct8x8_new_sse2,       // ADST_DCT
-  fadst8x8_new_sse2,      // DCT_ADST
-  fadst8x8_new_sse2,      // ADST_ADST
-  fdct8x8_new_sse2,       // FLIPADST_DCT
-  fadst8x8_new_sse2,      // DCT_FLIPADST
-  fadst8x8_new_sse2,      // FLIPADST_FLIPADST
-  fadst8x8_new_sse2,      // ADST_FLIPADST
-  fadst8x8_new_sse2,      // FLIPADST_ADST
-  fidentity8x8_new_sse2,  // IDTX
-  fidentity8x8_new_sse2,  // V_DCT
-  fdct8x8_new_sse2,       // H_DCT
-  fidentity8x8_new_sse2,  // V_ADST
-  fadst8x8_new_sse2,      // H_ADST
-  fidentity8x8_new_sse2,  // V_FLIPADST
-  fadst8x8_new_sse2       // H_FLIPADST
-};
-
-static const transform_1d_sse2 col_txfm8x16_arr[TX_TYPES] = {
-  fdct8x16_new_sse2,       // DCT_DCT
-  fadst8x16_new_sse2,      // ADST_DCT
-  fdct8x16_new_sse2,       // DCT_ADST
-  fadst8x16_new_sse2,      // ADST_ADST
-  fadst8x16_new_sse2,      // FLIPADST_DCT
-  fdct8x16_new_sse2,       // DCT_FLIPADST
-  fadst8x16_new_sse2,      // FLIPADST_FLIPADST
-  fadst8x16_new_sse2,      // ADST_FLIPADST
-  fadst8x16_new_sse2,      // FLIPADST_ADST
-  fidentity8x16_new_sse2,  // IDTX
-  fdct8x16_new_sse2,       // V_DCT
-  fidentity8x16_new_sse2,  // H_DCT
-  fadst8x16_new_sse2,      // V_ADST
-  fidentity8x16_new_sse2,  // H_ADST
-  fadst8x16_new_sse2,      // V_FLIPADST
-  fidentity8x16_new_sse2   // H_FLIPADST
-};
-
-static const transform_1d_sse2 row_txfm8x16_arr[TX_TYPES] = {
-  fdct8x16_new_sse2,       // DCT_DCT
-  fdct8x16_new_sse2,       // ADST_DCT
-  fadst8x16_new_sse2,      // DCT_ADST
-  fadst8x16_new_sse2,      // ADST_ADST
-  fdct8x16_new_sse2,       // FLIPADST_DCT
-  fadst8x16_new_sse2,      // DCT_FLIPADST
-  fadst8x16_new_sse2,      // FLIPADST_FLIPADST
-  fadst8x16_new_sse2,      // ADST_FLIPADST
-  fadst8x16_new_sse2,      // FLIPADST_ADST
-  fidentity8x16_new_sse2,  // IDTX
-  fidentity8x16_new_sse2,  // V_DCT
-  fdct8x16_new_sse2,       // H_DCT
-  fidentity8x16_new_sse2,  // V_ADST
-  fadst8x16_new_sse2,      // H_ADST
-  fidentity8x16_new_sse2,  // V_FLIPADST
-  fadst8x16_new_sse2       // H_FLIPADST
-};
-
-static const transform_1d_sse2 row_txfm8x32_arr[TX_TYPES] = {
-  fdct8x32_new_sse2,       // DCT_DCT
-  NULL,                    // ADST_DCT
-  NULL,                    // DCT_ADST
-  NULL,                    // ADST_ADST
-  NULL,                    // FLIPADST_DCT
-  NULL,                    // DCT_FLIPADST
-  NULL,                    // FLIPADST_FLIPADST
-  NULL,                    // ADST_FLIPADST
-  NULL,                    // FLIPADST_ADST
-  fidentity8x32_new_sse2,  // IDTX
-  fidentity8x32_new_sse2,  // V_DCT
-  fdct8x32_new_sse2,       // H_DCT
-  NULL,                    // V_ADST
-  NULL,                    // H_ADST
-  NULL,                    // V_FLIPADST
-  NULL                     // H_FLIPADST
-};
-
-void av1_lowbd_fwd_txfm2d_4x4_sse2(const int16_t *input, int32_t *output,
-                                   int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  __m128i buf0[4], buf1[4], *buf;
-  const int8_t *shift = fwd_txfm_shift_ls[TX_4X4];
-  const int txw_idx = get_txw_idx(TX_4X4);
-  const int txh_idx = get_txh_idx(TX_4X4);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = 4;
-  const int height = 4;
-  const transform_1d_sse2 col_txfm = col_txfm4x4_arr[tx_type];
-  const transform_1d_sse2 row_txfm = row_txfm4x4_arr[tx_type];
-  int ud_flip, lr_flip;
-
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-  if (ud_flip) {
-    load_buffer_16bit_to_16bit_w4_flip(input, stride, buf0, height);
-  } else {
-    load_buffer_16bit_to_16bit_w4(input, stride, buf0, height);
-  }
-  round_shift_16bit(buf0, height, shift[0]);
-  col_txfm(buf0, buf0, cos_bit_col);
-  round_shift_16bit(buf0, height, shift[1]);
-  transpose_16bit_4x4(buf0, buf1);
-
-  if (lr_flip) {
-    buf = buf0;
-    flip_buf_sse2(buf1, buf, width);
-  } else {
-    buf = buf1;
-  }
-  row_txfm(buf, buf, cos_bit_row);
-  round_shift_16bit(buf, width, shift[2]);
-  transpose_16bit_4x4(buf, buf);
-  store_buffer_16bit_to_32bit_w4(buf, output, width, height);
-}
-
-void av1_lowbd_fwd_txfm2d_4x8_sse2(const int16_t *input, int32_t *output,
-                                   int stride, TX_TYPE tx_type, int bd) {
-  (void)stride;
-  (void)bd;
-  __m128i buf0[8], buf1[8], *buf;
-  const int8_t *shift = fwd_txfm_shift_ls[TX_4X8];
-  const int txw_idx = get_txw_idx(TX_4X8);
-  const int txh_idx = get_txh_idx(TX_4X8);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = 4;
-  const int height = 8;
-  const transform_1d_sse2 col_txfm = col_txfm4x8_arr[tx_type];
-  const transform_1d_sse2 row_txfm = row_txfm8x4_arr[tx_type];
-  int ud_flip, lr_flip;
-
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-  if (ud_flip) {
-    load_buffer_16bit_to_16bit_w4_flip(input, stride, buf0, height);
-  } else {
-    load_buffer_16bit_to_16bit_w4(input, stride, buf0, height);
-  }
-  round_shift_16bit(buf0, height, shift[0]);
-  col_txfm(buf0, buf0, cos_bit_col);
-  round_shift_16bit(buf0, height, shift[1]);
-  transpose_16bit_4x8(buf0, buf1);
-
-  if (lr_flip) {
-    buf = buf0;
-    flip_buf_sse2(buf1, buf, width);
-  } else {
-    buf = buf1;
-  }
-  row_txfm(buf, buf, cos_bit_row);
-  round_shift_16bit(buf, width, shift[2]);
-  transpose_16bit_8x4(buf, buf);
-  store_rect_buffer_16bit_to_32bit_w4(buf, output, width, height);
-}
-
-void av1_lowbd_fwd_txfm2d_4x16_sse2(const int16_t *input, int32_t *output,
-                                    int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  __m128i buf0[16], buf1[16];
-  const int8_t *shift = fwd_txfm_shift_ls[TX_4X16];
-  const int txw_idx = get_txw_idx(TX_4X16);
-  const int txh_idx = get_txh_idx(TX_4X16);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = 4;
-  const int height = 16;
-  const transform_1d_sse2 col_txfm = col_txfm8x16_arr[tx_type];
-  const transform_1d_sse2 row_txfm = row_txfm8x4_arr[tx_type];
-  int ud_flip, lr_flip;
-
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-  if (ud_flip) {
-    load_buffer_16bit_to_16bit_w4_flip(input, stride, buf0, height);
-  } else {
-    load_buffer_16bit_to_16bit_w4(input, stride, buf0, height);
-  }
-  round_shift_16bit(buf0, height, shift[0]);
-  col_txfm(buf0, buf0, cos_bit_col);
-  round_shift_16bit(buf0, height, shift[1]);
-  transpose_16bit_4x8(buf0, buf1);
-  transpose_16bit_4x8(buf0 + 8, buf1 + 8);
-
-  for (int i = 0; i < 2; i++) {
-    __m128i *buf;
-    if (lr_flip) {
-      buf = buf0;
-      flip_buf_sse2(buf1 + 8 * i, buf, width);
-    } else {
-      buf = buf1 + 8 * i;
-    }
-    row_txfm(buf, buf, cos_bit_row);
-    round_shift_16bit(buf, width, shift[2]);
-    transpose_16bit_8x4(buf, buf);
-    store_buffer_16bit_to_32bit_w4(buf, output + 8 * width * i, width, 8);
-  }
-}
-
-void av1_lowbd_fwd_txfm2d_8x4_sse2(const int16_t *input, int32_t *output,
-                                   int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  __m128i buf0[8], buf1[8], *buf;
-  const int8_t *shift = fwd_txfm_shift_ls[TX_8X4];
-  const int txw_idx = get_txw_idx(TX_8X4);
-  const int txh_idx = get_txh_idx(TX_8X4);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = 8;
-  const int height = 4;
-  const transform_1d_sse2 col_txfm = col_txfm8x4_arr[tx_type];
-  const transform_1d_sse2 row_txfm = row_txfm4x8_arr[tx_type];
-  int ud_flip, lr_flip;
-
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-  if (ud_flip)
-    load_buffer_16bit_to_16bit_flip(input, stride, buf0, height);
-  else
-    load_buffer_16bit_to_16bit(input, stride, buf0, height);
-  round_shift_16bit(buf0, height, shift[0]);
-  col_txfm(buf0, buf0, cos_bit_col);
-  round_shift_16bit(buf0, height, shift[1]);
-  transpose_16bit_8x8(buf0, buf1);
-
-  if (lr_flip) {
-    buf = buf0;
-    flip_buf_sse2(buf1, buf, width);
-  } else {
-    buf = buf1;
-  }
-  row_txfm(buf, buf, cos_bit_row);
-  round_shift_16bit(buf, width, shift[2]);
-  transpose_16bit_8x8(buf, buf);
-  store_rect_buffer_16bit_to_32bit_w8(buf, output, width, height);
-}
-
-void av1_lowbd_fwd_txfm2d_8x8_sse2(const int16_t *input, int32_t *output,
-                                   int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  __m128i buf0[8], buf1[8], *buf;
-  const int8_t *shift = fwd_txfm_shift_ls[TX_8X8];
-  const int txw_idx = get_txw_idx(TX_8X8);
-  const int txh_idx = get_txh_idx(TX_8X8);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = 8;
-  const int height = 8;
-  const transform_1d_sse2 col_txfm = col_txfm8x8_arr[tx_type];
-  const transform_1d_sse2 row_txfm = row_txfm8x8_arr[tx_type];
-  int ud_flip, lr_flip;
-
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-  if (ud_flip)
-    load_buffer_16bit_to_16bit_flip(input, stride, buf0, height);
-  else
-    load_buffer_16bit_to_16bit(input, stride, buf0, height);
-  round_shift_16bit(buf0, height, shift[0]);
-  col_txfm(buf0, buf0, cos_bit_col);
-  round_shift_16bit(buf0, height, shift[1]);
-  transpose_16bit_8x8(buf0, buf1);
-
-  if (lr_flip) {
-    buf = buf0;
-    flip_buf_sse2(buf1, buf, width);
-  } else {
-    buf = buf1;
-  }
-  row_txfm(buf, buf, cos_bit_row);
-  round_shift_16bit(buf, width, shift[2]);
-  transpose_16bit_8x8(buf, buf);
-  store_buffer_16bit_to_32bit_w8(buf, output, width, height);
-}
-
-void av1_lowbd_fwd_txfm2d_8x16_sse2(const int16_t *input, int32_t *output,
-                                    int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  __m128i buf0[16], buf1[16];
-  const int8_t *shift = fwd_txfm_shift_ls[TX_8X16];
-  const int txw_idx = get_txw_idx(TX_8X16);
-  const int txh_idx = get_txh_idx(TX_8X16);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = 8;
-  const int height = 16;
-  const transform_1d_sse2 col_txfm = col_txfm8x16_arr[tx_type];
-  const transform_1d_sse2 row_txfm = row_txfm8x8_arr[tx_type];
-  int ud_flip, lr_flip;
-
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-  if (ud_flip) {
-    load_buffer_16bit_to_16bit_flip(input, stride, buf0, height);
-  } else {
-    load_buffer_16bit_to_16bit(input, stride, buf0, height);
-  }
-  round_shift_16bit(buf0, height, shift[0]);
-  col_txfm(buf0, buf0, cos_bit_col);
-  round_shift_16bit(buf0, height, shift[1]);
-  transpose_16bit_8x8(buf0, buf1);
-  transpose_16bit_8x8(buf0 + 8, buf1 + 8);
-
-  for (int i = 0; i < 2; i++) {
-    __m128i *buf;
-    if (lr_flip) {
-      buf = buf0;
-      flip_buf_sse2(buf1 + width * i, buf, width);
-    } else {
-      buf = buf1 + width * i;
-    }
-    row_txfm(buf, buf, cos_bit_row);
-    round_shift_16bit(buf, width, shift[2]);
-    transpose_16bit_8x8(buf, buf);
-    store_rect_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width, 8);
-  }
-}
-
-void av1_lowbd_fwd_txfm2d_8x32_sse2(const int16_t *input, int32_t *output,
-                                    int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  __m128i buf0[32], buf1[32];
-  const int8_t *shift = fwd_txfm_shift_ls[TX_8X32];
-  const int txw_idx = get_txw_idx(TX_8X32);
-  const int txh_idx = get_txh_idx(TX_8X32);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = 8;
-  const int height = 32;
-  const transform_1d_sse2 col_txfm = col_txfm8x32_arr[tx_type];
-  const transform_1d_sse2 row_txfm = row_txfm8x8_arr[tx_type];
-  int ud_flip, lr_flip;
-
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-  if (ud_flip) {
-    load_buffer_16bit_to_16bit_flip(input, stride, buf0, height);
-  } else {
-    load_buffer_16bit_to_16bit(input, stride, buf0, height);
-  }
-  round_shift_16bit(buf0, height, shift[0]);
-  col_txfm(buf0, buf0, cos_bit_col);
-  round_shift_16bit(buf0, height, shift[1]);
-  transpose_16bit_8x8(buf0, buf1);
-  transpose_16bit_8x8(buf0 + 8, buf1 + 8);
-  transpose_16bit_8x8(buf0 + 16, buf1 + 16);
-  transpose_16bit_8x8(buf0 + 24, buf1 + 24);
-
-  for (int i = 0; i < 4; i++) {
-    __m128i *buf;
-    if (lr_flip) {
-      buf = buf0;
-      flip_buf_sse2(buf1 + width * i, buf, width);
-    } else {
-      buf = buf1 + width * i;
-    }
-    row_txfm(buf, buf, cos_bit_row);
-    round_shift_16bit(buf, width, shift[2]);
-    transpose_16bit_8x8(buf, buf);
-    store_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width, 8);
-  }
-}
-
-void av1_lowbd_fwd_txfm2d_16x4_sse2(const int16_t *input, int32_t *output,
-                                    int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  __m128i buf0[16], buf1[16];
-  const int8_t *shift = fwd_txfm_shift_ls[TX_16X4];
-  const int txw_idx = get_txw_idx(TX_16X4);
-  const int txh_idx = get_txh_idx(TX_16X4);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = 16;
-  const int height = 4;
-  const transform_1d_sse2 col_txfm = col_txfm8x4_arr[tx_type];
-  const transform_1d_sse2 row_txfm = row_txfm8x16_arr[tx_type];
-  __m128i *buf;
-  int ud_flip, lr_flip;
-
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-  for (int i = 0; i < 2; i++) {
-    if (ud_flip) {
-      load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height);
-    } else {
-      load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
-    }
-    round_shift_16bit(buf0, height, shift[0]);
-    col_txfm(buf0, buf0, cos_bit_col);
-    round_shift_16bit(buf0, height, shift[1]);
-    transpose_16bit_8x4(buf0, buf1 + 8 * i);
-  }
-
-  if (lr_flip) {
-    buf = buf0;
-    flip_buf_sse2(buf1, buf, width);
-  } else {
-    buf = buf1;
-  }
-  row_txfm(buf, buf, cos_bit_row);
-  round_shift_16bit(buf, width, shift[2]);
-  transpose_16bit_4x8(buf, buf);
-  store_buffer_16bit_to_32bit_w8(buf, output, width, height);
-  transpose_16bit_4x8(buf + 8, buf + 8);
-  store_buffer_16bit_to_32bit_w8(buf + 8, output + 8, width, height);
-}
-
-void av1_lowbd_fwd_txfm2d_16x8_sse2(const int16_t *input, int32_t *output,
-                                    int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  __m128i buf0[16], buf1[16];
-  const int8_t *shift = fwd_txfm_shift_ls[TX_16X8];
-  const int txw_idx = get_txw_idx(TX_16X8);
-  const int txh_idx = get_txh_idx(TX_16X8);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = 16;
-  const int height = 8;
-  const transform_1d_sse2 col_txfm = col_txfm8x8_arr[tx_type];
-  const transform_1d_sse2 row_txfm = row_txfm8x16_arr[tx_type];
-  __m128i *buf;
-  int ud_flip, lr_flip;
-
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-  for (int i = 0; i < 2; i++) {
-    if (ud_flip) {
-      load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height);
-    } else {
-      load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
-    }
-    round_shift_16bit(buf0, height, shift[0]);
-    col_txfm(buf0, buf0, cos_bit_col);
-    round_shift_16bit(buf0, height, shift[1]);
-    transpose_16bit_8x8(buf0, buf1 + 8 * i);
-  }
-
-  if (lr_flip) {
-    buf = buf0;
-    flip_buf_sse2(buf1, buf, width);
-  } else {
-    buf = buf1;
-  }
-  row_txfm(buf, buf, cos_bit_row);
-  round_shift_16bit(buf, width, shift[2]);
-  transpose_16bit_8x8(buf, buf);
-  store_rect_buffer_16bit_to_32bit_w8(buf, output, width, height);
-  transpose_16bit_8x8(buf + 8, buf + 8);
-  store_rect_buffer_16bit_to_32bit_w8(buf + 8, output + 8, width, height);
-}
-
-void av1_lowbd_fwd_txfm2d_16x16_sse2(const int16_t *input, int32_t *output,
-                                     int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  __m128i buf0[16], buf1[32];
-  const int8_t *shift = fwd_txfm_shift_ls[TX_16X16];
-  const int txw_idx = get_txw_idx(TX_16X16);
-  const int txh_idx = get_txh_idx(TX_16X16);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = 16;
-  const int height = 16;
-  const transform_1d_sse2 col_txfm = col_txfm8x16_arr[tx_type];
-  const transform_1d_sse2 row_txfm = row_txfm8x16_arr[tx_type];
-  int ud_flip, lr_flip;
-
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-  for (int i = 0; i < 2; i++) {
-    if (ud_flip) {
-      load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height);
-    } else {
-      load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
-    }
-    round_shift_16bit(buf0, height, shift[0]);
-    col_txfm(buf0, buf0, cos_bit_col);
-    round_shift_16bit(buf0, height, shift[1]);
-    transpose_16bit_8x8(buf0, buf1 + 0 * width + 8 * i);
-    transpose_16bit_8x8(buf0 + 8, buf1 + 1 * width + 8 * i);
-  }
-
-  for (int i = 0; i < 2; i++) {
-    __m128i *buf;
-    if (lr_flip) {
-      buf = buf0;
-      flip_buf_sse2(buf1 + width * i, buf, width);
-    } else {
-      buf = buf1 + width * i;
-    }
-    row_txfm(buf, buf, cos_bit_row);
-    round_shift_16bit(buf, width, shift[2]);
-    transpose_16bit_8x8(buf, buf);
-    store_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width, 8);
-    transpose_16bit_8x8(buf + 8, buf + 8);
-    store_buffer_16bit_to_32bit_w8(buf + 8, output + 8 * width * i + 8, width,
-                                   8);
-  }
-}
-
-void av1_lowbd_fwd_txfm2d_16x32_sse2(const int16_t *input, int32_t *output,
-                                     int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  __m128i buf0[32], buf1[64];
-  const int8_t *shift = fwd_txfm_shift_ls[TX_16X32];
-  const int txw_idx = get_txw_idx(TX_16X32);
-  const int txh_idx = get_txh_idx(TX_16X32);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = 16;
-  const int height = 32;
-  const transform_1d_sse2 col_txfm = col_txfm8x32_arr[tx_type];
-  const transform_1d_sse2 row_txfm = row_txfm8x16_arr[tx_type];
-
-  if (col_txfm != NULL && row_txfm != NULL) {
-    int ud_flip, lr_flip;
-    get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
-    for (int i = 0; i < 2; i++) {
-      if (ud_flip) {
-        load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height);
-      } else {
-        load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
-      }
-      round_shift_16bit(buf0, height, shift[0]);
-      col_txfm(buf0, buf0, cos_bit_col);
-      round_shift_16bit(buf0, height, shift[1]);
-      transpose_16bit_8x8(buf0 + 0 * 8, buf1 + 0 * width + 8 * i);
-      transpose_16bit_8x8(buf0 + 1 * 8, buf1 + 1 * width + 8 * i);
-      transpose_16bit_8x8(buf0 + 2 * 8, buf1 + 2 * width + 8 * i);
-      transpose_16bit_8x8(buf0 + 3 * 8, buf1 + 3 * width + 8 * i);
-    }
-
-    for (int i = 0; i < 4; i++) {
-      __m128i *buf;
-      if (lr_flip) {
-        buf = buf0;
-        flip_buf_sse2(buf1 + width * i, buf, width);
-      } else {
-        buf = buf1 + width * i;
-      }
-      row_txfm(buf, buf, cos_bit_row);
-      round_shift_16bit(buf, width, shift[2]);
-      transpose_16bit_8x8(buf, buf);
-      store_rect_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width,
-                                          8);
-      transpose_16bit_8x8(buf + 8, buf + 8);
-      store_rect_buffer_16bit_to_32bit_w8(buf + 8, output + 8 * width * i + 8,
-                                          width, 8);
-    }
-  } else {
-    av1_fwd_txfm2d_16x32_c(input, output, stride, tx_type, bd);
-  }
-}
-
-void av1_lowbd_fwd_txfm2d_32x8_sse2(const int16_t *input, int32_t *output,
-                                    int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  __m128i buf0[32], buf1[32];
-  const int8_t *shift = fwd_txfm_shift_ls[TX_32X8];
-  const int txw_idx = get_txw_idx(TX_32X8);
-  const int txh_idx = get_txh_idx(TX_32X8);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = 32;
-  const int height = 8;
-  const transform_1d_sse2 col_txfm = col_txfm8x8_arr[tx_type];
-  const transform_1d_sse2 row_txfm = row_txfm8x32_arr[tx_type];
-
-  if (col_txfm != NULL && row_txfm != NULL) {
-    int ud_flip, lr_flip;
-    get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
-    for (int i = 0; i < 4; i++) {
-      if (ud_flip) {
-        load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height);
-      } else {
-        load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
-      }
-      round_shift_16bit(buf0, height, shift[0]);
-      col_txfm(buf0, buf0, cos_bit_col);
-      round_shift_16bit(buf0, height, shift[1]);
-      transpose_16bit_8x8(buf0, buf1 + 0 * width + 8 * i);
-    }
-
-    for (int i = 0; i < 1; i++) {
-      __m128i *buf;
-      if (lr_flip) {
-        buf = buf0;
-        flip_buf_sse2(buf1 + width * i, buf, width);
-      } else {
-        buf = buf1 + width * i;
-      }
-      row_txfm(buf, buf, cos_bit_row);
-      round_shift_16bit(buf, width, shift[2]);
-      transpose_16bit_8x8(buf, buf);
-      store_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width,
-                                     height);
-      transpose_16bit_8x8(buf + 8, buf + 8);
-      store_buffer_16bit_to_32bit_w8(buf + 8, output + 8 * width * i + 8, width,
-                                     height);
-      transpose_16bit_8x8(buf + 16, buf + 16);
-      store_buffer_16bit_to_32bit_w8(buf + 16, output + 8 * width * i + 16,
-                                     width, height);
-      transpose_16bit_8x8(buf + 24, buf + 24);
-      store_buffer_16bit_to_32bit_w8(buf + 24, output + 8 * width * i + 24,
-                                     width, height);
-    }
-  } else {
-    av1_fwd_txfm2d_32x16_c(input, output, stride, tx_type, bd);
-  }
-}
-
-void av1_lowbd_fwd_txfm2d_32x16_sse2(const int16_t *input, int32_t *output,
-                                     int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  __m128i buf0[32], buf1[64];
-  const int8_t *shift = fwd_txfm_shift_ls[TX_32X16];
-  const int txw_idx = get_txw_idx(TX_32X16);
-  const int txh_idx = get_txh_idx(TX_32X16);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = 32;
-  const int height = 16;
-  const transform_1d_sse2 col_txfm = col_txfm8x16_arr[tx_type];
-  const transform_1d_sse2 row_txfm = row_txfm8x32_arr[tx_type];
-
-  if (col_txfm != NULL && row_txfm != NULL) {
-    int ud_flip, lr_flip;
-    get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
-    for (int i = 0; i < 4; i++) {
-      if (ud_flip) {
-        load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height);
-      } else {
-        load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
-      }
-      round_shift_16bit(buf0, height, shift[0]);
-      col_txfm(buf0, buf0, cos_bit_col);
-      round_shift_16bit(buf0, height, shift[1]);
-      transpose_16bit_8x8(buf0, buf1 + 0 * width + 8 * i);
-      transpose_16bit_8x8(buf0 + 8, buf1 + 1 * width + 8 * i);
-    }
-
-    for (int i = 0; i < 2; i++) {
-      __m128i *buf;
-      if (lr_flip) {
-        buf = buf0;
-        flip_buf_sse2(buf1 + width * i, buf, width);
-      } else {
-        buf = buf1 + width * i;
-      }
-      row_txfm(buf, buf, cos_bit_row);
-      round_shift_16bit(buf, width, shift[2]);
-      transpose_16bit_8x8(buf, buf);
-      store_rect_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width,
-                                          8);
-      transpose_16bit_8x8(buf + 8, buf + 8);
-      store_rect_buffer_16bit_to_32bit_w8(buf + 8, output + 8 * width * i + 8,
-                                          width, 8);
-      transpose_16bit_8x8(buf + 16, buf + 16);
-      store_rect_buffer_16bit_to_32bit_w8(buf + 16, output + 8 * width * i + 16,
-                                          width, 8);
-      transpose_16bit_8x8(buf + 24, buf + 24);
-      store_rect_buffer_16bit_to_32bit_w8(buf + 24, output + 8 * width * i + 24,
-                                          width, 8);
-    }
-  } else {
-    av1_fwd_txfm2d_32x16_c(input, output, stride, tx_type, bd);
-  }
-}
-
-void av1_lowbd_fwd_txfm2d_32x32_sse2(const int16_t *input, int32_t *output,
-                                     int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  __m128i buf0[32], buf1[128];
-  const int8_t *shift = fwd_txfm_shift_ls[TX_32X32];
-  const int txw_idx = get_txw_idx(TX_32X32);
-  const int txh_idx = get_txh_idx(TX_32X32);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = 32;
-  const int height = 32;
-  const transform_1d_sse2 col_txfm = col_txfm8x32_arr[tx_type];
-  const transform_1d_sse2 row_txfm = row_txfm8x32_arr[tx_type];
-
-  if (col_txfm != NULL && row_txfm != NULL) {
-    int ud_flip, lr_flip;
-    get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
-    for (int i = 0; i < 4; i++) {
-      if (ud_flip) {
-        load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height);
-      } else {
-        load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
-      }
-      round_shift_16bit(buf0, height, shift[0]);
-      col_txfm(buf0, buf0, cos_bit_col);
-      round_shift_16bit(buf0, height, shift[1]);
-      transpose_16bit_8x8(buf0 + 0 * 8, buf1 + 0 * width + 8 * i);
-      transpose_16bit_8x8(buf0 + 1 * 8, buf1 + 1 * width + 8 * i);
-      transpose_16bit_8x8(buf0 + 2 * 8, buf1 + 2 * width + 8 * i);
-      transpose_16bit_8x8(buf0 + 3 * 8, buf1 + 3 * width + 8 * i);
-    }
-
-    for (int i = 0; i < 4; i++) {
-      __m128i *buf;
-      if (lr_flip) {
-        buf = buf0;
-        flip_buf_sse2(buf1 + width * i, buf, width);
-      } else {
-        buf = buf1 + width * i;
-      }
-      row_txfm(buf, buf, cos_bit_row);
-      round_shift_16bit(buf, width, shift[2]);
-      transpose_16bit_8x8(buf, buf);
-      store_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width, 8);
-      transpose_16bit_8x8(buf + 8, buf + 8);
-      store_buffer_16bit_to_32bit_w8(buf + 8, output + 8 * width * i + 8, width,
-                                     8);
-      transpose_16bit_8x8(buf + 16, buf + 16);
-      store_buffer_16bit_to_32bit_w8(buf + 16, output + 8 * width * i + 16,
-                                     width, 8);
-      transpose_16bit_8x8(buf + 24, buf + 24);
-      store_buffer_16bit_to_32bit_w8(buf + 24, output + 8 * width * i + 24,
-                                     width, 8);
-    }
-  } else {
-    av1_fwd_txfm2d_32x32_c(input, output, stride, tx_type, bd);
-  }
-}
-
-void av1_lowbd_fwd_txfm2d_64x16_sse2(const int16_t *input, int32_t *output,
-                                     int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  (void)tx_type;
-  assert(tx_type == DCT_DCT);
-  const TX_SIZE tx_size = TX_64X16;
-  __m128i buf0[64], buf1[128];
-  const int8_t *shift = fwd_txfm_shift_ls[tx_size];
-  const int txw_idx = get_txw_idx(tx_size);
-  const int txh_idx = get_txh_idx(tx_size);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-  const transform_1d_sse2 col_txfm = fdct8x16_new_sse2;
-  const transform_1d_sse2 row_txfm = fdct8x64_new_sse2;
-  const int width_div8 = (width >> 3);
-  const int height_div8 = (height >> 3);
-
-  for (int i = 0; i < width_div8; i++) {
-    load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
-    round_shift_16bit(buf0, height, shift[0]);
-    col_txfm(buf0, buf0, cos_bit_col);
-    round_shift_16bit(buf0, height, shift[1]);
-    for (int j = 0; j < height_div8; ++j) {
-      transpose_16bit_8x8(buf0 + j * 8, buf1 + j * width + 8 * i);
-    }
-  }
-
-  for (int i = 0; i < height_div8; i++) {
-    __m128i *buf = buf1 + width * i;
-    row_txfm(buf, buf, cos_bit_row);
-    round_shift_16bit(buf, width, shift[2]);
-    int32_t *output8 = output + 8 * 32 * i;
-    for (int j = 0; j < 4; ++j) {
-      __m128i *buf8 = buf + 8 * j;
-      transpose_16bit_8x8(buf8, buf8);
-      store_buffer_16bit_to_32bit_w8(buf8, output8 + 8 * j, 32, 8);
-    }
-  }
-}
-
-void av1_lowbd_fwd_txfm2d_16x64_sse2(const int16_t *input, int32_t *output,
-                                     int stride, TX_TYPE tx_type, int bd) {
-  (void)bd;
-  (void)tx_type;
-  assert(tx_type == DCT_DCT);
-  const TX_SIZE tx_size = TX_16X64;
-  __m128i buf0[64], buf1[128];
-  const int8_t *shift = fwd_txfm_shift_ls[tx_size];
-  const int txw_idx = get_txw_idx(tx_size);
-  const int txh_idx = get_txh_idx(tx_size);
-  const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx];
-  const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx];
-  const int width = tx_size_wide[tx_size];
-  const int height = tx_size_high[tx_size];
-  const transform_1d_sse2 col_txfm = fdct8x64_new_sse2;
-  const transform_1d_sse2 row_txfm = fdct8x16_new_sse2;
-  const int width_div8 = (width >> 3);
-  const int height_div8 = (height >> 3);
-
-  for (int i = 0; i < width_div8; i++) {
-    load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height);
-    round_shift_16bit(buf0, height, shift[0]);
-    col_txfm(buf0, buf0, cos_bit_col);
-    round_shift_16bit(buf0, height, shift[1]);
-    for (int j = 0; j < height_div8; ++j) {
-      transpose_16bit_8x8(buf0 + j * 8, buf1 + j * width + 8 * i);
-    }
-  }
-
-  for (int i = 0; i < AOMMIN(4, height_div8); i++) {
-    __m128i *buf = buf1 + width * i;
-    row_txfm(buf, buf, cos_bit_row);
-    round_shift_16bit(buf, width, shift[2]);
-    int32_t *output8 = output + 8 * width * i;
-    for (int j = 0; j < width_div8; ++j) {
-      __m128i *buf8 = buf + 8 * j;
-      transpose_16bit_8x8(buf8, buf8);
-      store_buffer_16bit_to_32bit_w8(buf8, output8 + 8 * j, width, 8);
-    }
-  }
-  // Zero out the bottom 16x32 area.
-  memset(output + 16 * 32, 0, 16 * 32 * sizeof(*output));
-}
-
-static FwdTxfm2dFunc fwd_txfm2d_func_ls[TX_SIZES_ALL] = {
-  av1_lowbd_fwd_txfm2d_4x4_sse2,    // 4x4 transform
-  av1_lowbd_fwd_txfm2d_8x8_sse2,    // 8x8 transform
-  av1_lowbd_fwd_txfm2d_16x16_sse2,  // 16x16 transform
-  av1_lowbd_fwd_txfm2d_32x32_sse2,  // 32x32 transform
-  NULL,                             // 64x64 transform
-  av1_lowbd_fwd_txfm2d_4x8_sse2,    // 4x8 transform
-  av1_lowbd_fwd_txfm2d_8x4_sse2,    // 8x4 transform
-  av1_lowbd_fwd_txfm2d_8x16_sse2,   // 8x16 transform
-  av1_lowbd_fwd_txfm2d_16x8_sse2,   // 16x8 transform
-  av1_lowbd_fwd_txfm2d_16x32_sse2,  // 16x32 transform
-  av1_lowbd_fwd_txfm2d_32x16_sse2,  // 32x16 transform
-  NULL,                             // 32x64 transform
-  NULL,                             // 64x32 transform
-  av1_lowbd_fwd_txfm2d_4x16_sse2,   // 4x16 transform
-  av1_lowbd_fwd_txfm2d_16x4_sse2,   // 16x4 transform
-  av1_lowbd_fwd_txfm2d_8x32_sse2,   // 8x32 transform
-  av1_lowbd_fwd_txfm2d_32x8_sse2,   // 32x8 transform
-  av1_lowbd_fwd_txfm2d_16x64_sse2,  // 16x64 transform
-  av1_lowbd_fwd_txfm2d_64x16_sse2,  // 64x16 transform
-};
-
-void av1_lowbd_fwd_txfm_sse2(const int16_t *src_diff, tran_low_t *coeff,
-                             int diff_stride, TxfmParam *txfm_param) {
-  FwdTxfm2dFunc fwd_txfm2d_func = fwd_txfm2d_func_ls[txfm_param->tx_size];
-
-  if ((fwd_txfm2d_func == NULL) ||
-      (txfm_param->lossless && txfm_param->tx_size == TX_4X4))
-    av1_lowbd_fwd_txfm_c(src_diff, coeff, diff_stride, txfm_param);
-  else
-    fwd_txfm2d_func(src_diff, coeff, diff_stride, txfm_param->tx_type,
-                    txfm_param->bd);
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.h b/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.h
deleted file mode 100644
index 99a6b9082..000000000
--- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_SSE2_H_
-#define AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_SSE2_H_
-
-#include <immintrin.h>
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/x86/transpose_sse2.h"
-#include "aom_dsp/x86/txfm_common_sse2.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void fdct8x32_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit);
-void fdct8x64_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit);
-
-static INLINE void fidentity4x4_new_sse2(const __m128i *const input,
-                                         __m128i *const output,
-                                         const int8_t cos_bit) {
-  (void)cos_bit;
-  const __m128i one = _mm_set1_epi16(1);
-
-  for (int i = 0; i < 4; ++i) {
-    const __m128i a = _mm_unpacklo_epi16(input[i], one);
-    const __m128i b = scale_round_sse2(a, NewSqrt2);
-    output[i] = _mm_packs_epi32(b, b);
-  }
-}
-
-static INLINE void fidentity8x4_new_sse2(const __m128i *const input,
-                                         __m128i *const output,
-                                         const int8_t cos_bit) {
-  (void)cos_bit;
-  const __m128i one = _mm_set1_epi16(1);
-
-  for (int i = 0; i < 4; ++i) {
-    const __m128i a_lo = _mm_unpacklo_epi16(input[i], one);
-    const __m128i a_hi = _mm_unpackhi_epi16(input[i], one);
-    const __m128i b_lo = scale_round_sse2(a_lo, NewSqrt2);
-    const __m128i b_hi = scale_round_sse2(a_hi, NewSqrt2);
-    output[i] = _mm_packs_epi32(b_lo, b_hi);
-  }
-}
-
-static INLINE void fidentity8x8_new_sse2(const __m128i *input, __m128i *output,
-                                         int8_t cos_bit) {
-  (void)cos_bit;
-
-  output[0] = _mm_adds_epi16(input[0], input[0]);
-  output[1] = _mm_adds_epi16(input[1], input[1]);
-  output[2] = _mm_adds_epi16(input[2], input[2]);
-  output[3] = _mm_adds_epi16(input[3], input[3]);
-  output[4] = _mm_adds_epi16(input[4], input[4]);
-  output[5] = _mm_adds_epi16(input[5], input[5]);
-  output[6] = _mm_adds_epi16(input[6], input[6]);
-  output[7] = _mm_adds_epi16(input[7], input[7]);
-}
-
-static INLINE void fidentity8x16_new_sse2(const __m128i *input, __m128i *output,
-                                          int8_t cos_bit) {
-  (void)cos_bit;
-  const __m128i one = _mm_set1_epi16(1);
-
-  for (int i = 0; i < 16; ++i) {
-    const __m128i a_lo = _mm_unpacklo_epi16(input[i], one);
-    const __m128i a_hi = _mm_unpackhi_epi16(input[i], one);
-    const __m128i b_lo = scale_round_sse2(a_lo, 2 * NewSqrt2);
-    const __m128i b_hi = scale_round_sse2(a_hi, 2 * NewSqrt2);
-    output[i] = _mm_packs_epi32(b_lo, b_hi);
-  }
-}
-
-static INLINE void fidentity8x32_new_sse2(const __m128i *input, __m128i *output,
-                                          int8_t cos_bit) {
-  (void)cos_bit;
-  for (int i = 0; i < 32; ++i) {
-    output[i] = _mm_slli_epi16(input[i], 2);
-  }
-}
-
-static const transform_1d_sse2 col_txfm8x32_arr[TX_TYPES] = {
-  fdct8x32_new_sse2,       // DCT_DCT
-  NULL,                    // ADST_DCT
-  NULL,                    // DCT_ADST
-  NULL,                    // ADST_ADST
-  NULL,                    // FLIPADST_DCT
-  NULL,                    // DCT_FLIPADST
-  NULL,                    // FLIPADST_FLIPADST
-  NULL,                    // ADST_FLIPADST
-  NULL,                    // FLIPADST_ADST
-  fidentity8x32_new_sse2,  // IDTX
-  fdct8x32_new_sse2,       // V_DCT
-  fidentity8x32_new_sse2,  // H_DCT
-  NULL,                    // V_ADST
-  NULL,                    // H_ADST
-  NULL,                    // V_FLIPADST
-  NULL                     // H_FLIPADST
-};
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_SSE2_H_
diff --git a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c b/third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c
deleted file mode 100644
index b58911fcb..000000000
--- a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/aom_dsp_common.h"
-
-static INLINE void init_one_qp(const __m128i *p, __m256i *qp) {
-  const __m128i zero = _mm_setzero_si128();
-  const __m128i dc = _mm_unpacklo_epi16(*p, zero);
-  const __m128i ac = _mm_unpackhi_epi16(*p, zero);
-  *qp = _mm256_insertf128_si256(_mm256_castsi128_si256(dc), ac, 1);
-}
-
-static INLINE void update_qp(__m256i *qp) {
-  qp[0] = _mm256_permute2x128_si256(qp[0], qp[0], 0x11);
-  qp[1] = _mm256_permute2x128_si256(qp[1], qp[1], 0x11);
-  qp[2] = _mm256_permute2x128_si256(qp[2], qp[2], 0x11);
-}
-
-static INLINE void init_qp(const int16_t *round_ptr, const int16_t *quant_ptr,
-                           const int16_t *dequant_ptr, int log_scale,
-                           __m256i *qp) {
-  __m128i round = _mm_loadu_si128((const __m128i *)round_ptr);
-  if (log_scale) {
-    const __m128i round_scale = _mm_set1_epi16(1 << (15 - log_scale));
-    round = _mm_mulhrs_epi16(round, round_scale);
-  }
-  const __m128i quant = _mm_loadu_si128((const __m128i *)quant_ptr);
-  const __m128i dequant = _mm_loadu_si128((const __m128i *)dequant_ptr);
-
-  init_one_qp(&round, &qp[0]);
-  init_one_qp(&quant, &qp[1]);
-  init_one_qp(&dequant, &qp[2]);
-}
-
-static INLINE void quantize(const __m256i *qp, __m256i *c,
-                            const int16_t *iscan_ptr, int log_scale,
-                            tran_low_t *qcoeff, tran_low_t *dqcoeff,
-                            __m256i *eob) {
-  const __m256i abs_coeff = _mm256_abs_epi32(*c);
-  __m256i q = _mm256_add_epi32(abs_coeff, qp[0]);
-
-  __m256i q_lo = _mm256_mul_epi32(q, qp[1]);
-  __m256i q_hi = _mm256_srli_epi64(q, 32);
-  const __m256i qp_hi = _mm256_srli_epi64(qp[1], 32);
-  q_hi = _mm256_mul_epi32(q_hi, qp_hi);
-  q_lo = _mm256_srli_epi64(q_lo, 16 - log_scale);
-  q_hi = _mm256_srli_epi64(q_hi, 16 - log_scale);
-  q_hi = _mm256_slli_epi64(q_hi, 32);
-  q = _mm256_or_si256(q_lo, q_hi);
-  const __m256i abs_s = _mm256_slli_epi32(abs_coeff, 1 + log_scale);
-  const __m256i mask = _mm256_cmpgt_epi32(qp[2], abs_s);
-  q = _mm256_andnot_si256(mask, q);
-
-  __m256i dq = _mm256_mullo_epi32(q, qp[2]);
-  dq = _mm256_srai_epi32(dq, log_scale);
-  q = _mm256_sign_epi32(q, *c);
-  dq = _mm256_sign_epi32(dq, *c);
-
-  _mm256_storeu_si256((__m256i *)qcoeff, q);
-  _mm256_storeu_si256((__m256i *)dqcoeff, dq);
-
-  const __m128i isc = _mm_loadu_si128((const __m128i *)iscan_ptr);
-  const __m128i zr = _mm_setzero_si128();
-  const __m128i lo = _mm_unpacklo_epi16(isc, zr);
-  const __m128i hi = _mm_unpackhi_epi16(isc, zr);
-  const __m256i iscan =
-      _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
-
-  const __m256i zero = _mm256_setzero_si256();
-  const __m256i zc = _mm256_cmpeq_epi32(dq, zero);
-  const __m256i nz = _mm256_cmpeq_epi32(zc, zero);
-  __m256i cur_eob = _mm256_sub_epi32(iscan, nz);
-  cur_eob = _mm256_and_si256(cur_eob, nz);
-  *eob = _mm256_max_epi32(cur_eob, *eob);
-}
-
-void av1_highbd_quantize_fp_avx2(
-    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
-    const int16_t *round_ptr, const int16_t *quant_ptr,
-    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
-    const int16_t *scan, const int16_t *iscan, int log_scale) {
-  (void)scan;
-  (void)zbin_ptr;
-  (void)quant_shift_ptr;
-  const unsigned int step = 8;
-  __m256i qp[3], coeff;
-
-  init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, qp);
-  coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
-
-  __m256i eob = _mm256_setzero_si256();
-  quantize(qp, &coeff, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr, &eob);
-
-  coeff_ptr += step;
-  qcoeff_ptr += step;
-  dqcoeff_ptr += step;
-  iscan += step;
-  n_coeffs -= step;
-
-  update_qp(qp);
-  while (n_coeffs > 0) {
-    coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
-    quantize(qp, &coeff, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr, &eob);
-
-    coeff_ptr += step;
-    qcoeff_ptr += step;
-    dqcoeff_ptr += step;
-    iscan += step;
-    n_coeffs -= step;
-  }
-  {
-    __m256i eob_s;
-    eob_s = _mm256_shuffle_epi32(eob, 0xe);
-    eob = _mm256_max_epi16(eob, eob_s);
-    eob_s = _mm256_shufflelo_epi16(eob, 0xe);
-    eob = _mm256_max_epi16(eob, eob_s);
-    eob_s = _mm256_shufflelo_epi16(eob, 1);
-    eob = _mm256_max_epi16(eob, eob_s);
-    const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob),
-                                            _mm256_extractf128_si256(eob, 1));
-    *eob_ptr = _mm_extract_epi16(final_eob, 0);
-  }
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c b/third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c
deleted file mode 100644
index 40b3b460b..000000000
--- a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <smmintrin.h>
-#include <stdint.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/x86/synonyms.h"
-
-// Coefficient quantization phase 1
-// param[0-2] : rounding/quan/dequan constants
-static INLINE void quantize_coeff_phase1(__m128i *coeff, const __m128i *param,
-                                         const int shift, const int scale,
-                                         __m128i *qcoeff, __m128i *dquan,
-                                         __m128i *sign) {
-  const __m128i zero = _mm_setzero_si128();
-  const __m128i one = _mm_set1_epi32(1);
-
-  *sign = _mm_cmplt_epi32(*coeff, zero);
-  *sign = _mm_or_si128(*sign, one);
-  *coeff = _mm_abs_epi32(*coeff);
-
-  qcoeff[0] = _mm_add_epi32(*coeff, param[0]);
-  qcoeff[1] = _mm_unpackhi_epi32(qcoeff[0], zero);
-  qcoeff[0] = _mm_unpacklo_epi32(qcoeff[0], zero);
-
-  qcoeff[0] = _mm_mul_epi32(qcoeff[0], param[1]);
-  qcoeff[0] = _mm_srli_epi64(qcoeff[0], shift);
-  dquan[0] = _mm_mul_epi32(qcoeff[0], param[2]);
-  dquan[0] = _mm_srli_epi64(dquan[0], scale);
-  const __m128i abs_s = _mm_slli_epi32(*coeff, 1 + scale);
-  qcoeff[2] = _mm_cmplt_epi32(abs_s, param[3]);
-}
-
-// Coefficient quantization phase 2
-static INLINE void quantize_coeff_phase2(__m128i *qcoeff, __m128i *dquan,
-                                         const __m128i *sign,
-                                         const __m128i *param, const int shift,
-                                         const int scale, tran_low_t *qAddr,
-                                         tran_low_t *dqAddr) {
-  __m128i mask0L = _mm_set_epi32(-1, -1, 0, 0);
-  __m128i mask0H = _mm_set_epi32(0, 0, -1, -1);
-
-  qcoeff[1] = _mm_mul_epi32(qcoeff[1], param[1]);
-  qcoeff[1] = _mm_srli_epi64(qcoeff[1], shift);
-  dquan[1] = _mm_mul_epi32(qcoeff[1], param[2]);
-  dquan[1] = _mm_srli_epi64(dquan[1], scale);
-
-  // combine L&H
-  qcoeff[0] = _mm_shuffle_epi32(qcoeff[0], 0xd8);
-  qcoeff[1] = _mm_shuffle_epi32(qcoeff[1], 0x8d);
-
-  qcoeff[0] = _mm_and_si128(qcoeff[0], mask0H);
-  qcoeff[1] = _mm_and_si128(qcoeff[1], mask0L);
-
-  dquan[0] = _mm_shuffle_epi32(dquan[0], 0xd8);
-  dquan[1] = _mm_shuffle_epi32(dquan[1], 0x8d);
-
-  dquan[0] = _mm_and_si128(dquan[0], mask0H);
-  dquan[1] = _mm_and_si128(dquan[1], mask0L);
-
-  qcoeff[0] = _mm_or_si128(qcoeff[0], qcoeff[1]);
-  dquan[0] = _mm_or_si128(dquan[0], dquan[1]);
-
-  qcoeff[0] = _mm_sign_epi32(qcoeff[0], *sign);
-  dquan[0] = _mm_sign_epi32(dquan[0], *sign);
-  qcoeff[0] = _mm_andnot_si128(qcoeff[2], qcoeff[0]);
-  dquan[0] = _mm_andnot_si128(qcoeff[2], dquan[0]);
-  _mm_storeu_si128((__m128i *)qAddr, qcoeff[0]);
-  _mm_storeu_si128((__m128i *)dqAddr, dquan[0]);
-}
-
-static INLINE void find_eob(tran_low_t *qcoeff_ptr, const int16_t *iscan,
-                            __m128i *eob) {
-  const __m128i zero = _mm_setzero_si128();
-  __m128i mask, iscanIdx;
-  const __m128i q0 = _mm_loadu_si128((__m128i const *)qcoeff_ptr);
-  const __m128i q1 = _mm_loadu_si128((__m128i const *)(qcoeff_ptr + 4));
-  __m128i nz_flag0 = _mm_cmpeq_epi32(q0, zero);
-  __m128i nz_flag1 = _mm_cmpeq_epi32(q1, zero);
-
-  nz_flag0 = _mm_cmpeq_epi32(nz_flag0, zero);
-  nz_flag1 = _mm_cmpeq_epi32(nz_flag1, zero);
-
-  mask = _mm_packs_epi32(nz_flag0, nz_flag1);
-  iscanIdx = _mm_loadu_si128((__m128i const *)iscan);
-  iscanIdx = _mm_sub_epi16(iscanIdx, mask);
-  iscanIdx = _mm_and_si128(iscanIdx, mask);
-  *eob = _mm_max_epi16(*eob, iscanIdx);
-}
-
-static INLINE uint16_t get_accumulated_eob(__m128i *eob) {
-  __m128i eob_shuffled;
-  uint16_t eobValue;
-  eob_shuffled = _mm_shuffle_epi32(*eob, 0xe);
-  *eob = _mm_max_epi16(*eob, eob_shuffled);
-  eob_shuffled = _mm_shufflelo_epi16(*eob, 0xe);
-  *eob = _mm_max_epi16(*eob, eob_shuffled);
-  eob_shuffled = _mm_shufflelo_epi16(*eob, 0x1);
-  *eob = _mm_max_epi16(*eob, eob_shuffled);
-  eobValue = _mm_extract_epi16(*eob, 0);
-  return eobValue;
-}
-
-void av1_highbd_quantize_fp_sse4_1(
-    const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr,
-    const int16_t *round_ptr, const int16_t *quant_ptr,
-    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
-    const int16_t *scan, const int16_t *iscan, int log_scale) {
-  __m128i coeff[2], qcoeff[3], dequant[2], qparam[4], coeff_sign;
-  __m128i eob = _mm_setzero_si128();
-  const tran_low_t *src = coeff_ptr;
-  tran_low_t *quanAddr = qcoeff_ptr;
-  tran_low_t *dquanAddr = dqcoeff_ptr;
-  const int shift = 16 - log_scale;
-  const int coeff_stride = 4;
-  const int quan_stride = coeff_stride;
-  (void)zbin_ptr;
-  (void)quant_shift_ptr;
-  (void)scan;
-
-  memset(quanAddr, 0, count * sizeof(quanAddr[0]));
-  memset(dquanAddr, 0, count * sizeof(dquanAddr[0]));
-
-  coeff[0] = _mm_loadu_si128((__m128i const *)src);
-  const int round1 = ROUND_POWER_OF_TWO(round_ptr[1], log_scale);
-  const int round0 = ROUND_POWER_OF_TWO(round_ptr[0], log_scale);
-
-  qparam[0] = _mm_set_epi32(round1, round1, round1, round0);
-  qparam[1] = xx_set_64_from_32i(quant_ptr[1], quant_ptr[0]);
-  qparam[2] = xx_set_64_from_32i(dequant_ptr[1], dequant_ptr[0]);
-  qparam[3] = _mm_set_epi32(dequant_ptr[1], dequant_ptr[1], dequant_ptr[1],
-                            dequant_ptr[0]);
-
-  // DC and first 3 AC
-  quantize_coeff_phase1(&coeff[0], qparam, shift, log_scale, qcoeff, dequant,
-                        &coeff_sign);
-
-  // update round/quan/dquan for AC
-  qparam[0] = _mm_unpackhi_epi64(qparam[0], qparam[0]);
-  qparam[1] = xx_set1_64_from_32i(quant_ptr[1]);
-  qparam[2] = xx_set1_64_from_32i(dequant_ptr[1]);
-  qparam[3] = _mm_set1_epi32(dequant_ptr[1]);
-  quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift, log_scale,
-                        quanAddr, dquanAddr);
-
-  // next 4 AC
-  coeff[1] = _mm_loadu_si128((__m128i const *)(src + coeff_stride));
-  quantize_coeff_phase1(&coeff[1], qparam, shift, log_scale, qcoeff, dequant,
-                        &coeff_sign);
-  quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift, log_scale,
-                        quanAddr + quan_stride, dquanAddr + quan_stride);
-
-  find_eob(quanAddr, iscan, &eob);
-
-  count -= 8;
-
-  // loop for the rest of AC
-  while (count > 0) {
-    src += coeff_stride << 1;
-    quanAddr += quan_stride << 1;
-    dquanAddr += quan_stride << 1;
-    iscan += quan_stride << 1;
-
-    coeff[0] = _mm_loadu_si128((__m128i const *)src);
-    coeff[1] = _mm_loadu_si128((__m128i const *)(src + coeff_stride));
-
-    quantize_coeff_phase1(&coeff[0], qparam, shift, log_scale, qcoeff, dequant,
-                          &coeff_sign);
-    quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift,
-                          log_scale, quanAddr, dquanAddr);
-
-    quantize_coeff_phase1(&coeff[1], qparam, shift, log_scale, qcoeff, dequant,
-                          &coeff_sign);
-    quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift,
-                          log_scale, quanAddr + quan_stride,
-                          dquanAddr + quan_stride);
-
-    find_eob(quanAddr, iscan, &eob);
-
-    count -= 8;
-  }
-  *eob_ptr = get_accumulated_eob(&eob);
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c b/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c
deleted file mode 100644
index df22aaba7..000000000
--- a/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c
+++ /dev/null
@@ -1,330 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/aom_dsp_common.h"
-
-static INLINE void read_coeff(const tran_low_t *coeff, __m256i *c) {
-  if (sizeof(tran_low_t) == 4) {
-    const __m256i x0 = _mm256_loadu_si256((const __m256i *)coeff);
-    const __m256i x1 = _mm256_loadu_si256((const __m256i *)coeff + 1);
-    *c = _mm256_packs_epi32(x0, x1);
-    *c = _mm256_permute4x64_epi64(*c, 0xD8);
-  } else {
-    *c = _mm256_loadu_si256((const __m256i *)coeff);
-  }
-}
-
-static INLINE void write_zero(tran_low_t *qcoeff) {
-  const __m256i zero = _mm256_setzero_si256();
-  if (sizeof(tran_low_t) == 4) {
-    _mm256_storeu_si256((__m256i *)qcoeff, zero);
-    _mm256_storeu_si256((__m256i *)qcoeff + 1, zero);
-  } else {
-    _mm256_storeu_si256((__m256i *)qcoeff, zero);
-  }
-}
-
-static INLINE void init_one_qp(const __m128i *p, __m256i *qp) {
-  const __m128i ac = _mm_unpackhi_epi64(*p, *p);
-  *qp = _mm256_insertf128_si256(_mm256_castsi128_si256(*p), ac, 1);
-}
-
-static INLINE void init_qp(const int16_t *round_ptr, const int16_t *quant_ptr,
-                           const int16_t *dequant_ptr, int log_scale,
-                           __m256i *thr, __m256i *qp) {
-  __m128i round = _mm_loadu_si128((const __m128i *)round_ptr);
-  const __m128i quant = _mm_loadu_si128((const __m128i *)quant_ptr);
-  const __m128i dequant = _mm_loadu_si128((const __m128i *)dequant_ptr);
-
-  if (log_scale > 0) {
-    const __m128i rnd = _mm_set1_epi16((int16_t)1 << (log_scale - 1));
-    round = _mm_add_epi16(round, rnd);
-    round = _mm_srai_epi16(round, log_scale);
-  }
-
-  init_one_qp(&round, &qp[0]);
-  init_one_qp(&quant, &qp[1]);
-
-  if (log_scale == 1) {
-    qp[1] = _mm256_slli_epi16(qp[1], log_scale);
-  }
-
-  init_one_qp(&dequant, &qp[2]);
-  *thr = _mm256_srai_epi16(qp[2], 1 + log_scale);
-}
-
-static INLINE void update_qp(int log_scale, __m256i *thr, __m256i *qp) {
-  qp[0] = _mm256_permute2x128_si256(qp[0], qp[0], 0x11);
-  qp[1] = _mm256_permute2x128_si256(qp[1], qp[1], 0x11);
-  qp[2] = _mm256_permute2x128_si256(qp[2], qp[2], 0x11);
-  *thr = _mm256_srai_epi16(qp[2], 1 + log_scale);
-}
-
-#define store_quan(q, addr)                               \
-  do {                                                    \
-    __m256i sign_bits = _mm256_srai_epi16(q, 15);         \
-    __m256i y0 = _mm256_unpacklo_epi16(q, sign_bits);     \
-    __m256i y1 = _mm256_unpackhi_epi16(q, sign_bits);     \
-    __m256i x0 = _mm256_permute2x128_si256(y0, y1, 0x20); \
-    __m256i x1 = _mm256_permute2x128_si256(y0, y1, 0x31); \
-    _mm256_storeu_si256((__m256i *)addr, x0);             \
-    _mm256_storeu_si256((__m256i *)addr + 1, x1);         \
-  } while (0)
-
-#define store_two_quan(q, addr1, dq, addr2)      \
-  do {                                           \
-    if (sizeof(tran_low_t) == 4) {               \
-      store_quan(q, addr1);                      \
-      store_quan(dq, addr2);                     \
-    } else {                                     \
-      _mm256_storeu_si256((__m256i *)addr1, q);  \
-      _mm256_storeu_si256((__m256i *)addr2, dq); \
-    }                                            \
-  } while (0)
-
-static INLINE uint16_t quant_gather_eob(__m256i eob) {
-  const __m128i eob_lo = _mm256_castsi256_si128(eob);
-  const __m128i eob_hi = _mm256_extractf128_si256(eob, 1);
-  __m128i eob_s = _mm_max_epi16(eob_lo, eob_hi);
-  eob_s = _mm_subs_epu16(_mm_set1_epi16(INT16_MAX), eob_s);
-  eob_s = _mm_minpos_epu16(eob_s);
-  return INT16_MAX - _mm_extract_epi16(eob_s, 0);
-}
-
-static INLINE void quantize(const __m256i *thr, const __m256i *qp, __m256i *c,
-                            const int16_t *iscan_ptr, tran_low_t *qcoeff,
-                            tran_low_t *dqcoeff, __m256i *eob) {
-  const __m256i abs_coeff = _mm256_abs_epi16(*c);
-  __m256i mask = _mm256_cmpgt_epi16(abs_coeff, *thr);
-  mask = _mm256_or_si256(mask, _mm256_cmpeq_epi16(abs_coeff, *thr));
-  const int nzflag = _mm256_movemask_epi8(mask);
-
-  if (nzflag) {
-    __m256i q = _mm256_adds_epi16(abs_coeff, qp[0]);
-    q = _mm256_mulhi_epi16(q, qp[1]);
-    q = _mm256_sign_epi16(q, *c);
-    const __m256i dq = _mm256_mullo_epi16(q, qp[2]);
-
-    store_two_quan(q, qcoeff, dq, dqcoeff);
-    const __m256i zero = _mm256_setzero_si256();
-    const __m256i iscan = _mm256_loadu_si256((const __m256i *)iscan_ptr);
-    const __m256i zero_coeff = _mm256_cmpeq_epi16(dq, zero);
-    const __m256i nzero_coeff = _mm256_cmpeq_epi16(zero_coeff, zero);
-    __m256i cur_eob = _mm256_sub_epi16(iscan, nzero_coeff);
-    cur_eob = _mm256_and_si256(cur_eob, nzero_coeff);
-    *eob = _mm256_max_epi16(*eob, cur_eob);
-  } else {
-    write_zero(qcoeff);
-    write_zero(dqcoeff);
-  }
-}
-
-void av1_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                          const int16_t *zbin_ptr, const int16_t *round_ptr,
-                          const int16_t *quant_ptr,
-                          const int16_t *quant_shift_ptr,
-                          tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                          const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                          const int16_t *scan_ptr, const int16_t *iscan_ptr) {
-  (void)scan_ptr;
-  (void)zbin_ptr;
-  (void)quant_shift_ptr;
-  const unsigned int step = 16;
-
-  __m256i qp[3];
-  __m256i coeff, thr;
-  const int log_scale = 0;
-
-  init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp);
-  read_coeff(coeff_ptr, &coeff);
-
-  __m256i eob = _mm256_setzero_si256();
-  quantize(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
-
-  coeff_ptr += step;
-  qcoeff_ptr += step;
-  dqcoeff_ptr += step;
-  iscan_ptr += step;
-  n_coeffs -= step;
-
-  update_qp(log_scale, &thr, qp);
-
-  while (n_coeffs > 0) {
-    read_coeff(coeff_ptr, &coeff);
-    quantize(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
-
-    coeff_ptr += step;
-    qcoeff_ptr += step;
-    dqcoeff_ptr += step;
-    iscan_ptr += step;
-    n_coeffs -= step;
-  }
-  *eob_ptr = quant_gather_eob(eob);
-}
-
-static INLINE void quantize_32x32(const __m256i *thr, const __m256i *qp,
-                                  __m256i *c, const int16_t *iscan_ptr,
-                                  tran_low_t *qcoeff, tran_low_t *dqcoeff,
-                                  __m256i *eob) {
-  const __m256i abs_coeff = _mm256_abs_epi16(*c);
-  __m256i mask = _mm256_cmpgt_epi16(abs_coeff, *thr);
-  mask = _mm256_or_si256(mask, _mm256_cmpeq_epi16(abs_coeff, *thr));
-  const int nzflag = _mm256_movemask_epi8(mask);
-
-  if (nzflag) {
-    __m256i q = _mm256_adds_epi16(abs_coeff, qp[0]);
-    q = _mm256_mulhi_epu16(q, qp[1]);
-
-    __m256i dq = _mm256_mullo_epi16(q, qp[2]);
-    dq = _mm256_srli_epi16(dq, 1);
-
-    q = _mm256_sign_epi16(q, *c);
-    dq = _mm256_sign_epi16(dq, *c);
-
-    store_two_quan(q, qcoeff, dq, dqcoeff);
-    const __m256i zero = _mm256_setzero_si256();
-    const __m256i iscan = _mm256_loadu_si256((const __m256i *)iscan_ptr);
-    const __m256i zero_coeff = _mm256_cmpeq_epi16(dq, zero);
-    const __m256i nzero_coeff = _mm256_cmpeq_epi16(zero_coeff, zero);
-    __m256i cur_eob = _mm256_sub_epi16(iscan, nzero_coeff);
-    cur_eob = _mm256_and_si256(cur_eob, nzero_coeff);
-    *eob = _mm256_max_epi16(*eob, cur_eob);
-  } else {
-    write_zero(qcoeff);
-    write_zero(dqcoeff);
-  }
-}
-
-void av1_quantize_fp_32x32_avx2(
-    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
-    const int16_t *round_ptr, const int16_t *quant_ptr,
-    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
-    const int16_t *scan_ptr, const int16_t *iscan_ptr) {
-  (void)scan_ptr;
-  (void)zbin_ptr;
-  (void)quant_shift_ptr;
-  const unsigned int step = 16;
-
-  __m256i qp[3];
-  __m256i coeff, thr;
-  const int log_scale = 1;
-
-  init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp);
-  read_coeff(coeff_ptr, &coeff);
-
-  __m256i eob = _mm256_setzero_si256();
-  quantize_32x32(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
-
-  coeff_ptr += step;
-  qcoeff_ptr += step;
-  dqcoeff_ptr += step;
-  iscan_ptr += step;
-  n_coeffs -= step;
-
-  update_qp(log_scale, &thr, qp);
-
-  while (n_coeffs > 0) {
-    read_coeff(coeff_ptr, &coeff);
-    quantize_32x32(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
-
-    coeff_ptr += step;
-    qcoeff_ptr += step;
-    dqcoeff_ptr += step;
-    iscan_ptr += step;
-    n_coeffs -= step;
-  }
-  *eob_ptr = quant_gather_eob(eob);
-}
-
-static INLINE void quantize_64x64(const __m256i *thr, const __m256i *qp,
-                                  __m256i *c, const int16_t *iscan_ptr,
-                                  tran_low_t *qcoeff, tran_low_t *dqcoeff,
-                                  __m256i *eob) {
-  const __m256i abs_coeff = _mm256_abs_epi16(*c);
-  __m256i mask = _mm256_cmpgt_epi16(abs_coeff, *thr);
-  mask = _mm256_or_si256(mask, _mm256_cmpeq_epi16(abs_coeff, *thr));
-  const int nzflag = _mm256_movemask_epi8(mask);
-
-  if (nzflag) {
-    __m256i q = _mm256_adds_epi16(abs_coeff, qp[0]);
-    __m256i qh = _mm256_mulhi_epi16(q, qp[1]);
-    __m256i ql = _mm256_mullo_epi16(q, qp[1]);
-    qh = _mm256_slli_epi16(qh, 2);
-    ql = _mm256_srli_epi16(ql, 14);
-    q = _mm256_or_si256(qh, ql);
-    const __m256i dqh = _mm256_slli_epi16(_mm256_mulhi_epi16(q, qp[2]), 14);
-    const __m256i dql = _mm256_srli_epi16(_mm256_mullo_epi16(q, qp[2]), 2);
-    __m256i dq = _mm256_or_si256(dqh, dql);
-
-    q = _mm256_sign_epi16(q, *c);
-    dq = _mm256_sign_epi16(dq, *c);
-
-    store_two_quan(q, qcoeff, dq, dqcoeff);
-    const __m256i zero = _mm256_setzero_si256();
-    const __m256i iscan = _mm256_loadu_si256((const __m256i *)iscan_ptr);
-    const __m256i zero_coeff = _mm256_cmpeq_epi16(dq, zero);
-    const __m256i nzero_coeff = _mm256_cmpeq_epi16(zero_coeff, zero);
-    __m256i cur_eob = _mm256_sub_epi16(iscan, nzero_coeff);
-    cur_eob = _mm256_and_si256(cur_eob, nzero_coeff);
-    *eob = _mm256_max_epi16(*eob, cur_eob);
-  } else {
-    write_zero(qcoeff);
-    write_zero(dqcoeff);
-  }
-}
-
-void av1_quantize_fp_64x64_avx2(
-    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
-    const int16_t *round_ptr, const int16_t *quant_ptr,
-    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
-    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
-    const int16_t *scan_ptr, const int16_t *iscan_ptr) {
-  (void)scan_ptr;
-  (void)zbin_ptr;
-  (void)quant_shift_ptr;
-  const unsigned int step = 16;
-
-  __m256i qp[3];
-  __m256i coeff, thr;
-  const int log_scale = 2;
-
-  init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp);
-  read_coeff(coeff_ptr, &coeff);
-
-  __m256i eob = _mm256_setzero_si256();
-  quantize_64x64(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
-
-  coeff_ptr += step;
-  qcoeff_ptr += step;
-  dqcoeff_ptr += step;
-  iscan_ptr += step;
-  n_coeffs -= step;
-
-  update_qp(log_scale, &thr, qp);
-
-  while (n_coeffs > 0) {
-    read_coeff(coeff_ptr, &coeff);
-    quantize_64x64(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
-
-    coeff_ptr += step;
-    qcoeff_ptr += step;
-    dqcoeff_ptr += step;
-    iscan_ptr += step;
-    n_coeffs -= step;
-  }
-  *eob_ptr = quant_gather_eob(eob);
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c b/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c
deleted file mode 100644
index b07e7717f..000000000
--- a/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h>
-#include <xmmintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-
-static INLINE void read_coeff(const tran_low_t *coeff, intptr_t offset,
-                              __m128i *c0, __m128i *c1) {
-  const tran_low_t *addr = coeff + offset;
-  if (sizeof(tran_low_t) == 4) {
-    const __m128i x0 = _mm_load_si128((const __m128i *)addr);
-    const __m128i x1 = _mm_load_si128((const __m128i *)addr + 1);
-    const __m128i x2 = _mm_load_si128((const __m128i *)addr + 2);
-    const __m128i x3 = _mm_load_si128((const __m128i *)addr + 3);
-    *c0 = _mm_packs_epi32(x0, x1);
-    *c1 = _mm_packs_epi32(x2, x3);
-  } else {
-    *c0 = _mm_load_si128((const __m128i *)addr);
-    *c1 = _mm_load_si128((const __m128i *)addr + 1);
-  }
-}
-
-static INLINE void write_qcoeff(const __m128i *qc0, const __m128i *qc1,
-                                tran_low_t *qcoeff, intptr_t offset) {
-  tran_low_t *addr = qcoeff + offset;
-  if (sizeof(tran_low_t) == 4) {
-    const __m128i zero = _mm_setzero_si128();
-    __m128i sign_bits = _mm_cmplt_epi16(*qc0, zero);
-    __m128i y0 = _mm_unpacklo_epi16(*qc0, sign_bits);
-    __m128i y1 = _mm_unpackhi_epi16(*qc0, sign_bits);
-    _mm_store_si128((__m128i *)addr, y0);
-    _mm_store_si128((__m128i *)addr + 1, y1);
-
-    sign_bits = _mm_cmplt_epi16(*qc1, zero);
-    y0 = _mm_unpacklo_epi16(*qc1, sign_bits);
-    y1 = _mm_unpackhi_epi16(*qc1, sign_bits);
-    _mm_store_si128((__m128i *)addr + 2, y0);
-    _mm_store_si128((__m128i *)addr + 3, y1);
-  } else {
-    _mm_store_si128((__m128i *)addr, *qc0);
-    _mm_store_si128((__m128i *)addr + 1, *qc1);
-  }
-}
-
-static INLINE void write_zero(tran_low_t *qcoeff, intptr_t offset) {
-  const __m128i zero = _mm_setzero_si128();
-  tran_low_t *addr = qcoeff + offset;
-  if (sizeof(tran_low_t) == 4) {
-    _mm_store_si128((__m128i *)addr, zero);
-    _mm_store_si128((__m128i *)addr + 1, zero);
-    _mm_store_si128((__m128i *)addr + 2, zero);
-    _mm_store_si128((__m128i *)addr + 3, zero);
-  } else {
-    _mm_store_si128((__m128i *)addr, zero);
-    _mm_store_si128((__m128i *)addr + 1, zero);
-  }
-}
-
-static INLINE void quantize(const int16_t *iscan_ptr,
-                            const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                            const __m128i *round0, const __m128i *round1,
-                            const __m128i *quant0, const __m128i *quant1,
-                            const __m128i *dequant0, const __m128i *dequant1,
-                            const __m128i *thr0, const __m128i *thr1,
-                            __m128i *eob) {
-  __m128i coeff0, coeff1;
-  // Do DC and first 15 AC
-  read_coeff(coeff_ptr, n_coeffs, &coeff0, &coeff1);
-
-  // Poor man's sign extract
-  const __m128i coeff0_sign = _mm_srai_epi16(coeff0, 15);
-  const __m128i coeff1_sign = _mm_srai_epi16(coeff1, 15);
-  __m128i qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
-  __m128i qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
-  qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
-  qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-  const __m128i mask0 = _mm_or_si128(_mm_cmpgt_epi16(qcoeff0, *thr0),
-                                     _mm_cmpeq_epi16(qcoeff0, *thr0));
-  const __m128i mask1 = _mm_or_si128(_mm_cmpgt_epi16(qcoeff1, *thr1),
-                                     _mm_cmpeq_epi16(qcoeff1, *thr1));
-  const int16_t nzflag = _mm_movemask_epi8(mask0) | _mm_movemask_epi8(mask1);
-
-  if (nzflag) {
-    qcoeff0 = _mm_adds_epi16(qcoeff0, *round0);
-    qcoeff1 = _mm_adds_epi16(qcoeff1, *round1);
-    const __m128i qtmp0 = _mm_mulhi_epi16(qcoeff0, *quant0);
-    const __m128i qtmp1 = _mm_mulhi_epi16(qcoeff1, *quant1);
-
-    // Reinsert signs
-    qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
-    qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
-    qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
-    qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
-    write_qcoeff(&qcoeff0, &qcoeff1, qcoeff_ptr, n_coeffs);
-
-    coeff0 = _mm_mullo_epi16(qcoeff0, *dequant0);
-    coeff1 = _mm_mullo_epi16(qcoeff1, *dequant1);
-
-    write_qcoeff(&coeff0, &coeff1, dqcoeff_ptr, n_coeffs);
-
-    const __m128i zero = _mm_setzero_si128();
-    // Scan for eob
-    const __m128i zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
-    const __m128i zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
-    const __m128i nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
-    const __m128i nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
-    const __m128i iscan0 =
-        _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
-    const __m128i iscan1 =
-        _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
-    // Add one to convert from indices to counts
-    const __m128i iscan0_nz = _mm_sub_epi16(iscan0, nzero_coeff0);
-    const __m128i iscan1_nz = _mm_sub_epi16(iscan1, nzero_coeff1);
-    const __m128i eob0 = _mm_and_si128(iscan0_nz, nzero_coeff0);
-    const __m128i eob1 = _mm_and_si128(iscan1_nz, nzero_coeff1);
-    const __m128i eob2 = _mm_max_epi16(eob0, eob1);
-    *eob = _mm_max_epi16(*eob, eob2);
-  } else {
-    write_zero(qcoeff_ptr, n_coeffs);
-    write_zero(dqcoeff_ptr, n_coeffs);
-  }
-}
-
-void av1_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                          const int16_t *zbin_ptr, const int16_t *round_ptr,
-                          const int16_t *quant_ptr,
-                          const int16_t *quant_shift_ptr,
-                          tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                          const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                          const int16_t *scan_ptr, const int16_t *iscan_ptr) {
-  (void)scan_ptr;
-  (void)zbin_ptr;
-  (void)quant_shift_ptr;
-
-  coeff_ptr += n_coeffs;
-  iscan_ptr += n_coeffs;
-  qcoeff_ptr += n_coeffs;
-  dqcoeff_ptr += n_coeffs;
-  n_coeffs = -n_coeffs;
-
-  const __m128i round0 = _mm_load_si128((const __m128i *)round_ptr);
-  const __m128i round1 = _mm_unpackhi_epi64(round0, round0);
-  const __m128i quant0 = _mm_load_si128((const __m128i *)quant_ptr);
-  const __m128i quant1 = _mm_unpackhi_epi64(quant0, quant0);
-  const __m128i dequant0 = _mm_load_si128((const __m128i *)dequant_ptr);
-  const __m128i dequant1 = _mm_unpackhi_epi64(dequant0, dequant0);
-  const __m128i thr0 = _mm_srai_epi16(dequant0, 1);
-  const __m128i thr1 = _mm_srai_epi16(dequant1, 1);
-  __m128i eob = _mm_setzero_si128();
-
-  quantize(iscan_ptr, coeff_ptr, n_coeffs, qcoeff_ptr, dqcoeff_ptr, &round0,
-           &round1, &quant0, &quant1, &dequant0, &dequant1, &thr0, &thr1, &eob);
-
-  n_coeffs += 8 * 2;
-
-  // AC only loop
-  while (n_coeffs < 0) {
-    quantize(iscan_ptr, coeff_ptr, n_coeffs, qcoeff_ptr, dqcoeff_ptr, &round1,
-             &round1, &quant1, &quant1, &dequant1, &dequant1, &thr1, &thr1,
-             &eob);
-    n_coeffs += 8 * 2;
-  }
-
-  // Accumulate EOB
-  {
-    __m128i eob_shuffled;
-    eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
-    eob = _mm_max_epi16(eob, eob_shuffled);
-    eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);
-    eob = _mm_max_epi16(eob, eob_shuffled);
-    eob_shuffled = _mm_shufflelo_epi16(eob, 0x1);
-    eob = _mm_max_epi16(eob, eob_shuffled);
-    *eob_ptr = _mm_extract_epi16(eob, 1);
-  }
-}
diff --git a/third_party/aom/av1/encoder/x86/av1_quantize_ssse3_x86_64.asm b/third_party/aom/av1/encoder/x86/av1_quantize_ssse3_x86_64.asm
deleted file mode 100644
index ad4ae274e..000000000
--- a/third_party/aom/av1/encoder/x86/av1_quantize_ssse3_x86_64.asm
+++ /dev/null
@@ -1,204 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%define private_prefix av1
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION_RODATA
-pw_1: times 8 dw 1
-
-SECTION .text
-
-%macro QUANTIZE_FP 2
-cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
-                                shift, qcoeff, dqcoeff, dequant, \
-                                eob, scan, iscan
-  cmp                    dword skipm, 0
-  jne .blank
-
-  ; actual quantize loop - setup pointers, rounders, etc.
-  movifnidn                   coeffq, coeffmp
-  movifnidn                  ncoeffq, ncoeffmp
-  mov                             r2, dequantmp
-  movifnidn                    zbinq, zbinmp
-  movifnidn                   roundq, roundmp
-  movifnidn                   quantq, quantmp
-  mova                            m1, [roundq]             ; m1 = round
-  mova                            m2, [quantq]             ; m2 = quant
-%ifidn %1, fp_32x32
-  pcmpeqw                         m5, m5
-  psrlw                           m5, 15
-  paddw                           m1, m5
-  psrlw                           m1, 1                    ; m1 = (m1 + 1) / 2
-%endif
-  mova                            m3, [r2q]                ; m3 = dequant
-  mov                             r3, qcoeffmp
-  mov                             r4, dqcoeffmp
-  mov                             r5, iscanmp
-%ifidn %1, fp_32x32
-  psllw                           m2, 1
-%endif
-  pxor                            m5, m5                   ; m5 = dedicated zero
-
-  lea                         coeffq, [  coeffq+ncoeffq*2]
-  lea                            r5q, [  r5q+ncoeffq*2]
-  lea                            r3q, [ r3q+ncoeffq*2]
-  lea                            r4q, [r4q+ncoeffq*2]
-  neg                        ncoeffq
-
-  ; get DC and first 15 AC coeffs
-  mova                            m9, [  coeffq+ncoeffq*2+ 0] ; m9 = c[i]
-  mova                           m10, [  coeffq+ncoeffq*2+16] ; m10 = c[i]
-  pabsw                           m6, m9                   ; m6 = abs(m9)
-  pabsw                          m11, m10                  ; m11 = abs(m10)
-  pcmpeqw                         m7, m7
-
-  paddsw                          m6, m1                   ; m6 += round
-  punpckhqdq                      m1, m1
-  paddsw                         m11, m1                   ; m11 += round
-  pmulhw                          m8, m6, m2               ; m8 = m6*q>>16
-  punpckhqdq                      m2, m2
-  pmulhw                         m13, m11, m2              ; m13 = m11*q>>16
-  psignw                          m8, m9                   ; m8 = reinsert sign
-  psignw                         m13, m10                  ; m13 = reinsert sign
-  mova            [r3q+ncoeffq*2+ 0], m8
-  mova            [r3q+ncoeffq*2+16], m13
-%ifidn %1, fp_32x32
-  pabsw                           m8, m8
-  pabsw                          m13, m13
-%endif
-  pmullw                          m8, m3                   ; r4[i] = r3[i] * q
-  punpckhqdq                      m3, m3
-  pmullw                         m13, m3                   ; r4[i] = r3[i] * q
-%ifidn %1, fp_32x32
-  psrlw                           m8, 1
-  psrlw                          m13, 1
-  psignw                          m8, m9
-  psignw                         m13, m10
-  psrlw                           m0, m3, 2
-%else
-  psrlw                           m0, m3, 1
-%endif
-  mova            [r4q+ncoeffq*2+ 0], m8
-  mova            [r4q+ncoeffq*2+16], m13
-  pcmpeqw                         m8, m5                   ; m8 = c[i] == 0
-  pcmpeqw                        m13, m5                   ; m13 = c[i] == 0
-  mova                            m6, [  r5q+ncoeffq*2+ 0] ; m6 = scan[i]
-  mova                           m11, [  r5q+ncoeffq*2+16] ; m11 = scan[i]
-  psubw                           m6, m7                   ; m6 = scan[i] + 1
-  psubw                          m11, m7                   ; m11 = scan[i] + 1
-  pandn                           m8, m6                   ; m8 = max(eob)
-  pandn                          m13, m11                  ; m13 = max(eob)
-  pmaxsw                          m8, m13
-  add                        ncoeffq, mmsize
-  jz .accumulate_eob
-
-.ac_only_loop:
-  mova                            m9, [  coeffq+ncoeffq*2+ 0] ; m9 = c[i]
-  mova                           m10, [  coeffq+ncoeffq*2+16] ; m10 = c[i]
-  pabsw                           m6, m9                   ; m6 = abs(m9)
-  pabsw                          m11, m10                  ; m11 = abs(m10)
-
-  pcmpgtw                         m7, m6,  m0
-  pcmpgtw                        m12, m11, m0
-  pmovmskb                       r6d, m7
-  pmovmskb                       r2d, m12
-
-  or                              r6, r2
-  jz .skip_iter
-
-  pcmpeqw                         m7, m7
-
-  paddsw                          m6, m1                   ; m6 += round
-  paddsw                         m11, m1                   ; m11 += round
-  pmulhw                         m14, m6, m2               ; m14 = m6*q>>16
-  pmulhw                         m13, m11, m2              ; m13 = m11*q>>16
-  psignw                         m14, m9                   ; m14 = reinsert sign
-  psignw                         m13, m10                  ; m13 = reinsert sign
-  mova            [r3q+ncoeffq*2+ 0], m14
-  mova            [r3q+ncoeffq*2+16], m13
-%ifidn %1, fp_32x32
-  pabsw                          m14, m14
-  pabsw                          m13, m13
-%endif
-  pmullw                         m14, m3                   ; r4[i] = r3[i] * q
-  pmullw                         m13, m3                   ; r4[i] = r3[i] * q
-%ifidn %1, fp_32x32
-  psrlw                          m14, 1
-  psrlw                          m13, 1
-  psignw                         m14, m9
-  psignw                         m13, m10
-%endif
-  mova            [r4q+ncoeffq*2+ 0], m14
-  mova            [r4q+ncoeffq*2+16], m13
-  pcmpeqw                        m14, m5                   ; m14 = c[i] == 0
-  pcmpeqw                        m13, m5                   ; m13 = c[i] == 0
-  mova                            m6, [  r5q+ncoeffq*2+ 0] ; m6 = scan[i]
-  mova                           m11, [  r5q+ncoeffq*2+16] ; m11 = scan[i]
-  psubw                           m6, m7                   ; m6 = scan[i] + 1
-  psubw                          m11, m7                   ; m11 = scan[i] + 1
-  pandn                          m14, m6                   ; m14 = max(eob)
-  pandn                          m13, m11                  ; m13 = max(eob)
-  pmaxsw                          m8, m14
-  pmaxsw                          m8, m13
-  add                        ncoeffq, mmsize
-  jl .ac_only_loop
-
-  jmp .accumulate_eob
-.skip_iter:
-  mova            [r3q+ncoeffq*2+ 0], m5
-  mova            [r3q+ncoeffq*2+16], m5
-  mova            [r4q+ncoeffq*2+ 0], m5
-  mova            [r4q+ncoeffq*2+16], m5
-  add                        ncoeffq, mmsize
-  jl .ac_only_loop
-
-.accumulate_eob:
-  ; horizontally accumulate/max eobs and write into [eob] memory pointer
-  mov                             r2, eobmp
-  pshufd                          m7, m8, 0xe
-  pmaxsw                          m8, m7
-  pshuflw                         m7, m8, 0xe
-  pmaxsw                          m8, m7
-  pshuflw                         m7, m8, 0x1
-  pmaxsw                          m8, m7
-  pextrw                          r6, m8, 0
-  mov                           [r2], r6
-  RET
-
-  ; skip-block, i.e. just write all zeroes
-.blank:
-  mov                             r0, dqcoeffmp
-  movifnidn                  ncoeffq, ncoeffmp
-  mov                             r2, qcoeffmp
-  mov                             r3, eobmp
-
-  lea                            r0q, [r0q+ncoeffq*2]
-  lea                            r2q, [r2q+ncoeffq*2]
-  neg                        ncoeffq
-  pxor                            m7, m7
-.blank_loop:
-  mova            [r0q+ncoeffq*2+ 0], m7
-  mova            [r0q+ncoeffq*2+16], m7
-  mova            [r2q+ncoeffq*2+ 0], m7
-  mova            [r2q+ncoeffq*2+16], m7
-  add                        ncoeffq, mmsize
-  jl .blank_loop
-  mov                     word [r3q], 0
-  RET
-%endmacro
-
-INIT_XMM ssse3
-QUANTIZE_FP fp, 7
-QUANTIZE_FP fp_32x32, 7
diff --git a/third_party/aom/av1/encoder/x86/av1_ssim_opt_x86_64.asm b/third_party/aom/av1/encoder/x86/av1_ssim_opt_x86_64.asm
deleted file mode 100644
index faa2a232a..000000000
--- a/third_party/aom/av1/encoder/x86/av1_ssim_opt_x86_64.asm
+++ /dev/null
@@ -1,222 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "aom_ports/x86_abi_support.asm"
-
-; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr
-%macro TABULATE_SSIM 0
-        paddusw         xmm15, xmm3  ; sum_s
-        paddusw         xmm14, xmm4  ; sum_r
-        movdqa          xmm1, xmm3
-        pmaddwd         xmm1, xmm1
-        paddd           xmm13, xmm1 ; sum_sq_s
-        movdqa          xmm2, xmm4
-        pmaddwd         xmm2, xmm2
-        paddd           xmm12, xmm2 ; sum_sq_r
-        pmaddwd         xmm3, xmm4
-        paddd           xmm11, xmm3  ; sum_sxr
-%endmacro
-
-; Sum across the register %1 starting with q words
-%macro SUM_ACROSS_Q 1
-        movdqa          xmm2,%1
-        punpckldq       %1,xmm0
-        punpckhdq       xmm2,xmm0
-        paddq           %1,xmm2
-        movdqa          xmm2,%1
-        punpcklqdq      %1,xmm0
-        punpckhqdq      xmm2,xmm0
-        paddq           %1,xmm2
-%endmacro
-
-; Sum across the register %1 starting with q words
-%macro SUM_ACROSS_W 1
-        movdqa          xmm1, %1
-        punpcklwd       %1,xmm0
-        punpckhwd       xmm1,xmm0
-        paddd           %1, xmm1
-        SUM_ACROSS_Q    %1
-%endmacro
-
-SECTION .text
-
-;void ssim_parms_sse2(
-;    unsigned char *s,
-;    int sp,
-;    unsigned char *r,
-;    int rp
-;    unsigned long *sum_s,
-;    unsigned long *sum_r,
-;    unsigned long *sum_sq_s,
-;    unsigned long *sum_sq_r,
-;    unsigned long *sum_sxr);
-;
-; TODO: Use parm passing through structure, probably don't need the pxors
-; ( calling app will initialize to 0 ) could easily fit everything in sse2
-; without too much hastle, and can probably do better estimates with psadw
-; or pavgb At this point this is just meant to be first pass for calculating
-; all the parms needed for 16x16 ssim so we can play with dssim as distortion
-; in mode selection code.
-global sym(av1_ssim_parms_16x16_sse2) PRIVATE
-sym(av1_ssim_parms_16x16_sse2):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 9
-    SAVE_XMM 15
-    push        rsi
-    push        rdi
-    ; end prolog
-
-    mov             rsi,        arg(0) ;s
-    mov             rcx,        arg(1) ;sp
-    mov             rdi,        arg(2) ;r
-    mov             rax,        arg(3) ;rp
-
-    pxor            xmm0, xmm0
-    pxor            xmm15,xmm15  ;sum_s
-    pxor            xmm14,xmm14  ;sum_r
-    pxor            xmm13,xmm13  ;sum_sq_s
-    pxor            xmm12,xmm12  ;sum_sq_r
-    pxor            xmm11,xmm11  ;sum_sxr
-
-    mov             rdx, 16      ;row counter
-.NextRow:
-
-    ;grab source and reference pixels
-    movdqu          xmm5, [rsi]
-    movdqu          xmm6, [rdi]
-    movdqa          xmm3, xmm5
-    movdqa          xmm4, xmm6
-    punpckhbw       xmm3, xmm0 ; high_s
-    punpckhbw       xmm4, xmm0 ; high_r
-
-    TABULATE_SSIM
-
-    movdqa          xmm3, xmm5
-    movdqa          xmm4, xmm6
-    punpcklbw       xmm3, xmm0 ; low_s
-    punpcklbw       xmm4, xmm0 ; low_r
-
-    TABULATE_SSIM
-
-    add             rsi, rcx   ; next s row
-    add             rdi, rax   ; next r row
-
-    dec             rdx        ; counter
-    jnz .NextRow
-
-    SUM_ACROSS_W    xmm15
-    SUM_ACROSS_W    xmm14
-    SUM_ACROSS_Q    xmm13
-    SUM_ACROSS_Q    xmm12
-    SUM_ACROSS_Q    xmm11
-
-    mov             rdi,arg(4)
-    movd            [rdi], xmm15;
-    mov             rdi,arg(5)
-    movd            [rdi], xmm14;
-    mov             rdi,arg(6)
-    movd            [rdi], xmm13;
-    mov             rdi,arg(7)
-    movd            [rdi], xmm12;
-    mov             rdi,arg(8)
-    movd            [rdi], xmm11;
-
-    ; begin epilog
-    pop         rdi
-    pop         rsi
-    RESTORE_XMM
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-;void ssim_parms_sse2(
-;    unsigned char *s,
-;    int sp,
-;    unsigned char *r,
-;    int rp
-;    unsigned long *sum_s,
-;    unsigned long *sum_r,
-;    unsigned long *sum_sq_s,
-;    unsigned long *sum_sq_r,
-;    unsigned long *sum_sxr);
-;
-; TODO: Use parm passing through structure, probably don't need the pxors
-; ( calling app will initialize to 0 ) could easily fit everything in sse2
-; without too much hastle, and can probably do better estimates with psadw
-; or pavgb At this point this is just meant to be first pass for calculating
-; all the parms needed for 16x16 ssim so we can play with dssim as distortion
-; in mode selection code.
-global sym(av1_ssim_parms_8x8_sse2) PRIVATE
-sym(av1_ssim_parms_8x8_sse2):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 9
-    SAVE_XMM 15
-    push        rsi
-    push        rdi
-    ; end prolog
-
-    mov             rsi,        arg(0) ;s
-    mov             rcx,        arg(1) ;sp
-    mov             rdi,        arg(2) ;r
-    mov             rax,        arg(3) ;rp
-
-    pxor            xmm0, xmm0
-    pxor            xmm15,xmm15  ;sum_s
-    pxor            xmm14,xmm14  ;sum_r
-    pxor            xmm13,xmm13  ;sum_sq_s
-    pxor            xmm12,xmm12  ;sum_sq_r
-    pxor            xmm11,xmm11  ;sum_sxr
-
-    mov             rdx, 8      ;row counter
-.NextRow:
-
-    ;grab source and reference pixels
-    movq            xmm3, [rsi]
-    movq            xmm4, [rdi]
-    punpcklbw       xmm3, xmm0 ; low_s
-    punpcklbw       xmm4, xmm0 ; low_r
-
-    TABULATE_SSIM
-
-    add             rsi, rcx   ; next s row
-    add             rdi, rax   ; next r row
-
-    dec             rdx        ; counter
-    jnz .NextRow
-
-    SUM_ACROSS_W    xmm15
-    SUM_ACROSS_W    xmm14
-    SUM_ACROSS_Q    xmm13
-    SUM_ACROSS_Q    xmm12
-    SUM_ACROSS_Q    xmm11
-
-    mov             rdi,arg(4)
-    movd            [rdi], xmm15;
-    mov             rdi,arg(5)
-    movd            [rdi], xmm14;
-    mov             rdi,arg(6)
-    movd            [rdi], xmm13;
-    mov             rdi,arg(7)
-    movd            [rdi], xmm12;
-    mov             rdi,arg(8)
-    movd            [rdi], xmm11;
-
-    ; begin epilog
-    pop         rdi
-    pop         rsi
-    RESTORE_XMM
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
diff --git a/third_party/aom/av1/encoder/x86/av1_txfm1d_sse4.h b/third_party/aom/av1/encoder/x86/av1_txfm1d_sse4.h
deleted file mode 100644
index 6df2a8bdb..000000000
--- a/third_party/aom/av1/encoder/x86/av1_txfm1d_sse4.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_ENCODER_X86_AV1_TXFM1D_SSE4_H_
-#define AOM_AV1_ENCODER_X86_AV1_TXFM1D_SSE4_H_
-
-#include <smmintrin.h>
-#include "av1/common/av1_txfm.h"
-#include "av1/common/x86/av1_txfm_sse4.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void av1_fdct4_new_sse4_1(const __m128i *input, __m128i *output,
-                          const int8_t cos_bit, const int8_t *stage_range);
-void av1_fdct8_new_sse4_1(const __m128i *input, __m128i *output,
-                          const int8_t cos_bit, const int8_t *stage_range);
-void av1_fdct16_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range);
-void av1_fdct32_new_sse4_1(const __m128i *input, __m128i *output,
-                           int8_t cos_bit);
-void av1_fdct64_new_sse4_1(const __m128i *input, __m128i *output,
-                           int8_t cos_bit, const int instride,
-                           const int outstride);
-
-void av1_fadst4_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range);
-void av1_fadst8_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range);
-void av1_fadst16_new_sse4_1(const __m128i *input, __m128i *output,
-                            const int8_t cos_bit, const int8_t *stage_range);
-
-void av1_idct4_new_sse4_1(const __m128i *input, __m128i *output,
-                          const int8_t cos_bit, const int8_t *stage_range);
-void av1_idct8_new_sse4_1(const __m128i *input, __m128i *output,
-                          const int8_t cos_bit, const int8_t *stage_range);
-void av1_idct16_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range);
-void av1_idct32_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range);
-void av1_idct64_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range);
-
-void av1_iadst4_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range);
-void av1_iadst8_new_sse4_1(const __m128i *input, __m128i *output,
-                           const int8_t cos_bit, const int8_t *stage_range);
-void av1_iadst16_new_sse4_1(const __m128i *input, __m128i *output,
-                            const int8_t cos_bit, const int8_t *stage_range);
-static INLINE void transpose_32_4x4(int stride, const __m128i *input,
-                                    __m128i *output) {
-  __m128i temp0 = _mm_unpacklo_epi32(input[0 * stride], input[2 * stride]);
-  __m128i temp1 = _mm_unpackhi_epi32(input[0 * stride], input[2 * stride]);
-  __m128i temp2 = _mm_unpacklo_epi32(input[1 * stride], input[3 * stride]);
-  __m128i temp3 = _mm_unpackhi_epi32(input[1 * stride], input[3 * stride]);
-
-  output[0 * stride] = _mm_unpacklo_epi32(temp0, temp2);
-  output[1 * stride] = _mm_unpackhi_epi32(temp0, temp2);
-  output[2 * stride] = _mm_unpacklo_epi32(temp1, temp3);
-  output[3 * stride] = _mm_unpackhi_epi32(temp1, temp3);
-}
-
-// the entire input block can be represent by a grid of 4x4 blocks
-// each 4x4 blocks can be represent by 4 vertical __m128i
-// we first transpose each 4x4 block internally
-// then transpose the grid
-static INLINE void transpose_32(int txfm_size, const __m128i *input,
-                                __m128i *output) {
-  const int num_per_128 = 4;
-  const int row_size = txfm_size;
-  const int col_size = txfm_size / num_per_128;
-  int r, c;
-
-  // transpose each 4x4 block internally
-  for (r = 0; r < row_size; r += 4) {
-    for (c = 0; c < col_size; c++) {
-      transpose_32_4x4(col_size, &input[r * col_size + c],
-                       &output[c * 4 * col_size + r / 4]);
-    }
-  }
-}
-
-// out0 = in0*w0 + in1*w1
-// out1 = -in1*w0 + in0*w1
-#define btf_32_sse4_1_type0(w0, w1, in0, in1, out0, out1, bit) \
-  do {                                                         \
-    const __m128i ww0 = _mm_set1_epi32(w0);                    \
-    const __m128i ww1 = _mm_set1_epi32(w1);                    \
-    const __m128i in0_w0 = _mm_mullo_epi32(in0, ww0);          \
-    const __m128i in1_w1 = _mm_mullo_epi32(in1, ww1);          \
-    out0 = _mm_add_epi32(in0_w0, in1_w1);                      \
-    out0 = av1_round_shift_32_sse4_1(out0, bit);               \
-    const __m128i in0_w1 = _mm_mullo_epi32(in0, ww1);          \
-    const __m128i in1_w0 = _mm_mullo_epi32(in1, ww0);          \
-    out1 = _mm_sub_epi32(in0_w1, in1_w0);                      \
-    out1 = av1_round_shift_32_sse4_1(out1, bit);               \
-  } while (0)
-
-// out0 = in0*w0 + in1*w1
-// out1 = in1*w0 - in0*w1
-#define btf_32_sse4_1_type1(w0, w1, in0, in1, out0, out1, bit) \
-  do {                                                         \
-    btf_32_sse4_1_type0(w1, w0, in1, in0, out0, out1, bit);    \
-  } while (0)
-
-// out0 = in0*w0 + in1*w1
-// out1 = -in1*w0 + in0*w1
-#define btf_32_type0_sse4_1_new(ww0, ww1, in0, in1, out0, out1, r, bit) \
-  do {                                                                  \
-    const __m128i in0_w0 = _mm_mullo_epi32(in0, ww0);                   \
-    const __m128i in1_w1 = _mm_mullo_epi32(in1, ww1);                   \
-    out0 = _mm_add_epi32(in0_w0, in1_w1);                               \
-    out0 = _mm_add_epi32(out0, r);                                      \
-    out0 = _mm_srai_epi32(out0, bit);                                   \
-    const __m128i in0_w1 = _mm_mullo_epi32(in0, ww1);                   \
-    const __m128i in1_w0 = _mm_mullo_epi32(in1, ww0);                   \
-    out1 = _mm_sub_epi32(in0_w1, in1_w0);                               \
-    out1 = _mm_add_epi32(out1, r);                                      \
-    out1 = _mm_srai_epi32(out1, bit);                                   \
-  } while (0)
-
-// out0 = in0*w0 + in1*w1
-// out1 = in1*w0 - in0*w1
-#define btf_32_type1_sse4_1_new(ww0, ww1, in0, in1, out0, out1, r, bit) \
-  do {                                                                  \
-    btf_32_type0_sse4_1_new(ww1, ww0, in1, in0, out0, out1, r, bit);    \
-  } while (0)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // AOM_AV1_ENCODER_X86_AV1_TXFM1D_SSE4_H_
diff --git a/third_party/aom/av1/encoder/x86/corner_match_sse4.c b/third_party/aom/av1/encoder/x86/corner_match_sse4.c
deleted file mode 100644
index 93f37b71d..000000000
--- a/third_party/aom/av1/encoder/x86/corner_match_sse4.c
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-#include <memory.h>
-#include <math.h>
-#include <assert.h>
-
-#include <smmintrin.h>
-
-#include "config/av1_rtcd.h"
-
-#include "aom_ports/mem.h"
-#include "av1/encoder/corner_match.h"
-
-DECLARE_ALIGNED(16, static const uint8_t, byte_mask[16]) = {
-  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0
-};
-#if MATCH_SZ != 13
-#error "Need to change byte_mask in corner_match_sse4.c if MATCH_SZ != 13"
-#endif
-
-/* Compute corr(im1, im2) * MATCH_SZ * stddev(im1), where the
-   correlation/standard deviation are taken over MATCH_SZ by MATCH_SZ windows
-   of each image, centered at (x1, y1) and (x2, y2) respectively.
-*/
-double compute_cross_correlation_sse4_1(unsigned char *im1, int stride1, int x1,
-                                        int y1, unsigned char *im2, int stride2,
-                                        int x2, int y2) {
-  int i;
-  // 2 16-bit partial sums in lanes 0, 4 (== 2 32-bit partial sums in lanes 0,
-  // 2)
-  __m128i sum1_vec = _mm_setzero_si128();
-  __m128i sum2_vec = _mm_setzero_si128();
-  // 4 32-bit partial sums of squares
-  __m128i sumsq2_vec = _mm_setzero_si128();
-  __m128i cross_vec = _mm_setzero_si128();
-
-  const __m128i mask = _mm_load_si128((__m128i *)byte_mask);
-  const __m128i zero = _mm_setzero_si128();
-
-  im1 += (y1 - MATCH_SZ_BY2) * stride1 + (x1 - MATCH_SZ_BY2);
-  im2 += (y2 - MATCH_SZ_BY2) * stride2 + (x2 - MATCH_SZ_BY2);
-
-  for (i = 0; i < MATCH_SZ; ++i) {
-    const __m128i v1 =
-        _mm_and_si128(_mm_loadu_si128((__m128i *)&im1[i * stride1]), mask);
-    const __m128i v2 =
-        _mm_and_si128(_mm_loadu_si128((__m128i *)&im2[i * stride2]), mask);
-
-    // Using the 'sad' intrinsic here is a bit faster than adding
-    // v1_l + v1_r and v2_l + v2_r, plus it avoids the need for a 16->32 bit
-    // conversion step later, for a net speedup of ~10%
-    sum1_vec = _mm_add_epi16(sum1_vec, _mm_sad_epu8(v1, zero));
-    sum2_vec = _mm_add_epi16(sum2_vec, _mm_sad_epu8(v2, zero));
-
-    const __m128i v1_l = _mm_cvtepu8_epi16(v1);
-    const __m128i v1_r = _mm_cvtepu8_epi16(_mm_srli_si128(v1, 8));
-    const __m128i v2_l = _mm_cvtepu8_epi16(v2);
-    const __m128i v2_r = _mm_cvtepu8_epi16(_mm_srli_si128(v2, 8));
-
-    sumsq2_vec = _mm_add_epi32(
-        sumsq2_vec,
-        _mm_add_epi32(_mm_madd_epi16(v2_l, v2_l), _mm_madd_epi16(v2_r, v2_r)));
-    cross_vec = _mm_add_epi32(
-        cross_vec,
-        _mm_add_epi32(_mm_madd_epi16(v1_l, v2_l), _mm_madd_epi16(v1_r, v2_r)));
-  }
-
-  // Now we can treat the four registers (sum1_vec, sum2_vec, sumsq2_vec,
-  // cross_vec)
-  // as holding 4 32-bit elements each, which we want to sum horizontally.
-  // We do this by transposing and then summing vertically.
-  __m128i tmp_0 = _mm_unpacklo_epi32(sum1_vec, sum2_vec);
-  __m128i tmp_1 = _mm_unpackhi_epi32(sum1_vec, sum2_vec);
-  __m128i tmp_2 = _mm_unpacklo_epi32(sumsq2_vec, cross_vec);
-  __m128i tmp_3 = _mm_unpackhi_epi32(sumsq2_vec, cross_vec);
-
-  __m128i tmp_4 = _mm_unpacklo_epi64(tmp_0, tmp_2);
-  __m128i tmp_5 = _mm_unpackhi_epi64(tmp_0, tmp_2);
-  __m128i tmp_6 = _mm_unpacklo_epi64(tmp_1, tmp_3);
-  __m128i tmp_7 = _mm_unpackhi_epi64(tmp_1, tmp_3);
-
-  __m128i res =
-      _mm_add_epi32(_mm_add_epi32(tmp_4, tmp_5), _mm_add_epi32(tmp_6, tmp_7));
-
-  int sum1 = _mm_extract_epi32(res, 0);
-  int sum2 = _mm_extract_epi32(res, 1);
-  int sumsq2 = _mm_extract_epi32(res, 2);
-  int cross = _mm_extract_epi32(res, 3);
-
-  int var2 = sumsq2 * MATCH_SZ_SQ - sum2 * sum2;
-  int cov = cross * MATCH_SZ_SQ - sum1 * sum2;
-  return cov / sqrt((double)var2);
-}
diff --git a/third_party/aom/av1/encoder/x86/dct_sse2.asm b/third_party/aom/av1/encoder/x86/dct_sse2.asm
deleted file mode 100644
index b18554818..000000000
--- a/third_party/aom/av1/encoder/x86/dct_sse2.asm
+++ /dev/null
@@ -1,82 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-%define private_prefix av1
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION .text
-
-%macro TRANSFORM_COLS 0
-  paddw           m0,        m1
-  movq            m4,        m0
-  psubw           m3,        m2
-  psubw           m4,        m3
-  psraw           m4,        1
-  movq            m5,        m4
-  psubw           m5,        m1 ;b1
-  psubw           m4,        m2 ;c1
-  psubw           m0,        m4
-  paddw           m3,        m5
-                                ; m0 a0
-  SWAP            1,         4  ; m1 c1
-  SWAP            2,         3  ; m2 d1
-  SWAP            3,         5  ; m3 b1
-%endmacro
-
-%macro TRANSPOSE_4X4 0
-                                ; 00 01 02 03
-                                ; 10 11 12 13
-                                ; 20 21 22 23
-                                ; 30 31 32 33
-  punpcklwd       m0,        m1 ; 00 10 01 11  02 12 03 13
-  punpcklwd       m2,        m3 ; 20 30 21 31  22 32 23 33
-  mova            m1,        m0
-  punpckldq       m0,        m2 ; 00 10 20 30  01 11 21 31
-  punpckhdq       m1,        m2 ; 02 12 22 32  03 13 23 33
-%endmacro
-
-INIT_XMM sse2
-cglobal fwht4x4, 3, 4, 8, input, output, stride
-  lea             r3q,       [inputq + strideq*4]
-  movq            m0,        [inputq] ;a1
-  movq            m1,        [inputq + strideq*2] ;b1
-  movq            m2,        [r3q] ;c1
-  movq            m3,        [r3q + strideq*2] ;d1
-
-  TRANSFORM_COLS
-  TRANSPOSE_4X4
-  SWAP            1,         2
-  psrldq          m1,        m0, 8
-  psrldq          m3,        m2, 8
-  TRANSFORM_COLS
-  TRANSPOSE_4X4
-
-  psllw           m0,        2
-  psllw           m1,        2
-
-  ; sign extension
-  mova            m2,             m0
-  mova            m3,             m1
-  punpcklwd       m0,             m0
-  punpcklwd       m1,             m1
-  punpckhwd       m2,             m2
-  punpckhwd       m3,             m3
-  psrad           m0,             16
-  psrad           m1,             16
-  psrad           m2,             16
-  psrad           m3,             16
-  mova            [outputq],      m0
-  mova            [outputq + 16], m2
-  mova            [outputq + 32], m1
-  mova            [outputq + 48], m3
-
-  RET
diff --git a/third_party/aom/av1/encoder/x86/encodetxb_avx2.c b/third_party/aom/av1/encoder/x86/encodetxb_avx2.c
deleted file mode 100644
index 7642f57d1..000000000
--- a/third_party/aom/av1/encoder/x86/encodetxb_avx2.c
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <emmintrin.h>  // SSE2
-#include <smmintrin.h>  /* SSE4.1 */
-#include <immintrin.h>  /* AVX2 */
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/x86/mem_sse2.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/txb_common.h"
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86/synonyms_avx2.h"
-
-void av1_txb_init_levels_avx2(const tran_low_t *const coeff, const int width,
-                              const int height, uint8_t *const levels) {
-  const int stride = width + TX_PAD_HOR;
-  const __m256i y_zeros = _mm256_setzero_si256();
-
-  const int32_t pre_len = sizeof(*levels) * TX_PAD_TOP * stride;
-  uint8_t *pre_buf = levels - TX_PAD_TOP * stride;
-  uint8_t *pre_buf_end = pre_buf + pre_len;
-  do {
-    yy_storeu_256(pre_buf, y_zeros);
-    pre_buf += 32;
-  } while (pre_buf < pre_buf_end);
-
-  const int32_t bottom_len = sizeof(*levels) * (TX_PAD_BOTTOM * stride);
-  uint8_t *bottom_buf_end = levels + (height + TX_PAD_BOTTOM) * stride;
-  uint8_t *bottom_buf = bottom_buf_end - ((bottom_len + 31) & (~31));
-
-  do {
-    yy_storeu_256(bottom_buf, y_zeros);
-    bottom_buf += 32;
-  } while (bottom_buf < bottom_buf_end);
-
-  int i = 0;
-  uint8_t *ls = levels;
-  const tran_low_t *cf = coeff;
-  if (width == 4) {
-    do {
-      const __m256i c0 = yy_loadu_256(cf);
-      const __m256i c1 = yy_loadu_256(cf + 8);
-      const __m256i abs01 = _mm256_abs_epi16(_mm256_packs_epi32(c0, c1));
-      const __m256i abs01_8 = _mm256_packs_epi16(abs01, y_zeros);
-      const __m256i res_ = _mm256_shuffle_epi32(abs01_8, 0xd8);
-      const __m256i res = _mm256_permute4x64_epi64(res_, 0xd8);
-      yy_storeu_256(ls, res);
-      ls += 32;
-      cf += 16;
-      i += 4;
-    } while (i < height);
-  } else if (width == 8) {
-    do {
-      const __m256i coeffA = yy_loadu_256(cf);
-      const __m256i coeffB = yy_loadu_256(cf + 8);
-      const __m256i coeffC = yy_loadu_256(cf + 16);
-      const __m256i coeffD = yy_loadu_256(cf + 24);
-      const __m256i coeffAB = _mm256_packs_epi32(coeffA, coeffB);
-      const __m256i coeffCD = _mm256_packs_epi32(coeffC, coeffD);
-      const __m256i absAB = _mm256_abs_epi16(coeffAB);
-      const __m256i absCD = _mm256_abs_epi16(coeffCD);
-      const __m256i absABCD = _mm256_packs_epi16(absAB, absCD);
-      const __m256i res_ = _mm256_permute4x64_epi64(absABCD, 0xd8);
-      const __m256i res = _mm256_shuffle_epi32(res_, 0xd8);
-      const __m128i res0 = _mm256_castsi256_si128(res);
-      const __m128i res1 = _mm256_extracti128_si256(res, 1);
-      xx_storel_64(ls, res0);
-      *(int32_t *)(ls + width) = 0;
-      xx_storel_64(ls + stride, _mm_srli_si128(res0, 8));
-      *(int32_t *)(ls + width + stride) = 0;
-      xx_storel_64(ls + stride * 2, res1);
-      *(int32_t *)(ls + width + stride * 2) = 0;
-      xx_storel_64(ls + stride * 3, _mm_srli_si128(res1, 8));
-      *(int32_t *)(ls + width + stride * 3) = 0;
-      cf += 32;
-      ls += stride << 2;
-      i += 4;
-    } while (i < height);
-  } else if (width == 16) {
-    do {
-      const __m256i coeffA = yy_loadu_256(cf);
-      const __m256i coeffB = yy_loadu_256(cf + 8);
-      const __m256i coeffC = yy_loadu_256(cf + 16);
-      const __m256i coeffD = yy_loadu_256(cf + 24);
-      const __m256i coeffAB = _mm256_packs_epi32(coeffA, coeffB);
-      const __m256i coeffCD = _mm256_packs_epi32(coeffC, coeffD);
-      const __m256i absAB = _mm256_abs_epi16(coeffAB);
-      const __m256i absCD = _mm256_abs_epi16(coeffCD);
-      const __m256i absABCD = _mm256_packs_epi16(absAB, absCD);
-      const __m256i res_ = _mm256_permute4x64_epi64(absABCD, 0xd8);
-      const __m256i res = _mm256_shuffle_epi32(res_, 0xd8);
-      xx_storeu_128(ls, _mm256_castsi256_si128(res));
-      xx_storeu_128(ls + stride, _mm256_extracti128_si256(res, 1));
-      cf += 32;
-      *(int32_t *)(ls + width) = 0;
-      *(int32_t *)(ls + stride + width) = 0;
-      ls += stride << 1;
-      i += 2;
-    } while (i < height);
-  } else {
-    do {
-      const __m256i coeffA = yy_loadu_256(cf);
-      const __m256i coeffB = yy_loadu_256(cf + 8);
-      const __m256i coeffC = yy_loadu_256(cf + 16);
-      const __m256i coeffD = yy_loadu_256(cf + 24);
-      const __m256i coeffAB = _mm256_packs_epi32(coeffA, coeffB);
-      const __m256i coeffCD = _mm256_packs_epi32(coeffC, coeffD);
-      const __m256i absAB = _mm256_abs_epi16(coeffAB);
-      const __m256i absCD = _mm256_abs_epi16(coeffCD);
-      const __m256i absABCD = _mm256_packs_epi16(absAB, absCD);
-      const __m256i res_ = _mm256_permute4x64_epi64(absABCD, 0xd8);
-      const __m256i res = _mm256_shuffle_epi32(res_, 0xd8);
-      yy_storeu_256(ls, res);
-      cf += 32;
-      *(int32_t *)(ls + width) = 0;
-      ls += stride;
-      i += 1;
-    } while (i < height);
-  }
-}
diff --git a/third_party/aom/av1/encoder/x86/encodetxb_sse2.c b/third_party/aom/av1/encoder/x86/encodetxb_sse2.c
deleted file mode 100644
index dedb4d02f..000000000
--- a/third_party/aom/av1/encoder/x86/encodetxb_sse2.c
+++ /dev/null
@@ -1,505 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <emmintrin.h>  // SSE2
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/x86/mem_sse2.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/txb_common.h"
-
-static INLINE void load_levels_4x4x5_sse2(const uint8_t *const src,
-                                          const int stride,
-                                          const ptrdiff_t *const offsets,
-                                          __m128i *const level) {
-  level[0] = load_8bit_4x4_to_1_reg_sse2(src + 1, stride);
-  level[1] = load_8bit_4x4_to_1_reg_sse2(src + stride, stride);
-  level[2] = load_8bit_4x4_to_1_reg_sse2(src + offsets[0], stride);
-  level[3] = load_8bit_4x4_to_1_reg_sse2(src + offsets[1], stride);
-  level[4] = load_8bit_4x4_to_1_reg_sse2(src + offsets[2], stride);
-}
-
-static INLINE void load_levels_8x2x5_sse2(const uint8_t *const src,
-                                          const int stride,
-                                          const ptrdiff_t *const offsets,
-                                          __m128i *const level) {
-  level[0] = load_8bit_8x2_to_1_reg_sse2(src + 1, stride);
-  level[1] = load_8bit_8x2_to_1_reg_sse2(src + stride, stride);
-  level[2] = load_8bit_8x2_to_1_reg_sse2(src + offsets[0], stride);
-  level[3] = load_8bit_8x2_to_1_reg_sse2(src + offsets[1], stride);
-  level[4] = load_8bit_8x2_to_1_reg_sse2(src + offsets[2], stride);
-}
-
-static INLINE void load_levels_16x1x5_sse2(const uint8_t *const src,
-                                           const int stride,
-                                           const ptrdiff_t *const offsets,
-                                           __m128i *const level) {
-  level[0] = _mm_loadu_si128((__m128i *)(src + 1));
-  level[1] = _mm_loadu_si128((__m128i *)(src + stride));
-  level[2] = _mm_loadu_si128((__m128i *)(src + offsets[0]));
-  level[3] = _mm_loadu_si128((__m128i *)(src + offsets[1]));
-  level[4] = _mm_loadu_si128((__m128i *)(src + offsets[2]));
-}
-
-static INLINE __m128i get_coeff_contexts_kernel_sse2(__m128i *const level) {
-  const __m128i const_3 = _mm_set1_epi8(3);
-  const __m128i const_4 = _mm_set1_epi8(4);
-  __m128i count;
-
-  count = _mm_min_epu8(level[0], const_3);
-  level[1] = _mm_min_epu8(level[1], const_3);
-  level[2] = _mm_min_epu8(level[2], const_3);
-  level[3] = _mm_min_epu8(level[3], const_3);
-  level[4] = _mm_min_epu8(level[4], const_3);
-  count = _mm_add_epi8(count, level[1]);
-  count = _mm_add_epi8(count, level[2]);
-  count = _mm_add_epi8(count, level[3]);
-  count = _mm_add_epi8(count, level[4]);
-  count = _mm_avg_epu8(count, _mm_setzero_si128());
-  count = _mm_min_epu8(count, const_4);
-  return count;
-}
-
-static INLINE void get_4_nz_map_contexts_2d(const uint8_t *levels,
-                                            const int height,
-                                            const ptrdiff_t *const offsets,
-                                            int8_t *const coeff_contexts) {
-  const int stride = 4 + TX_PAD_HOR;
-  const __m128i pos_to_offset_large = _mm_set1_epi8(21);
-  __m128i pos_to_offset =
-      (height == 4)
-          ? _mm_setr_epi8(0, 1, 6, 6, 1, 6, 6, 21, 6, 6, 21, 21, 6, 21, 21, 21)
-          : _mm_setr_epi8(0, 11, 11, 11, 11, 11, 11, 11, 6, 6, 21, 21, 6, 21,
-                          21, 21);
-  __m128i count;
-  __m128i level[5];
-  int8_t *cc = coeff_contexts;
-  int row = height;
-
-  assert(!(height % 4));
-
-  do {
-    load_levels_4x4x5_sse2(levels, stride, offsets, level);
-    count = get_coeff_contexts_kernel_sse2(level);
-    count = _mm_add_epi8(count, pos_to_offset);
-    _mm_store_si128((__m128i *)cc, count);
-    pos_to_offset = pos_to_offset_large;
-    levels += 4 * stride;
-    cc += 16;
-    row -= 4;
-  } while (row);
-
-  coeff_contexts[0] = 0;
-}
-
-static INLINE void get_4_nz_map_contexts_hor(const uint8_t *levels,
-                                             const int height,
-                                             const ptrdiff_t *const offsets,
-                                             int8_t *coeff_contexts) {
-  const int stride = 4 + TX_PAD_HOR;
-  const __m128i pos_to_offset =
-      _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
-  __m128i count;
-  __m128i level[5];
-  int row = height;
-
-  assert(!(height % 4));
-
-  do {
-    load_levels_4x4x5_sse2(levels, stride, offsets, level);
-    count = get_coeff_contexts_kernel_sse2(level);
-    count = _mm_add_epi8(count, pos_to_offset);
-    _mm_store_si128((__m128i *)coeff_contexts, count);
-    levels += 4 * stride;
-    coeff_contexts += 16;
-    row -= 4;
-  } while (row);
-}
-
-static INLINE void get_4_nz_map_contexts_ver(const uint8_t *levels,
-                                             const int height,
-                                             const ptrdiff_t *const offsets,
-                                             int8_t *coeff_contexts) {
-  const int stride = 4 + TX_PAD_HOR;
-  const __m128i pos_to_offset_large = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10);
-  __m128i pos_to_offset =
-      _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
-                    SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
-                    SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
-                    SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
-  __m128i count;
-  __m128i level[5];
-  int row = height;
-
-  assert(!(height % 4));
-
-  do {
-    load_levels_4x4x5_sse2(levels, stride, offsets, level);
-    count = get_coeff_contexts_kernel_sse2(level);
-    count = _mm_add_epi8(count, pos_to_offset);
-    _mm_store_si128((__m128i *)coeff_contexts, count);
-    pos_to_offset = pos_to_offset_large;
-    levels += 4 * stride;
-    coeff_contexts += 16;
-    row -= 4;
-  } while (row);
-}
-
-static INLINE void get_8_coeff_contexts_2d(const uint8_t *levels,
-                                           const int height,
-                                           const ptrdiff_t *const offsets,
-                                           int8_t *coeff_contexts) {
-  const int stride = 8 + TX_PAD_HOR;
-  int8_t *cc = coeff_contexts;
-  int row = height;
-  __m128i count;
-  __m128i level[5];
-  __m128i pos_to_offset[3];
-
-  assert(!(height % 2));
-
-  if (height == 8) {
-    pos_to_offset[0] =
-        _mm_setr_epi8(0, 1, 6, 6, 21, 21, 21, 21, 1, 6, 6, 21, 21, 21, 21, 21);
-    pos_to_offset[1] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 6, 21, 21,
-                                     21, 21, 21, 21, 21);
-  } else if (height < 8) {
-    pos_to_offset[0] = _mm_setr_epi8(0, 16, 6, 6, 21, 21, 21, 21, 16, 16, 6, 21,
-                                     21, 21, 21, 21);
-    pos_to_offset[1] = _mm_setr_epi8(16, 16, 21, 21, 21, 21, 21, 21, 16, 16, 21,
-                                     21, 21, 21, 21, 21);
-  } else {
-    pos_to_offset[0] = _mm_setr_epi8(0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-                                     11, 11, 11, 11, 11);
-    pos_to_offset[1] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 6, 21, 21,
-                                     21, 21, 21, 21, 21);
-  }
-  pos_to_offset[2] = _mm_set1_epi8(21);
-
-  do {
-    load_levels_8x2x5_sse2(levels, stride, offsets, level);
-    count = get_coeff_contexts_kernel_sse2(level);
-    count = _mm_add_epi8(count, pos_to_offset[0]);
-    _mm_store_si128((__m128i *)cc, count);
-    pos_to_offset[0] = pos_to_offset[1];
-    pos_to_offset[1] = pos_to_offset[2];
-    levels += 2 * stride;
-    cc += 16;
-    row -= 2;
-  } while (row);
-
-  coeff_contexts[0] = 0;
-}
-
-static INLINE void get_8_coeff_contexts_hor(const uint8_t *levels,
-                                            const int height,
-                                            const ptrdiff_t *const offsets,
-                                            int8_t *coeff_contexts) {
-  const int stride = 8 + TX_PAD_HOR;
-  const __m128i pos_to_offset =
-      _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
-  int row = height;
-  __m128i count;
-  __m128i level[5];
-
-  assert(!(height % 2));
-
-  do {
-    load_levels_8x2x5_sse2(levels, stride, offsets, level);
-    count = get_coeff_contexts_kernel_sse2(level);
-    count = _mm_add_epi8(count, pos_to_offset);
-    _mm_store_si128((__m128i *)coeff_contexts, count);
-    levels += 2 * stride;
-    coeff_contexts += 16;
-    row -= 2;
-  } while (row);
-}
-
-static INLINE void get_8_coeff_contexts_ver(const uint8_t *levels,
-                                            const int height,
-                                            const ptrdiff_t *const offsets,
-                                            int8_t *coeff_contexts) {
-  const int stride = 8 + TX_PAD_HOR;
-  const __m128i pos_to_offset_large = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10);
-  __m128i pos_to_offset =
-      _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
-                    SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
-                    SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
-                    SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
-                    SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
-                    SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
-                    SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
-                    SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5);
-  int row = height;
-  __m128i count;
-  __m128i level[5];
-
-  assert(!(height % 2));
-
-  do {
-    load_levels_8x2x5_sse2(levels, stride, offsets, level);
-    count = get_coeff_contexts_kernel_sse2(level);
-    count = _mm_add_epi8(count, pos_to_offset);
-    _mm_store_si128((__m128i *)coeff_contexts, count);
-    pos_to_offset = pos_to_offset_large;
-    levels += 2 * stride;
-    coeff_contexts += 16;
-    row -= 2;
-  } while (row);
-}
-
-static INLINE void get_16n_coeff_contexts_2d(const uint8_t *levels,
-                                             const int real_width,
-                                             const int real_height,
-                                             const int width, const int height,
-                                             const ptrdiff_t *const offsets,
-                                             int8_t *coeff_contexts) {
-  const int stride = width + TX_PAD_HOR;
-  int8_t *cc = coeff_contexts;
-  int row = height;
-  __m128i pos_to_offset[5];
-  __m128i pos_to_offset_large[3];
-  __m128i count;
-  __m128i level[5];
-
-  assert(!(width % 16));
-
-  pos_to_offset_large[2] = _mm_set1_epi8(21);
-  if (real_width == real_height) {
-    pos_to_offset[0] = _mm_setr_epi8(0, 1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21,
-                                     21, 21, 21, 21);
-    pos_to_offset[1] = _mm_setr_epi8(1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21,
-                                     21, 21, 21, 21, 21);
-    pos_to_offset[2] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-                                     21, 21, 21, 21, 21);
-    pos_to_offset[3] = _mm_setr_epi8(6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-                                     21, 21, 21, 21, 21);
-    pos_to_offset[4] = pos_to_offset_large[0] = pos_to_offset_large[1] =
-        pos_to_offset_large[2];
-  } else if (real_width > real_height) {
-    pos_to_offset[0] = _mm_setr_epi8(0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21,
-                                     21, 21, 21, 21, 21);
-    pos_to_offset[1] = _mm_setr_epi8(16, 16, 6, 21, 21, 21, 21, 21, 21, 21, 21,
-                                     21, 21, 21, 21, 21);
-    pos_to_offset[2] = pos_to_offset[3] = pos_to_offset[4] = _mm_setr_epi8(
-        16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21);
-    pos_to_offset_large[0] = pos_to_offset_large[1] = pos_to_offset_large[2];
-  } else {  // real_width < real_height
-    pos_to_offset[0] = pos_to_offset[1] = _mm_setr_epi8(
-        11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11);
-    pos_to_offset[2] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-                                     21, 21, 21, 21, 21);
-    pos_to_offset[3] = _mm_setr_epi8(6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-                                     21, 21, 21, 21, 21);
-    pos_to_offset[4] = pos_to_offset_large[2];
-    pos_to_offset_large[0] = pos_to_offset_large[1] = _mm_set1_epi8(11);
-  }
-
-  do {
-    int w = width;
-
-    do {
-      load_levels_16x1x5_sse2(levels, stride, offsets, level);
-      count = get_coeff_contexts_kernel_sse2(level);
-      count = _mm_add_epi8(count, pos_to_offset[0]);
-      _mm_store_si128((__m128i *)cc, count);
-      levels += 16;
-      cc += 16;
-      w -= 16;
-      pos_to_offset[0] = pos_to_offset_large[0];
-    } while (w);
-
-    pos_to_offset[0] = pos_to_offset[1];
-    pos_to_offset[1] = pos_to_offset[2];
-    pos_to_offset[2] = pos_to_offset[3];
-    pos_to_offset[3] = pos_to_offset[4];
-    pos_to_offset_large[0] = pos_to_offset_large[1];
-    pos_to_offset_large[1] = pos_to_offset_large[2];
-    levels += TX_PAD_HOR;
-  } while (--row);
-
-  coeff_contexts[0] = 0;
-}
-
-static INLINE void get_16n_coeff_contexts_hor(const uint8_t *levels,
-                                              const int width, const int height,
-                                              const ptrdiff_t *const offsets,
-                                              int8_t *coeff_contexts) {
-  const int stride = width + TX_PAD_HOR;
-  const __m128i pos_to_offset_large =
-      _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                    SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
-  __m128i count;
-  __m128i level[5];
-  int row = height;
-
-  assert(!(width % 16));
-
-  do {
-    __m128i pos_to_offset =
-        _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
-                      SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                      SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                      SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                      SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                      SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                      SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
-                      SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
-    int w = width;
-
-    do {
-      load_levels_16x1x5_sse2(levels, stride, offsets, level);
-      count = get_coeff_contexts_kernel_sse2(level);
-      count = _mm_add_epi8(count, pos_to_offset);
-      _mm_store_si128((__m128i *)coeff_contexts, count);
-      pos_to_offset = pos_to_offset_large;
-      levels += 16;
-      coeff_contexts += 16;
-      w -= 16;
-    } while (w);
-
-    levels += TX_PAD_HOR;
-  } while (--row);
-}
-
-static INLINE void get_16n_coeff_contexts_ver(const uint8_t *levels,
-                                              const int width, const int height,
-                                              const ptrdiff_t *const offsets,
-                                              int8_t *coeff_contexts) {
-  const int stride = width + TX_PAD_HOR;
-  __m128i pos_to_offset[3];
-  __m128i count;
-  __m128i level[5];
-  int row = height;
-
-  assert(!(width % 16));
-
-  pos_to_offset[0] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 0);
-  pos_to_offset[1] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 5);
-  pos_to_offset[2] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10);
-
-  do {
-    int w = width;
-
-    do {
-      load_levels_16x1x5_sse2(levels, stride, offsets, level);
-      count = get_coeff_contexts_kernel_sse2(level);
-      count = _mm_add_epi8(count, pos_to_offset[0]);
-      _mm_store_si128((__m128i *)coeff_contexts, count);
-      levels += 16;
-      coeff_contexts += 16;
-      w -= 16;
-    } while (w);
-
-    pos_to_offset[0] = pos_to_offset[1];
-    pos_to_offset[1] = pos_to_offset[2];
-    levels += TX_PAD_HOR;
-  } while (--row);
-}
-
-// Note: levels[] must be in the range [0, 127], inclusive.
-void av1_get_nz_map_contexts_sse2(const uint8_t *const levels,
-                                  const int16_t *const scan, const uint16_t eob,
-                                  const TX_SIZE tx_size,
-                                  const TX_CLASS tx_class,
-                                  int8_t *const coeff_contexts) {
-  const int last_idx = eob - 1;
-  if (!last_idx) {
-    coeff_contexts[0] = 0;
-    return;
-  }
-
-  const int real_width = tx_size_wide[tx_size];
-  const int real_height = tx_size_high[tx_size];
-  const int width = get_txb_wide(tx_size);
-  const int height = get_txb_high(tx_size);
-  const int stride = width + TX_PAD_HOR;
-  ptrdiff_t offsets[3];
-
-  /* coeff_contexts must be 16 byte aligned. */
-  assert(!((intptr_t)coeff_contexts & 0xf));
-
-  if (tx_class == TX_CLASS_2D) {
-    offsets[0] = 0 * stride + 2;
-    offsets[1] = 1 * stride + 1;
-    offsets[2] = 2 * stride + 0;
-
-    if (width == 4) {
-      get_4_nz_map_contexts_2d(levels, height, offsets, coeff_contexts);
-    } else if (width == 8) {
-      get_8_coeff_contexts_2d(levels, height, offsets, coeff_contexts);
-    } else if (width == 16) {
-      get_16n_coeff_contexts_2d(levels, real_width, real_height, width, height,
-                                offsets, coeff_contexts);
-    } else {
-      get_16n_coeff_contexts_2d(levels, real_width, real_height, width, height,
-                                offsets, coeff_contexts);
-    }
-  } else if (tx_class == TX_CLASS_HORIZ) {
-    offsets[0] = 2;
-    offsets[1] = 3;
-    offsets[2] = 4;
-    if (width == 4) {
-      get_4_nz_map_contexts_hor(levels, height, offsets, coeff_contexts);
-    } else if (width == 8) {
-      get_8_coeff_contexts_hor(levels, height, offsets, coeff_contexts);
-    } else {
-      get_16n_coeff_contexts_hor(levels, width, height, offsets,
-                                 coeff_contexts);
-    }
-  } else {  // TX_CLASS_VERT
-    offsets[0] = 2 * stride;
-    offsets[1] = 3 * stride;
-    offsets[2] = 4 * stride;
-    if (width == 4) {
-      get_4_nz_map_contexts_ver(levels, height, offsets, coeff_contexts);
-    } else if (width == 8) {
-      get_8_coeff_contexts_ver(levels, height, offsets, coeff_contexts);
-    } else {
-      get_16n_coeff_contexts_ver(levels, width, height, offsets,
-                                 coeff_contexts);
-    }
-  }
-
-  const int bwl = get_txb_bwl(tx_size);
-  const int pos = scan[last_idx];
-  if (last_idx <= (height << bwl) / 8)
-    coeff_contexts[pos] = 1;
-  else if (last_idx <= (height << bwl) / 4)
-    coeff_contexts[pos] = 2;
-  else
-    coeff_contexts[pos] = 3;
-}
diff --git a/third_party/aom/av1/encoder/x86/encodetxb_sse4.c b/third_party/aom/av1/encoder/x86/encodetxb_sse4.c
deleted file mode 100644
index 5e0687cd3..000000000
--- a/third_party/aom/av1/encoder/x86/encodetxb_sse4.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2017, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <emmintrin.h>  // SSE2
-#include <smmintrin.h>  /* SSE4.1 */
-
-#include "aom/aom_integer.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/txb_common.h"
-#include "aom_dsp/x86/synonyms.h"
-
-void av1_txb_init_levels_sse4_1(const tran_low_t *const coeff, const int width,
-                                const int height, uint8_t *const levels) {
-  const int stride = width + TX_PAD_HOR;
-  const __m128i zeros = _mm_setzero_si128();
-
-  const int32_t pre_len = sizeof(*levels) * TX_PAD_TOP * stride;
-  uint8_t *pre_buf = levels - TX_PAD_TOP * stride;
-  uint8_t *pre_buf_end = pre_buf + pre_len;
-  do {
-    _mm_storeu_si128((__m128i *)(pre_buf), zeros);
-    pre_buf += 16;
-  } while (pre_buf < pre_buf_end);
-
-  const int32_t bottom_len = sizeof(*levels) * (TX_PAD_BOTTOM * stride);
-  uint8_t *bottom_buf = levels + stride * height;
-  uint8_t *bottom_buf_end = bottom_buf + bottom_len;
-  do {
-    _mm_storeu_si128((__m128i *)(bottom_buf), zeros);
-    bottom_buf += 16;
-  } while (bottom_buf < bottom_buf_end);
-
-  int i = 0;
-  uint8_t *ls = levels;
-  const tran_low_t *cf = coeff;
-  if (width == 4) {
-    do {
-      const __m128i coeffA = xx_loadu_128(cf);
-      const __m128i coeffB = xx_loadu_128(cf + 4);
-      const __m128i coeffAB = _mm_packs_epi32(coeffA, coeffB);
-      const __m128i absAB = _mm_abs_epi16(coeffAB);
-      const __m128i absAB8 = _mm_packs_epi16(absAB, zeros);
-      const __m128i lsAB = _mm_unpacklo_epi32(absAB8, zeros);
-      xx_storeu_128(ls, lsAB);
-      ls += (stride << 1);
-      cf += (width << 1);
-      i += 2;
-    } while (i < height);
-  } else if (width == 8) {
-    do {
-      const __m128i coeffA = xx_loadu_128(cf);
-      const __m128i coeffB = xx_loadu_128(cf + 4);
-      const __m128i coeffAB = _mm_packs_epi32(coeffA, coeffB);
-      const __m128i absAB = _mm_abs_epi16(coeffAB);
-      const __m128i absAB8 = _mm_packs_epi16(absAB, zeros);
-      xx_storeu_128(ls, absAB8);
-      ls += stride;
-      cf += width;
-      i += 1;
-    } while (i < height);
-  } else {
-    do {
-      int j = 0;
-      do {
-        const __m128i coeffA = xx_loadu_128(cf);
-        const __m128i coeffB = xx_loadu_128(cf + 4);
-        const __m128i coeffC = xx_loadu_128(cf + 8);
-        const __m128i coeffD = xx_loadu_128(cf + 12);
-        const __m128i coeffAB = _mm_packs_epi32(coeffA, coeffB);
-        const __m128i coeffCD = _mm_packs_epi32(coeffC, coeffD);
-        const __m128i absAB = _mm_abs_epi16(coeffAB);
-        const __m128i absCD = _mm_abs_epi16(coeffCD);
-        const __m128i absABCD = _mm_packs_epi16(absAB, absCD);
-        xx_storeu_128(ls + j, absABCD);
-        j += 16;
-        cf += 16;
-      } while (j < width);
-      *(int32_t *)(ls + width) = 0;
-      ls += stride;
-      i += 1;
-    } while (i < height);
-  }
-}
diff --git a/third_party/aom/av1/encoder/x86/error_intrin_avx2.c b/third_party/aom/av1/encoder/x86/error_intrin_avx2.c
deleted file mode 100644
index 7d4f69585..000000000
--- a/third_party/aom/av1/encoder/x86/error_intrin_avx2.c
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>  // AVX2
-
-#include "config/av1_rtcd.h"
-
-#include "aom/aom_integer.h"
-
-static INLINE void read_coeff(const tran_low_t *coeff, intptr_t offset,
-                              __m256i *c) {
-  const tran_low_t *addr = coeff + offset;
-
-  if (sizeof(tran_low_t) == 4) {
-    const __m256i x0 = _mm256_loadu_si256((const __m256i *)addr);
-    const __m256i x1 = _mm256_loadu_si256((const __m256i *)addr + 1);
-    const __m256i y = _mm256_packs_epi32(x0, x1);
-    *c = _mm256_permute4x64_epi64(y, 0xD8);
-  } else {
-    *c = _mm256_loadu_si256((const __m256i *)addr);
-  }
-}
-
-int64_t av1_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff,
-                             intptr_t block_size, int64_t *ssz) {
-  __m256i sse_reg, ssz_reg, coeff_reg, dqcoeff_reg;
-  __m256i exp_dqcoeff_lo, exp_dqcoeff_hi, exp_coeff_lo, exp_coeff_hi;
-  __m256i sse_reg_64hi, ssz_reg_64hi;
-  __m128i sse_reg128, ssz_reg128;
-  int64_t sse;
-  int i;
-  const __m256i zero_reg = _mm256_setzero_si256();
-
-  // init sse and ssz registerd to zero
-  sse_reg = _mm256_setzero_si256();
-  ssz_reg = _mm256_setzero_si256();
-
-  for (i = 0; i < block_size; i += 16) {
-    // load 32 bytes from coeff and dqcoeff
-    read_coeff(coeff, i, &coeff_reg);
-    read_coeff(dqcoeff, i, &dqcoeff_reg);
-    // dqcoeff - coeff
-    dqcoeff_reg = _mm256_sub_epi16(dqcoeff_reg, coeff_reg);
-    // madd (dqcoeff - coeff)
-    dqcoeff_reg = _mm256_madd_epi16(dqcoeff_reg, dqcoeff_reg);
-    // madd coeff
-    coeff_reg = _mm256_madd_epi16(coeff_reg, coeff_reg);
-    // expand each double word of madd (dqcoeff - coeff) to quad word
-    exp_dqcoeff_lo = _mm256_unpacklo_epi32(dqcoeff_reg, zero_reg);
-    exp_dqcoeff_hi = _mm256_unpackhi_epi32(dqcoeff_reg, zero_reg);
-    // expand each double word of madd (coeff) to quad word
-    exp_coeff_lo = _mm256_unpacklo_epi32(coeff_reg, zero_reg);
-    exp_coeff_hi = _mm256_unpackhi_epi32(coeff_reg, zero_reg);
-    // add each quad word of madd (dqcoeff - coeff) and madd (coeff)
-    sse_reg = _mm256_add_epi64(sse_reg, exp_dqcoeff_lo);
-    ssz_reg = _mm256_add_epi64(ssz_reg, exp_coeff_lo);
-    sse_reg = _mm256_add_epi64(sse_reg, exp_dqcoeff_hi);
-    ssz_reg = _mm256_add_epi64(ssz_reg, exp_coeff_hi);
-  }
-  // save the higher 64 bit of each 128 bit lane
-  sse_reg_64hi = _mm256_srli_si256(sse_reg, 8);
-  ssz_reg_64hi = _mm256_srli_si256(ssz_reg, 8);
-  // add the higher 64 bit to the low 64 bit
-  sse_reg = _mm256_add_epi64(sse_reg, sse_reg_64hi);
-  ssz_reg = _mm256_add_epi64(ssz_reg, ssz_reg_64hi);
-
-  // add each 64 bit from each of the 128 bit lane of the 256 bit
-  sse_reg128 = _mm_add_epi64(_mm256_castsi256_si128(sse_reg),
-                             _mm256_extractf128_si256(sse_reg, 1));
-
-  ssz_reg128 = _mm_add_epi64(_mm256_castsi256_si128(ssz_reg),
-                             _mm256_extractf128_si256(ssz_reg, 1));
-
-  // store the results
-  _mm_storel_epi64((__m128i *)(&sse), sse_reg128);
-
-  _mm_storel_epi64((__m128i *)(ssz), ssz_reg128);
-  _mm256_zeroupper();
-  return sse;
-}
diff --git a/third_party/aom/av1/encoder/x86/error_sse2.asm b/third_party/aom/av1/encoder/x86/error_sse2.asm
deleted file mode 100644
index 72e9e22b1..000000000
--- a/third_party/aom/av1/encoder/x86/error_sse2.asm
+++ /dev/null
@@ -1,79 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%define private_prefix av1
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION .text
-
-; int64_t av1_block_error(int16_t *coeff, int16_t *dqcoeff, intptr_t block_size,
-;                         int64_t *ssz)
-
-INIT_XMM sse2
-cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
-  pxor      m4, m4                 ; sse accumulator
-  pxor      m6, m6                 ; ssz accumulator
-  pxor      m5, m5                 ; dedicated zero register
-  lea     uqcq, [uqcq+sizeq*2]
-  lea     dqcq, [dqcq+sizeq*2]
-  neg    sizeq
-.loop:
-  mova      m2, [uqcq+sizeq*2]
-  mova      m0, [dqcq+sizeq*2]
-  mova      m3, [uqcq+sizeq*2+mmsize]
-  mova      m1, [dqcq+sizeq*2+mmsize]
-  psubw     m0, m2
-  psubw     m1, m3
-  ; individual errors are max. 15bit+sign, so squares are 30bit, and
-  ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit)
-  pmaddwd   m0, m0
-  pmaddwd   m1, m1
-  pmaddwd   m2, m2
-  pmaddwd   m3, m3
-  ; accumulate in 64bit
-  punpckldq m7, m0, m5
-  punpckhdq m0, m5
-  paddq     m4, m7
-  punpckldq m7, m1, m5
-  paddq     m4, m0
-  punpckhdq m1, m5
-  paddq     m4, m7
-  punpckldq m7, m2, m5
-  paddq     m4, m1
-  punpckhdq m2, m5
-  paddq     m6, m7
-  punpckldq m7, m3, m5
-  paddq     m6, m2
-  punpckhdq m3, m5
-  paddq     m6, m7
-  paddq     m6, m3
-  add    sizeq, mmsize
-  jl .loop
-
-  ; accumulate horizontally and store in return value
-  movhlps   m5, m4
-  movhlps   m7, m6
-  paddq     m4, m5
-  paddq     m6, m7
-%if ARCH_X86_64
-  movq    rax, m4
-  movq [sszq], m6
-%else
-  mov     eax, sszm
-  pshufd   m5, m4, 0x1
-  movq  [eax], m6
-  movd    eax, m4
-  movd    edx, m5
-%endif
-  RET
diff --git a/third_party/aom/av1/encoder/x86/hash_sse42.c b/third_party/aom/av1/encoder/x86/hash_sse42.c
deleted file mode 100644
index 65fa46311..000000000
--- a/third_party/aom/av1/encoder/x86/hash_sse42.c
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdint.h>
-#include <smmintrin.h>
-
-// Byte-boundary alignment issues
-#define ALIGN_SIZE 8
-#define ALIGN_MASK (ALIGN_SIZE - 1)
-
-#define CALC_CRC(op, crc, type, buf, len) \
-  while ((len) >= sizeof(type)) {         \
-    (crc) = op((crc), *(type *)(buf));    \
-    (len) -= sizeof(type);                \
-    buf += sizeof(type);                  \
-  }
-
-/**
- * Calculates 32-bit CRC for the input buffer
- * polynomial is 0x11EDC6F41
- * @return A 32-bit unsigned integer representing the CRC
- */
-uint32_t av1_get_crc32c_value_sse4_2(void *crc_calculator, uint8_t *p,
-                                     size_t len) {
-  (void)crc_calculator;
-  const uint8_t *buf = p;
-  uint32_t crc = 0xFFFFFFFF;
-
-  // Align the input to the word boundary
-  for (; (len > 0) && ((intptr_t)buf & ALIGN_MASK); len--, buf++) {
-    crc = _mm_crc32_u8(crc, *buf);
-  }
-
-#ifdef __x86_64__
-  uint64_t crc64 = crc;
-  CALC_CRC(_mm_crc32_u64, crc64, uint64_t, buf, len);
-  crc = (uint32_t)crc64;
-#endif
-  CALC_CRC(_mm_crc32_u32, crc, uint32_t, buf, len);
-  CALC_CRC(_mm_crc32_u16, crc, uint16_t, buf, len);
-  CALC_CRC(_mm_crc32_u8, crc, uint8_t, buf, len);
-  return (crc ^= 0xFFFFFFFF);
-}
diff --git a/third_party/aom/av1/encoder/x86/highbd_block_error_intrin_sse2.c b/third_party/aom/av1/encoder/x86/highbd_block_error_intrin_sse2.c
deleted file mode 100644
index 777304ace..000000000
--- a/third_party/aom/av1/encoder/x86/highbd_block_error_intrin_sse2.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <emmintrin.h>
-#include <stdio.h>
-
-#include "av1/common/common.h"
-
-int64_t av1_highbd_block_error_sse2(tran_low_t *coeff, tran_low_t *dqcoeff,
-                                    intptr_t block_size, int64_t *ssz,
-                                    int bps) {
-  int i, j, test;
-  uint32_t temp[4];
-  __m128i max, min, cmp0, cmp1, cmp2, cmp3;
-  int64_t error = 0, sqcoeff = 0;
-  const int shift = 2 * (bps - 8);
-  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
-
-  for (i = 0; i < block_size; i += 8) {
-    // Load the data into xmm registers
-    __m128i mm_coeff = _mm_load_si128((__m128i *)(coeff + i));
-    __m128i mm_coeff2 = _mm_load_si128((__m128i *)(coeff + i + 4));
-    __m128i mm_dqcoeff = _mm_load_si128((__m128i *)(dqcoeff + i));
-    __m128i mm_dqcoeff2 = _mm_load_si128((__m128i *)(dqcoeff + i + 4));
-    // Check if any values require more than 15 bit
-    max = _mm_set1_epi32(0x3fff);
-    min = _mm_set1_epi32(0xffffc000);
-    cmp0 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff, max),
-                         _mm_cmplt_epi32(mm_coeff, min));
-    cmp1 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff2, max),
-                         _mm_cmplt_epi32(mm_coeff2, min));
-    cmp2 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff, max),
-                         _mm_cmplt_epi32(mm_dqcoeff, min));
-    cmp3 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff2, max),
-                         _mm_cmplt_epi32(mm_dqcoeff2, min));
-    test = _mm_movemask_epi8(
-        _mm_or_si128(_mm_or_si128(cmp0, cmp1), _mm_or_si128(cmp2, cmp3)));
-
-    if (!test) {
-      __m128i mm_diff, error_sse2, sqcoeff_sse2;
-      mm_coeff = _mm_packs_epi32(mm_coeff, mm_coeff2);
-      mm_dqcoeff = _mm_packs_epi32(mm_dqcoeff, mm_dqcoeff2);
-      mm_diff = _mm_sub_epi16(mm_coeff, mm_dqcoeff);
-      error_sse2 = _mm_madd_epi16(mm_diff, mm_diff);
-      sqcoeff_sse2 = _mm_madd_epi16(mm_coeff, mm_coeff);
-      _mm_storeu_si128((__m128i *)temp, error_sse2);
-      error = error + temp[0] + temp[1] + temp[2] + temp[3];
-      _mm_storeu_si128((__m128i *)temp, sqcoeff_sse2);
-      sqcoeff += temp[0] + temp[1] + temp[2] + temp[3];
-    } else {
-      for (j = 0; j < 8; j++) {
-        const int64_t diff = coeff[i + j] - dqcoeff[i + j];
-        error += diff * diff;
-        sqcoeff += (int64_t)coeff[i + j] * (int64_t)coeff[i + j];
-      }
-    }
-  }
-  assert(error >= 0 && sqcoeff >= 0);
-  error = (error + rounding) >> shift;
-  sqcoeff = (sqcoeff + rounding) >> shift;
-
-  *ssz = sqcoeff;
-  return error;
-}
diff --git a/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c b/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c
deleted file mode 100644
index 535485ae8..000000000
--- a/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c
+++ /dev/null
@@ -1,1783 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include <assert.h>
-#include <smmintrin.h> /* SSE4.1 */
-
-#include "config/aom_config.h"
-#include "config/av1_rtcd.h"
-
-#include "av1/common/av1_txfm.h"
-#include "av1/common/x86/highbd_txfm_utility_sse4.h"
-#include "av1/encoder/av1_fwd_txfm1d_cfg.h"
-#include "av1/encoder/x86/av1_txfm1d_sse4.h"
-#include "aom_dsp/txfm_common.h"
-#include "aom_dsp/x86/txfm_common_sse2.h"
-#include "aom_ports/mem.h"
-
-static INLINE void load_buffer_4x4(const int16_t *input, __m128i *in,
-                                   int stride, int flipud, int fliplr,
-                                   int shift) {
-  if (!flipud) {
-    in[0] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
-    in[1] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
-    in[2] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride));
-    in[3] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride));
-  } else {
-    in[0] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride));
-    in[1] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride));
-    in[2] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
-    in[3] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
-  }
-
-  if (fliplr) {
-    in[0] = _mm_shufflelo_epi16(in[0], 0x1b);
-    in[1] = _mm_shufflelo_epi16(in[1], 0x1b);
-    in[2] = _mm_shufflelo_epi16(in[2], 0x1b);
-    in[3] = _mm_shufflelo_epi16(in[3], 0x1b);
-  }
-
-  in[0] = _mm_cvtepi16_epi32(in[0]);
-  in[1] = _mm_cvtepi16_epi32(in[1]);
-  in[2] = _mm_cvtepi16_epi32(in[2]);
-  in[3] = _mm_cvtepi16_epi32(in[3]);
-
-  in[0] = _mm_slli_epi32(in[0], shift);
-  in[1] = _mm_slli_epi32(in[1], shift);
-  in[2] = _mm_slli_epi32(in[2], shift);
-  in[3] = _mm_slli_epi32(in[3], shift);
-}
-
-// We only use stage-2 bit;
-// shift[0] is used in load_buffer_4x4()
-// shift[1] is used in txfm_func_col()
-// shift[2] is used in txfm_func_row()
-static void fdct4x4_sse4_1(__m128i *in, int bit) {
-  const int32_t *cospi = cospi_arr(bit);
-  const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
-  const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
-  const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
-  const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
-  __m128i s0, s1, s2, s3;
-  __m128i u0, u1, u2, u3;
-  __m128i v0, v1, v2, v3;
-
-  s0 = _mm_add_epi32(in[0], in[3]);
-  s1 = _mm_add_epi32(in[1], in[2]);
-  s2 = _mm_sub_epi32(in[1], in[2]);
-  s3 = _mm_sub_epi32(in[0], in[3]);
-
-  // btf_32_sse4_1_type0(cospi32, cospi32, s[01], u[02], bit);
-  u0 = _mm_mullo_epi32(s0, cospi32);
-  u1 = _mm_mullo_epi32(s1, cospi32);
-  u2 = _mm_add_epi32(u0, u1);
-  v0 = _mm_sub_epi32(u0, u1);
-
-  u3 = _mm_add_epi32(u2, rnding);
-  v1 = _mm_add_epi32(v0, rnding);
-
-  u0 = _mm_srai_epi32(u3, bit);
-  u2 = _mm_srai_epi32(v1, bit);
-
-  // btf_32_sse4_1_type1(cospi48, cospi16, s[23], u[13], bit);
-  v0 = _mm_mullo_epi32(s2, cospi48);
-  v1 = _mm_mullo_epi32(s3, cospi16);
-  v2 = _mm_add_epi32(v0, v1);
-
-  v3 = _mm_add_epi32(v2, rnding);
-  u1 = _mm_srai_epi32(v3, bit);
-
-  v0 = _mm_mullo_epi32(s2, cospi16);
-  v1 = _mm_mullo_epi32(s3, cospi48);
-  v2 = _mm_sub_epi32(v1, v0);
-
-  v3 = _mm_add_epi32(v2, rnding);
-  u3 = _mm_srai_epi32(v3, bit);
-
-  // Note: shift[1] and shift[2] are zeros
-
-  // Transpose 4x4 32-bit
-  v0 = _mm_unpacklo_epi32(u0, u1);
-  v1 = _mm_unpackhi_epi32(u0, u1);
-  v2 = _mm_unpacklo_epi32(u2, u3);
-  v3 = _mm_unpackhi_epi32(u2, u3);
-
-  in[0] = _mm_unpacklo_epi64(v0, v2);
-  in[1] = _mm_unpackhi_epi64(v0, v2);
-  in[2] = _mm_unpacklo_epi64(v1, v3);
-  in[3] = _mm_unpackhi_epi64(v1, v3);
-}
-
-static INLINE void write_buffer_4x4(__m128i *res, int32_t *output) {
-  _mm_store_si128((__m128i *)(output + 0 * 4), res[0]);
-  _mm_store_si128((__m128i *)(output + 1 * 4), res[1]);
-  _mm_store_si128((__m128i *)(output + 2 * 4), res[2]);
-  _mm_store_si128((__m128i *)(output + 3 * 4), res[3]);
-}
-
-static void fadst4x4_sse4_1(__m128i *in, int bit) {
-  const int32_t *sinpi = sinpi_arr(bit);
-  const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
-  const __m128i sinpi1 = _mm_set1_epi32((int)sinpi[1]);
-  const __m128i sinpi2 = _mm_set1_epi32((int)sinpi[2]);
-  const __m128i sinpi3 = _mm_set1_epi32((int)sinpi[3]);
-  const __m128i sinpi4 = _mm_set1_epi32((int)sinpi[4]);
-  __m128i t;
-  __m128i s0, s1, s2, s3, s4, s5, s6, s7;
-  __m128i x0, x1, x2, x3;
-  __m128i u0, u1, u2, u3;
-  __m128i v0, v1, v2, v3;
-
-  s0 = _mm_mullo_epi32(in[0], sinpi1);
-  s1 = _mm_mullo_epi32(in[0], sinpi4);
-  s2 = _mm_mullo_epi32(in[1], sinpi2);
-  s3 = _mm_mullo_epi32(in[1], sinpi1);
-  s4 = _mm_mullo_epi32(in[2], sinpi3);
-  s5 = _mm_mullo_epi32(in[3], sinpi4);
-  s6 = _mm_mullo_epi32(in[3], sinpi2);
-  t = _mm_add_epi32(in[0], in[1]);
-  s7 = _mm_sub_epi32(t, in[3]);
-
-  t = _mm_add_epi32(s0, s2);
-  x0 = _mm_add_epi32(t, s5);
-  x1 = _mm_mullo_epi32(s7, sinpi3);
-  t = _mm_sub_epi32(s1, s3);
-  x2 = _mm_add_epi32(t, s6);
-  x3 = s4;
-
-  s0 = _mm_add_epi32(x0, x3);
-  s1 = x1;
-  s2 = _mm_sub_epi32(x2, x3);
-  t = _mm_sub_epi32(x2, x0);
-  s3 = _mm_add_epi32(t, x3);
-
-  u0 = _mm_add_epi32(s0, rnding);
-  u0 = _mm_srai_epi32(u0, bit);
-
-  u1 = _mm_add_epi32(s1, rnding);
-  u1 = _mm_srai_epi32(u1, bit);
-
-  u2 = _mm_add_epi32(s2, rnding);
-  u2 = _mm_srai_epi32(u2, bit);
-
-  u3 = _mm_add_epi32(s3, rnding);
-  u3 = _mm_srai_epi32(u3, bit);
-
-  v0 = _mm_unpacklo_epi32(u0, u1);
-  v1 = _mm_unpackhi_epi32(u0, u1);
-  v2 = _mm_unpacklo_epi32(u2, u3);
-  v3 = _mm_unpackhi_epi32(u2, u3);
-
-  in[0] = _mm_unpacklo_epi64(v0, v2);
-  in[1] = _mm_unpackhi_epi64(v0, v2);
-  in[2] = _mm_unpacklo_epi64(v1, v3);
-  in[3] = _mm_unpackhi_epi64(v1, v3);
-}
-
-void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *coeff,
-                               int input_stride, TX_TYPE tx_type, int bd) {
-  __m128i in[4];
-  const int8_t *shift = fwd_txfm_shift_ls[TX_4X4];
-  const int txw_idx = get_txw_idx(TX_4X4);
-  const int txh_idx = get_txh_idx(TX_4X4);
-
-  switch (tx_type) {
-    case DCT_DCT:
-      load_buffer_4x4(input, in, input_stride, 0, 0, shift[0]);
-      fdct4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
-      fdct4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
-      write_buffer_4x4(in, coeff);
-      break;
-    case ADST_DCT:
-      load_buffer_4x4(input, in, input_stride, 0, 0, shift[0]);
-      fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
-      fdct4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
-      write_buffer_4x4(in, coeff);
-      break;
-    case DCT_ADST:
-      load_buffer_4x4(input, in, input_stride, 0, 0, shift[0]);
-      fdct4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
-      fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
-      write_buffer_4x4(in, coeff);
-      break;
-    case ADST_ADST:
-      load_buffer_4x4(input, in, input_stride, 0, 0, shift[0]);
-      fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
-      fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
-      write_buffer_4x4(in, coeff);
-      break;
-    case FLIPADST_DCT:
-      load_buffer_4x4(input, in, input_stride, 1, 0, shift[0]);
-      fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
-      fdct4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
-      write_buffer_4x4(in, coeff);
-      break;
-    case DCT_FLIPADST:
-      load_buffer_4x4(input, in, input_stride, 0, 1, shift[0]);
-      fdct4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
-      fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
-      write_buffer_4x4(in, coeff);
-      break;
-    case FLIPADST_FLIPADST:
-      load_buffer_4x4(input, in, input_stride, 1, 1, shift[0]);
-      fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
-      fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
-      write_buffer_4x4(in, coeff);
-      break;
-    case ADST_FLIPADST:
-      load_buffer_4x4(input, in, input_stride, 0, 1, shift[0]);
-      fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
-      fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
-      write_buffer_4x4(in, coeff);
-      break;
-    case FLIPADST_ADST:
-      load_buffer_4x4(input, in, input_stride, 1, 0, shift[0]);
-      fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]);
-      fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]);
-      write_buffer_4x4(in, coeff);
-      break;
-    default: assert(0);
-  }
-  (void)bd;
-}
-
-static INLINE void load_buffer_8x8(const int16_t *input, __m128i *in,
-                                   int stride, int flipud, int fliplr,
-                                   int shift) {
-  __m128i u;
-  if (!flipud) {
-    in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride));
-    in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride));
-    in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride));
-    in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride));
-    in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride));
-    in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride));
-    in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride));
-    in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride));
-  } else {
-    in[0] = _mm_load_si128((const __m128i *)(input + 7 * stride));
-    in[1] = _mm_load_si128((const __m128i *)(input + 6 * stride));
-    in[2] = _mm_load_si128((const __m128i *)(input + 5 * stride));
-    in[3] = _mm_load_si128((const __m128i *)(input + 4 * stride));
-    in[4] = _mm_load_si128((const __m128i *)(input + 3 * stride));
-    in[5] = _mm_load_si128((const __m128i *)(input + 2 * stride));
-    in[6] = _mm_load_si128((const __m128i *)(input + 1 * stride));
-    in[7] = _mm_load_si128((const __m128i *)(input + 0 * stride));
-  }
-
-  if (fliplr) {
-    in[0] = mm_reverse_epi16(in[0]);
-    in[1] = mm_reverse_epi16(in[1]);
-    in[2] = mm_reverse_epi16(in[2]);
-    in[3] = mm_reverse_epi16(in[3]);
-    in[4] = mm_reverse_epi16(in[4]);
-    in[5] = mm_reverse_epi16(in[5]);
-    in[6] = mm_reverse_epi16(in[6]);
-    in[7] = mm_reverse_epi16(in[7]);
-  }
-
-  u = _mm_unpackhi_epi64(in[4], in[4]);
-  in[8] = _mm_cvtepi16_epi32(in[4]);
-  in[9] = _mm_cvtepi16_epi32(u);
-
-  u = _mm_unpackhi_epi64(in[5], in[5]);
-  in[10] = _mm_cvtepi16_epi32(in[5]);
-  in[11] = _mm_cvtepi16_epi32(u);
-
-  u = _mm_unpackhi_epi64(in[6], in[6]);
-  in[12] = _mm_cvtepi16_epi32(in[6]);
-  in[13] = _mm_cvtepi16_epi32(u);
-
-  u = _mm_unpackhi_epi64(in[7], in[7]);
-  in[14] = _mm_cvtepi16_epi32(in[7]);
-  in[15] = _mm_cvtepi16_epi32(u);
-
-  u = _mm_unpackhi_epi64(in[3], in[3]);
-  in[6] = _mm_cvtepi16_epi32(in[3]);
-  in[7] = _mm_cvtepi16_epi32(u);
-
-  u = _mm_unpackhi_epi64(in[2], in[2]);
-  in[4] = _mm_cvtepi16_epi32(in[2]);
-  in[5] = _mm_cvtepi16_epi32(u);
-
-  u = _mm_unpackhi_epi64(in[1], in[1]);
-  in[2] = _mm_cvtepi16_epi32(in[1]);
-  in[3] = _mm_cvtepi16_epi32(u);
-
-  u = _mm_unpackhi_epi64(in[0], in[0]);
-  in[0] = _mm_cvtepi16_epi32(in[0]);
-  in[1] = _mm_cvtepi16_epi32(u);
-
-  in[0] = _mm_slli_epi32(in[0], shift);
-  in[1] = _mm_slli_epi32(in[1], shift);
-  in[2] = _mm_slli_epi32(in[2], shift);
-  in[3] = _mm_slli_epi32(in[3], shift);
-  in[4] = _mm_slli_epi32(in[4], shift);
-  in[5] = _mm_slli_epi32(in[5], shift);
-  in[6] = _mm_slli_epi32(in[6], shift);
-  in[7] = _mm_slli_epi32(in[7], shift);
-
-  in[8] = _mm_slli_epi32(in[8], shift);
-  in[9] = _mm_slli_epi32(in[9], shift);
-  in[10] = _mm_slli_epi32(in[10], shift);
-  in[11] = _mm_slli_epi32(in[11], shift);
-  in[12] = _mm_slli_epi32(in[12], shift);
-  in[13] = _mm_slli_epi32(in[13], shift);
-  in[14] = _mm_slli_epi32(in[14], shift);
-  in[15] = _mm_slli_epi32(in[15], shift);
-}
-
-static INLINE void col_txfm_8x8_rounding(__m128i *in, int shift) {
-  const __m128i rounding = _mm_set1_epi32(1 << (shift - 1));
-
-  in[0] = _mm_add_epi32(in[0], rounding);
-  in[1] = _mm_add_epi32(in[1], rounding);
-  in[2] = _mm_add_epi32(in[2], rounding);
-  in[3] = _mm_add_epi32(in[3], rounding);
-  in[4] = _mm_add_epi32(in[4], rounding);
-  in[5] = _mm_add_epi32(in[5], rounding);
-  in[6] = _mm_add_epi32(in[6], rounding);
-  in[7] = _mm_add_epi32(in[7], rounding);
-  in[8] = _mm_add_epi32(in[8], rounding);
-  in[9] = _mm_add_epi32(in[9], rounding);
-  in[10] = _mm_add_epi32(in[10], rounding);
-  in[11] = _mm_add_epi32(in[11], rounding);
-  in[12] = _mm_add_epi32(in[12], rounding);
-  in[13] = _mm_add_epi32(in[13], rounding);
-  in[14] = _mm_add_epi32(in[14], rounding);
-  in[15] = _mm_add_epi32(in[15], rounding);
-
-  in[0] = _mm_srai_epi32(in[0], shift);
-  in[1] = _mm_srai_epi32(in[1], shift);
-  in[2] = _mm_srai_epi32(in[2], shift);
-  in[3] = _mm_srai_epi32(in[3], shift);
-  in[4] = _mm_srai_epi32(in[4], shift);
-  in[5] = _mm_srai_epi32(in[5], shift);
-  in[6] = _mm_srai_epi32(in[6], shift);
-  in[7] = _mm_srai_epi32(in[7], shift);
-  in[8] = _mm_srai_epi32(in[8], shift);
-  in[9] = _mm_srai_epi32(in[9], shift);
-  in[10] = _mm_srai_epi32(in[10], shift);
-  in[11] = _mm_srai_epi32(in[11], shift);
-  in[12] = _mm_srai_epi32(in[12], shift);
-  in[13] = _mm_srai_epi32(in[13], shift);
-  in[14] = _mm_srai_epi32(in[14], shift);
-  in[15] = _mm_srai_epi32(in[15], shift);
-}
-
-static INLINE void write_buffer_8x8(const __m128i *res, int32_t *output) {
-  _mm_store_si128((__m128i *)(output + 0 * 4), res[0]);
-  _mm_store_si128((__m128i *)(output + 1 * 4), res[1]);
-  _mm_store_si128((__m128i *)(output + 2 * 4), res[2]);
-  _mm_store_si128((__m128i *)(output + 3 * 4), res[3]);
-
-  _mm_store_si128((__m128i *)(output + 4 * 4), res[4]);
-  _mm_store_si128((__m128i *)(output + 5 * 4), res[5]);
-  _mm_store_si128((__m128i *)(output + 6 * 4), res[6]);
-  _mm_store_si128((__m128i *)(output + 7 * 4), res[7]);
-
-  _mm_store_si128((__m128i *)(output + 8 * 4), res[8]);
-  _mm_store_si128((__m128i *)(output + 9 * 4), res[9]);
-  _mm_store_si128((__m128i *)(output + 10 * 4), res[10]);
-  _mm_store_si128((__m128i *)(output + 11 * 4), res[11]);
-
-  _mm_store_si128((__m128i *)(output + 12 * 4), res[12]);
-  _mm_store_si128((__m128i *)(output + 13 * 4), res[13]);
-  _mm_store_si128((__m128i *)(output + 14 * 4), res[14]);
-  _mm_store_si128((__m128i *)(output + 15 * 4), res[15]);
-}
-
-static INLINE void write_buffer_16x8(const __m128i *res, int32_t *output,
-                                     const int stride) {
-  _mm_storeu_si128((__m128i *)(output), res[0]);
-  _mm_storeu_si128((__m128i *)(output + 4), res[1]);
-  _mm_storeu_si128((__m128i *)(output + stride), res[2]);
-  _mm_storeu_si128((__m128i *)(output + stride + 4), res[3]);
-
-  _mm_storeu_si128((__m128i *)(output + (stride * 2)), res[4]);
-  _mm_storeu_si128((__m128i *)(output + (stride * 2) + 4), res[5]);
-  _mm_storeu_si128((__m128i *)(output + (stride * 3)), res[6]);
-  _mm_storeu_si128((__m128i *)(output + (stride * 3) + 4), res[7]);
-
-  _mm_storeu_si128((__m128i *)(output + (stride * 4)), res[8]);
-  _mm_storeu_si128((__m128i *)(output + (stride * 4) + 4), res[9]);
-  _mm_storeu_si128((__m128i *)(output + (stride * 5)), res[10]);
-  _mm_storeu_si128((__m128i *)(output + (stride * 5) + 4), res[11]);
-
-  _mm_storeu_si128((__m128i *)(output + (stride * 6)), res[12]);
-  _mm_storeu_si128((__m128i *)(output + (stride * 6) + 4), res[13]);
-  _mm_storeu_si128((__m128i *)(output + (stride * 7)), res[14]);
-  _mm_storeu_si128((__m128i *)(output + (stride * 7) + 4), res[15]);
-}
-
-static void fdct8x8_sse4_1(__m128i *in, __m128i *out, int bit,
-                           const int col_num) {
-  (void)(col_num);
-  const int32_t *cospi = cospi_arr(bit);
-  const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
-  const __m128i cospim32 = _mm_set1_epi32(-cospi[32]);
-  const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
-  const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
-  const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
-  const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
-  const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
-  const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
-  const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
-  __m128i u[8], v[8];
-
-  // Even 8 points 0, 2, ..., 14
-  // stage 0
-  // stage 1
-  u[0] = _mm_add_epi32(in[0], in[14]);
-  v[7] = _mm_sub_epi32(in[0], in[14]);  // v[7]
-  u[1] = _mm_add_epi32(in[2], in[12]);
-  u[6] = _mm_sub_epi32(in[2], in[12]);
-  u[2] = _mm_add_epi32(in[4], in[10]);
-  u[5] = _mm_sub_epi32(in[4], in[10]);
-  u[3] = _mm_add_epi32(in[6], in[8]);
-  v[4] = _mm_sub_epi32(in[6], in[8]);  // v[4]
-
-  // stage 2
-  v[0] = _mm_add_epi32(u[0], u[3]);
-  v[3] = _mm_sub_epi32(u[0], u[3]);
-  v[1] = _mm_add_epi32(u[1], u[2]);
-  v[2] = _mm_sub_epi32(u[1], u[2]);
-
-  v[5] = _mm_mullo_epi32(u[5], cospim32);
-  v[6] = _mm_mullo_epi32(u[6], cospi32);
-  v[5] = _mm_add_epi32(v[5], v[6]);
-  v[5] = _mm_add_epi32(v[5], rnding);
-  v[5] = _mm_srai_epi32(v[5], bit);
-
-  u[0] = _mm_mullo_epi32(u[5], cospi32);
-  v[6] = _mm_mullo_epi32(u[6], cospim32);
-  v[6] = _mm_sub_epi32(u[0], v[6]);
-  v[6] = _mm_add_epi32(v[6], rnding);
-  v[6] = _mm_srai_epi32(v[6], bit);
-
-  // stage 3
-  // type 0
-  v[0] = _mm_mullo_epi32(v[0], cospi32);
-  v[1] = _mm_mullo_epi32(v[1], cospi32);
-  u[0] = _mm_add_epi32(v[0], v[1]);
-  u[0] = _mm_add_epi32(u[0], rnding);
-  u[0] = _mm_srai_epi32(u[0], bit);
-
-  u[1] = _mm_sub_epi32(v[0], v[1]);
-  u[1] = _mm_add_epi32(u[1], rnding);
-  u[1] = _mm_srai_epi32(u[1], bit);
-
-  // type 1
-  v[0] = _mm_mullo_epi32(v[2], cospi48);
-  v[1] = _mm_mullo_epi32(v[3], cospi16);
-  u[2] = _mm_add_epi32(v[0], v[1]);
-  u[2] = _mm_add_epi32(u[2], rnding);
-  u[2] = _mm_srai_epi32(u[2], bit);
-
-  v[0] = _mm_mullo_epi32(v[2], cospi16);
-  v[1] = _mm_mullo_epi32(v[3], cospi48);
-  u[3] = _mm_sub_epi32(v[1], v[0]);
-  u[3] = _mm_add_epi32(u[3], rnding);
-  u[3] = _mm_srai_epi32(u[3], bit);
-
-  u[4] = _mm_add_epi32(v[4], v[5]);
-  u[5] = _mm_sub_epi32(v[4], v[5]);
-  u[6] = _mm_sub_epi32(v[7], v[6]);
-  u[7] = _mm_add_epi32(v[7], v[6]);
-
-  // stage 4
-  // stage 5
-  v[0] = _mm_mullo_epi32(u[4], cospi56);
-  v[1] = _mm_mullo_epi32(u[7], cospi8);
-  v[0] = _mm_add_epi32(v[0], v[1]);
-  v[0] = _mm_add_epi32(v[0], rnding);
-  out[2] = _mm_srai_epi32(v[0], bit);  // buf0[4]
-
-  v[0] = _mm_mullo_epi32(u[4], cospi8);
-  v[1] = _mm_mullo_epi32(u[7], cospi56);
-  v[0] = _mm_sub_epi32(v[1], v[0]);
-  v[0] = _mm_add_epi32(v[0], rnding);
-  out[14] = _mm_srai_epi32(v[0], bit);  // buf0[7]
-
-  v[0] = _mm_mullo_epi32(u[5], cospi24);
-  v[1] = _mm_mullo_epi32(u[6], cospi40);
-  v[0] = _mm_add_epi32(v[0], v[1]);
-  v[0] = _mm_add_epi32(v[0], rnding);
-  out[10] = _mm_srai_epi32(v[0], bit);  // buf0[5]
-
-  v[0] = _mm_mullo_epi32(u[5], cospi40);
-  v[1] = _mm_mullo_epi32(u[6], cospi24);
-  v[0] = _mm_sub_epi32(v[1], v[0]);
-  v[0] = _mm_add_epi32(v[0], rnding);
-  out[6] = _mm_srai_epi32(v[0], bit);  // buf0[6]
-
-  out[0] = u[0];   // buf0[0]
-  out[8] = u[1];   // buf0[1]
-  out[4] = u[2];   // buf0[2]
-  out[12] = u[3];  // buf0[3]
-
-  // Odd 8 points: 1, 3, ..., 15
-  // stage 0
-  // stage 1
-  u[0] = _mm_add_epi32(in[1], in[15]);
-  v[7] = _mm_sub_epi32(in[1], in[15]);  // v[7]
-  u[1] = _mm_add_epi32(in[3], in[13]);
-  u[6] = _mm_sub_epi32(in[3], in[13]);
-  u[2] = _mm_add_epi32(in[5], in[11]);
-  u[5] = _mm_sub_epi32(in[5], in[11]);
-  u[3] = _mm_add_epi32(in[7], in[9]);
-  v[4] = _mm_sub_epi32(in[7], in[9]);  // v[4]
-
-  // stage 2
-  v[0] = _mm_add_epi32(u[0], u[3]);
-  v[3] = _mm_sub_epi32(u[0], u[3]);
-  v[1] = _mm_add_epi32(u[1], u[2]);
-  v[2] = _mm_sub_epi32(u[1], u[2]);
-
-  v[5] = _mm_mullo_epi32(u[5], cospim32);
-  v[6] = _mm_mullo_epi32(u[6], cospi32);
-  v[5] = _mm_add_epi32(v[5], v[6]);
-  v[5] = _mm_add_epi32(v[5], rnding);
-  v[5] = _mm_srai_epi32(v[5], bit);
-
-  u[0] = _mm_mullo_epi32(u[5], cospi32);
-  v[6] = _mm_mullo_epi32(u[6], cospim32);
-  v[6] = _mm_sub_epi32(u[0], v[6]);
-  v[6] = _mm_add_epi32(v[6], rnding);
-  v[6] = _mm_srai_epi32(v[6], bit);
-
-  // stage 3
-  // type 0
-  v[0] = _mm_mullo_epi32(v[0], cospi32);
-  v[1] = _mm_mullo_epi32(v[1], cospi32);
-  u[0] = _mm_add_epi32(v[0], v[1]);
-  u[0] = _mm_add_epi32(u[0], rnding);
-  u[0] = _mm_srai_epi32(u[0], bit);
-
-  u[1] = _mm_sub_epi32(v[0], v[1]);
-  u[1] = _mm_add_epi32(u[1], rnding);
-  u[1] = _mm_srai_epi32(u[1], bit);
-
-  // type 1
-  v[0] = _mm_mullo_epi32(v[2], cospi48);
-  v[1] = _mm_mullo_epi32(v[3], cospi16);
-  u[2] = _mm_add_epi32(v[0], v[1]);
-  u[2] = _mm_add_epi32(u[2], rnding);
-  u[2] = _mm_srai_epi32(u[2], bit);
-
-  v[0] = _mm_mullo_epi32(v[2], cospi16);
-  v[1] = _mm_mullo_epi32(v[3], cospi48);
-  u[3] = _mm_sub_epi32(v[1], v[0]);
-  u[3] = _mm_add_epi32(u[3], rnding);
-  u[3] = _mm_srai_epi32(u[3], bit);
-
-  u[4] = _mm_add_epi32(v[4], v[5]);
-  u[5] = _mm_sub_epi32(v[4], v[5]);
-  u[6] = _mm_sub_epi32(v[7], v[6]);
-  u[7] = _mm_add_epi32(v[7], v[6]);
-
-  // stage 4
-  // stage 5
-  v[0] = _mm_mullo_epi32(u[4], cospi56);
-  v[1] = _mm_mullo_epi32(u[7], cospi8);
-  v[0] = _mm_add_epi32(v[0], v[1]);
-  v[0] = _mm_add_epi32(v[0], rnding);
-  out[3] = _mm_srai_epi32(v[0], bit);  // buf0[4]
-
-  v[0] = _mm_mullo_epi32(u[4], cospi8);
-  v[1] = _mm_mullo_epi32(u[7], cospi56);
-  v[0] = _mm_sub_epi32(v[1], v[0]);
-  v[0] = _mm_add_epi32(v[0], rnding);
-  out[15] = _mm_srai_epi32(v[0], bit);  // buf0[7]
-
-  v[0] = _mm_mullo_epi32(u[5], cospi24);
-  v[1] = _mm_mullo_epi32(u[6], cospi40);
-  v[0] = _mm_add_epi32(v[0], v[1]);
-  v[0] = _mm_add_epi32(v[0], rnding);
-  out[11] = _mm_srai_epi32(v[0], bit);  // buf0[5]
-
-  v[0] = _mm_mullo_epi32(u[5], cospi40);
-  v[1] = _mm_mullo_epi32(u[6], cospi24);
-  v[0] = _mm_sub_epi32(v[1], v[0]);
-  v[0] = _mm_add_epi32(v[0], rnding);
-  out[7] = _mm_srai_epi32(v[0], bit);  // buf0[6]
-
-  out[1] = u[0];   // buf0[0]
-  out[9] = u[1];   // buf0[1]
-  out[5] = u[2];   // buf0[2]
-  out[13] = u[3];  // buf0[3]
-}
-
-static void fadst8x8_sse4_1(__m128i *in, __m128i *out, int bit,
-                            const int col_num) {
-  (void)(col_num);
-  const int32_t *cospi = cospi_arr(bit);
-  const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
-  const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
-  const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
-  const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
-  const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
-  const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
-  const __m128i cospim4 = _mm_set1_epi32(-cospi[4]);
-  const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
-  const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
-  const __m128i cospim20 = _mm_set1_epi32(-cospi[20]);
-  const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
-  const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
-  const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
-  const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
-  const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
-  const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
-  const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
-  const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
-  const __m128i zero = _mm_setzero_si128();
-  __m128i u0, u1, u2, u3, u4, u5, u6, u7;
-  __m128i v0, v1, v2, v3, v4, v5, v6, v7;
-  __m128i x, y;
-  int col;
-
-  // Note:
-  //  Even column: 0, 2, ..., 14
-  //  Odd column: 1, 3, ..., 15
-  //  one even column plus one odd column constructs one row (8 coeffs)
-  //  total we have 8 rows (8x8).
-  for (col = 0; col < 2; ++col) {
-    // stage 0
-    // stage 1
-    u0 = in[2 * 0 + col];
-    u1 = _mm_sub_epi32(zero, in[2 * 7 + col]);
-    u2 = _mm_sub_epi32(zero, in[2 * 3 + col]);
-    u3 = in[2 * 4 + col];
-    u4 = _mm_sub_epi32(zero, in[2 * 1 + col]);
-    u5 = in[2 * 6 + col];
-    u6 = in[2 * 2 + col];
-    u7 = _mm_sub_epi32(zero, in[2 * 5 + col]);
-
-    // stage 2
-    v0 = u0;
-    v1 = u1;
-
-    x = _mm_mullo_epi32(u2, cospi32);
-    y = _mm_mullo_epi32(u3, cospi32);
-    v2 = _mm_add_epi32(x, y);
-    v2 = _mm_add_epi32(v2, rnding);
-    v2 = _mm_srai_epi32(v2, bit);
-
-    v3 = _mm_sub_epi32(x, y);
-    v3 = _mm_add_epi32(v3, rnding);
-    v3 = _mm_srai_epi32(v3, bit);
-
-    v4 = u4;
-    v5 = u5;
-
-    x = _mm_mullo_epi32(u6, cospi32);
-    y = _mm_mullo_epi32(u7, cospi32);
-    v6 = _mm_add_epi32(x, y);
-    v6 = _mm_add_epi32(v6, rnding);
-    v6 = _mm_srai_epi32(v6, bit);
-
-    v7 = _mm_sub_epi32(x, y);
-    v7 = _mm_add_epi32(v7, rnding);
-    v7 = _mm_srai_epi32(v7, bit);
-
-    // stage 3
-    u0 = _mm_add_epi32(v0, v2);
-    u1 = _mm_add_epi32(v1, v3);
-    u2 = _mm_sub_epi32(v0, v2);
-    u3 = _mm_sub_epi32(v1, v3);
-    u4 = _mm_add_epi32(v4, v6);
-    u5 = _mm_add_epi32(v5, v7);
-    u6 = _mm_sub_epi32(v4, v6);
-    u7 = _mm_sub_epi32(v5, v7);
-
-    // stage 4
-    v0 = u0;
-    v1 = u1;
-    v2 = u2;
-    v3 = u3;
-
-    x = _mm_mullo_epi32(u4, cospi16);
-    y = _mm_mullo_epi32(u5, cospi48);
-    v4 = _mm_add_epi32(x, y);
-    v4 = _mm_add_epi32(v4, rnding);
-    v4 = _mm_srai_epi32(v4, bit);
-
-    x = _mm_mullo_epi32(u4, cospi48);
-    y = _mm_mullo_epi32(u5, cospim16);
-    v5 = _mm_add_epi32(x, y);
-    v5 = _mm_add_epi32(v5, rnding);
-    v5 = _mm_srai_epi32(v5, bit);
-
-    x = _mm_mullo_epi32(u6, cospim48);
-    y = _mm_mullo_epi32(u7, cospi16);
-    v6 = _mm_add_epi32(x, y);
-    v6 = _mm_add_epi32(v6, rnding);
-    v6 = _mm_srai_epi32(v6, bit);
-
-    x = _mm_mullo_epi32(u6, cospi16);
-    y = _mm_mullo_epi32(u7, cospi48);
-    v7 = _mm_add_epi32(x, y);
-    v7 = _mm_add_epi32(v7, rnding);
-    v7 = _mm_srai_epi32(v7, bit);
-
-    // stage 5
-    u0 = _mm_add_epi32(v0, v4);
-    u1 = _mm_add_epi32(v1, v5);
-    u2 = _mm_add_epi32(v2, v6);
-    u3 = _mm_add_epi32(v3, v7);
-    u4 = _mm_sub_epi32(v0, v4);
-    u5 = _mm_sub_epi32(v1, v5);
-    u6 = _mm_sub_epi32(v2, v6);
-    u7 = _mm_sub_epi32(v3, v7);
-
-    // stage 6
-    x = _mm_mullo_epi32(u0, cospi4);
-    y = _mm_mullo_epi32(u1, cospi60);
-    v0 = _mm_add_epi32(x, y);
-    v0 = _mm_add_epi32(v0, rnding);
-    v0 = _mm_srai_epi32(v0, bit);
-
-    x = _mm_mullo_epi32(u0, cospi60);
-    y = _mm_mullo_epi32(u1, cospim4);
-    v1 = _mm_add_epi32(x, y);
-    v1 = _mm_add_epi32(v1, rnding);
-    v1 = _mm_srai_epi32(v1, bit);
-
-    x = _mm_mullo_epi32(u2, cospi20);
-    y = _mm_mullo_epi32(u3, cospi44);
-    v2 = _mm_add_epi32(x, y);
-    v2 = _mm_add_epi32(v2, rnding);
-    v2 = _mm_srai_epi32(v2, bit);
-
-    x = _mm_mullo_epi32(u2, cospi44);
-    y = _mm_mullo_epi32(u3, cospim20);
-    v3 = _mm_add_epi32(x, y);
-    v3 = _mm_add_epi32(v3, rnding);
-    v3 = _mm_srai_epi32(v3, bit);
-
-    x = _mm_mullo_epi32(u4, cospi36);
-    y = _mm_mullo_epi32(u5, cospi28);
-    v4 = _mm_add_epi32(x, y);
-    v4 = _mm_add_epi32(v4, rnding);
-    v4 = _mm_srai_epi32(v4, bit);
-
-    x = _mm_mullo_epi32(u4, cospi28);
-    y = _mm_mullo_epi32(u5, cospim36);
-    v5 = _mm_add_epi32(x, y);
-    v5 = _mm_add_epi32(v5, rnding);
-    v5 = _mm_srai_epi32(v5, bit);
-
-    x = _mm_mullo_epi32(u6, cospi52);
-    y = _mm_mullo_epi32(u7, cospi12);
-    v6 = _mm_add_epi32(x, y);
-    v6 = _mm_add_epi32(v6, rnding);
-    v6 = _mm_srai_epi32(v6, bit);
-
-    x = _mm_mullo_epi32(u6, cospi12);
-    y = _mm_mullo_epi32(u7, cospim52);
-    v7 = _mm_add_epi32(x, y);
-    v7 = _mm_add_epi32(v7, rnding);
-    v7 = _mm_srai_epi32(v7, bit);
-
-    // stage 7
-    out[2 * 0 + col] = v1;
-    out[2 * 1 + col] = v6;
-    out[2 * 2 + col] = v3;
-    out[2 * 3 + col] = v4;
-    out[2 * 4 + col] = v5;
-    out[2 * 5 + col] = v2;
-    out[2 * 6 + col] = v7;
-    out[2 * 7 + col] = v0;
-  }
-}
-
-void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *coeff, int stride,
-                               TX_TYPE tx_type, int bd) {
-  __m128i in[16], out[16];
-  const int8_t *shift = fwd_txfm_shift_ls[TX_8X8];
-  const int txw_idx = get_txw_idx(TX_8X8);
-  const int txh_idx = get_txh_idx(TX_8X8);
-
-  switch (tx_type) {
-    case DCT_DCT:
-      load_buffer_8x8(input, in, stride, 0, 0, shift[0]);
-      fdct8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
-      col_txfm_8x8_rounding(out, -shift[1]);
-      transpose_8x8(out, in);
-      fdct8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
-      transpose_8x8(out, in);
-      write_buffer_8x8(in, coeff);
-      break;
-    case ADST_DCT:
-      load_buffer_8x8(input, in, stride, 0, 0, shift[0]);
-      fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
-      col_txfm_8x8_rounding(out, -shift[1]);
-      transpose_8x8(out, in);
-      fdct8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
-      transpose_8x8(out, in);
-      write_buffer_8x8(in, coeff);
-      break;
-    case DCT_ADST:
-      load_buffer_8x8(input, in, stride, 0, 0, shift[0]);
-      fdct8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
-      col_txfm_8x8_rounding(out, -shift[1]);
-      transpose_8x8(out, in);
-      fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
-      transpose_8x8(out, in);
-      write_buffer_8x8(in, coeff);
-      break;
-    case ADST_ADST:
-      load_buffer_8x8(input, in, stride, 0, 0, shift[0]);
-      fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
-      col_txfm_8x8_rounding(out, -shift[1]);
-      transpose_8x8(out, in);
-      fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
-      transpose_8x8(out, in);
-      write_buffer_8x8(in, coeff);
-      break;
-    case FLIPADST_DCT:
-      load_buffer_8x8(input, in, stride, 1, 0, shift[0]);
-      fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
-      col_txfm_8x8_rounding(out, -shift[1]);
-      transpose_8x8(out, in);
-      fdct8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
-      transpose_8x8(out, in);
-      write_buffer_8x8(in, coeff);
-      break;
-    case DCT_FLIPADST:
-      load_buffer_8x8(input, in, stride, 0, 1, shift[0]);
-      fdct8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
-      col_txfm_8x8_rounding(out, -shift[1]);
-      transpose_8x8(out, in);
-      fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
-      transpose_8x8(out, in);
-      write_buffer_8x8(in, coeff);
-      break;
-    case FLIPADST_FLIPADST:
-      load_buffer_8x8(input, in, stride, 1, 1, shift[0]);
-      fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
-      col_txfm_8x8_rounding(out, -shift[1]);
-      transpose_8x8(out, in);
-      fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
-      transpose_8x8(out, in);
-      write_buffer_8x8(in, coeff);
-      break;
-    case ADST_FLIPADST:
-      load_buffer_8x8(input, in, stride, 0, 1, shift[0]);
-      fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
-      col_txfm_8x8_rounding(out, -shift[1]);
-      transpose_8x8(out, in);
-      fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
-      transpose_8x8(out, in);
-      write_buffer_8x8(in, coeff);
-      break;
-    case FLIPADST_ADST:
-      load_buffer_8x8(input, in, stride, 1, 0, shift[0]);
-      fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0);
-      col_txfm_8x8_rounding(out, -shift[1]);
-      transpose_8x8(out, in);
-      fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0);
-      transpose_8x8(out, in);
-      write_buffer_8x8(in, coeff);
-      break;
-    default: assert(0);
-  }
-  (void)bd;
-}
-
-// Hybrid Transform 16x16
-
-static INLINE void convert_8x8_to_16x16(const __m128i *in, __m128i *out) {
-  int row_index = 0;
-  int dst_index = 0;
-  int src_index = 0;
-
-  // row 0, 1, .., 7
-  do {
-    out[dst_index] = in[src_index];
-    out[dst_index + 1] = in[src_index + 1];
-    out[dst_index + 2] = in[src_index + 16];
-    out[dst_index + 3] = in[src_index + 17];
-    dst_index += 4;
-    src_index += 2;
-    row_index += 1;
-  } while (row_index < 8);
-
-  // row 8, 9, ..., 15
-  src_index += 16;
-  do {
-    out[dst_index] = in[src_index];
-    out[dst_index + 1] = in[src_index + 1];
-    out[dst_index + 2] = in[src_index + 16];
-    out[dst_index + 3] = in[src_index + 17];
-    dst_index += 4;
-    src_index += 2;
-    row_index += 1;
-  } while (row_index < 16);
-}
-
-static INLINE void load_buffer_16x16(const int16_t *input, __m128i *out,
-                                     int stride, int flipud, int fliplr,
-                                     int shift) {
-  __m128i in[64];
-  // Load 4 8x8 blocks
-  const int16_t *topL = input;
-  const int16_t *topR = input + 8;
-  const int16_t *botL = input + 8 * stride;
-  const int16_t *botR = input + 8 * stride + 8;
-
-  const int16_t *tmp;
-
-  if (flipud) {
-    // Swap left columns
-    tmp = topL;
-    topL = botL;
-    botL = tmp;
-    // Swap right columns
-    tmp = topR;
-    topR = botR;
-    botR = tmp;
-  }
-
-  if (fliplr) {
-    // Swap top rows
-    tmp = topL;
-    topL = topR;
-    topR = tmp;
-    // Swap bottom rows
-    tmp = botL;
-    botL = botR;
-    botR = tmp;
-  }
-
-  // load first 8 columns
-  load_buffer_8x8(topL, &in[0], stride, flipud, fliplr, shift);
-  load_buffer_8x8(botL, &in[32], stride, flipud, fliplr, shift);
-
-  // load second 8 columns
-  load_buffer_8x8(topR, &in[16], stride, flipud, fliplr, shift);
-  load_buffer_8x8(botR, &in[48], stride, flipud, fliplr, shift);
-
-  convert_8x8_to_16x16(in, out);
-}
-
-static INLINE void load_buffer_8x16(const int16_t *input, __m128i *out,
-                                    int stride, int flipud, int fliplr,
-                                    int shift) {
-  const int16_t *topL = input;
-  const int16_t *botL = input + 8 * stride;
-
-  const int16_t *tmp;
-
-  if (flipud) {
-    tmp = topL;
-    topL = botL;
-    botL = tmp;
-  }
-
-  load_buffer_8x8(topL, out, stride, flipud, fliplr, shift);
-  load_buffer_8x8(botL, out + 16, stride, flipud, fliplr, shift);
-}
-
-static void fdct16x16_sse4_1(__m128i *in, __m128i *out, int bit,
-                             const int col_num) {
-  const int32_t *cospi = cospi_arr(bit);
-  const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
-  const __m128i cospim32 = _mm_set1_epi32(-cospi[32]);
-  const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
-  const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
-  const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
-  const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
-  const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
-  const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
-  const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
-  const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
-  const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
-  const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
-  const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
-  const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
-  const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
-  const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
-  const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
-  const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
-  const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
-  __m128i u[16], v[16], x;
-  int col;
-
-  // Calculate the column 0, 1, 2, 3
-  for (col = 0; col < col_num; ++col) {
-    // stage 0
-    // stage 1
-    u[0] = _mm_add_epi32(in[0 * col_num + col], in[15 * col_num + col]);
-    u[15] = _mm_sub_epi32(in[0 * col_num + col], in[15 * col_num + col]);
-    u[1] = _mm_add_epi32(in[1 * col_num + col], in[14 * col_num + col]);
-    u[14] = _mm_sub_epi32(in[1 * col_num + col], in[14 * col_num + col]);
-    u[2] = _mm_add_epi32(in[2 * col_num + col], in[13 * col_num + col]);
-    u[13] = _mm_sub_epi32(in[2 * col_num + col], in[13 * col_num + col]);
-    u[3] = _mm_add_epi32(in[3 * col_num + col], in[12 * col_num + col]);
-    u[12] = _mm_sub_epi32(in[3 * col_num + col], in[12 * col_num + col]);
-    u[4] = _mm_add_epi32(in[4 * col_num + col], in[11 * col_num + col]);
-    u[11] = _mm_sub_epi32(in[4 * col_num + col], in[11 * col_num + col]);
-    u[5] = _mm_add_epi32(in[5 * col_num + col], in[10 * col_num + col]);
-    u[10] = _mm_sub_epi32(in[5 * col_num + col], in[10 * col_num + col]);
-    u[6] = _mm_add_epi32(in[6 * col_num + col], in[9 * col_num + col]);
-    u[9] = _mm_sub_epi32(in[6 * col_num + col], in[9 * col_num + col]);
-    u[7] = _mm_add_epi32(in[7 * col_num + col], in[8 * col_num + col]);
-    u[8] = _mm_sub_epi32(in[7 * col_num + col], in[8 * col_num + col]);
-
-    // stage 2
-    v[0] = _mm_add_epi32(u[0], u[7]);
-    v[7] = _mm_sub_epi32(u[0], u[7]);
-    v[1] = _mm_add_epi32(u[1], u[6]);
-    v[6] = _mm_sub_epi32(u[1], u[6]);
-    v[2] = _mm_add_epi32(u[2], u[5]);
-    v[5] = _mm_sub_epi32(u[2], u[5]);
-    v[3] = _mm_add_epi32(u[3], u[4]);
-    v[4] = _mm_sub_epi32(u[3], u[4]);
-    v[8] = u[8];
-    v[9] = u[9];
-
-    v[10] = _mm_mullo_epi32(u[10], cospim32);
-    x = _mm_mullo_epi32(u[13], cospi32);
-    v[10] = _mm_add_epi32(v[10], x);
-    v[10] = _mm_add_epi32(v[10], rnding);
-    v[10] = _mm_srai_epi32(v[10], bit);
-
-    v[13] = _mm_mullo_epi32(u[10], cospi32);
-    x = _mm_mullo_epi32(u[13], cospim32);
-    v[13] = _mm_sub_epi32(v[13], x);
-    v[13] = _mm_add_epi32(v[13], rnding);
-    v[13] = _mm_srai_epi32(v[13], bit);
-
-    v[11] = _mm_mullo_epi32(u[11], cospim32);
-    x = _mm_mullo_epi32(u[12], cospi32);
-    v[11] = _mm_add_epi32(v[11], x);
-    v[11] = _mm_add_epi32(v[11], rnding);
-    v[11] = _mm_srai_epi32(v[11], bit);
-
-    v[12] = _mm_mullo_epi32(u[11], cospi32);
-    x = _mm_mullo_epi32(u[12], cospim32);
-    v[12] = _mm_sub_epi32(v[12], x);
-    v[12] = _mm_add_epi32(v[12], rnding);
-    v[12] = _mm_srai_epi32(v[12], bit);
-    v[14] = u[14];
-    v[15] = u[15];
-
-    // stage 3
-    u[0] = _mm_add_epi32(v[0], v[3]);
-    u[3] = _mm_sub_epi32(v[0], v[3]);
-    u[1] = _mm_add_epi32(v[1], v[2]);
-    u[2] = _mm_sub_epi32(v[1], v[2]);
-    u[4] = v[4];
-
-    u[5] = _mm_mullo_epi32(v[5], cospim32);
-    x = _mm_mullo_epi32(v[6], cospi32);
-    u[5] = _mm_add_epi32(u[5], x);
-    u[5] = _mm_add_epi32(u[5], rnding);
-    u[5] = _mm_srai_epi32(u[5], bit);
-
-    u[6] = _mm_mullo_epi32(v[5], cospi32);
-    x = _mm_mullo_epi32(v[6], cospim32);
-    u[6] = _mm_sub_epi32(u[6], x);
-    u[6] = _mm_add_epi32(u[6], rnding);
-    u[6] = _mm_srai_epi32(u[6], bit);
-
-    u[7] = v[7];
-    u[8] = _mm_add_epi32(v[8], v[11]);
-    u[11] = _mm_sub_epi32(v[8], v[11]);
-    u[9] = _mm_add_epi32(v[9], v[10]);
-    u[10] = _mm_sub_epi32(v[9], v[10]);
-    u[12] = _mm_sub_epi32(v[15], v[12]);
-    u[15] = _mm_add_epi32(v[15], v[12]);
-    u[13] = _mm_sub_epi32(v[14], v[13]);
-    u[14] = _mm_add_epi32(v[14], v[13]);
-
-    // stage 4
-    u[0] = _mm_mullo_epi32(u[0], cospi32);
-    u[1] = _mm_mullo_epi32(u[1], cospi32);
-    v[0] = _mm_add_epi32(u[0], u[1]);
-    v[0] = _mm_add_epi32(v[0], rnding);
-    v[0] = _mm_srai_epi32(v[0], bit);
-
-    v[1] = _mm_sub_epi32(u[0], u[1]);
-    v[1] = _mm_add_epi32(v[1], rnding);
-    v[1] = _mm_srai_epi32(v[1], bit);
-
-    v[2] = _mm_mullo_epi32(u[2], cospi48);
-    x = _mm_mullo_epi32(u[3], cospi16);
-    v[2] = _mm_add_epi32(v[2], x);
-    v[2] = _mm_add_epi32(v[2], rnding);
-    v[2] = _mm_srai_epi32(v[2], bit);
-
-    v[3] = _mm_mullo_epi32(u[2], cospi16);
-    x = _mm_mullo_epi32(u[3], cospi48);
-    v[3] = _mm_sub_epi32(x, v[3]);
-    v[3] = _mm_add_epi32(v[3], rnding);
-    v[3] = _mm_srai_epi32(v[3], bit);
-
-    v[4] = _mm_add_epi32(u[4], u[5]);
-    v[5] = _mm_sub_epi32(u[4], u[5]);
-    v[6] = _mm_sub_epi32(u[7], u[6]);
-    v[7] = _mm_add_epi32(u[7], u[6]);
-    v[8] = u[8];
-
-    v[9] = _mm_mullo_epi32(u[9], cospim16);
-    x = _mm_mullo_epi32(u[14], cospi48);
-    v[9] = _mm_add_epi32(v[9], x);
-    v[9] = _mm_add_epi32(v[9], rnding);
-    v[9] = _mm_srai_epi32(v[9], bit);
-
-    v[14] = _mm_mullo_epi32(u[9], cospi48);
-    x = _mm_mullo_epi32(u[14], cospim16);
-    v[14] = _mm_sub_epi32(v[14], x);
-    v[14] = _mm_add_epi32(v[14], rnding);
-    v[14] = _mm_srai_epi32(v[14], bit);
-
-    v[10] = _mm_mullo_epi32(u[10], cospim48);
-    x = _mm_mullo_epi32(u[13], cospim16);
-    v[10] = _mm_add_epi32(v[10], x);
-    v[10] = _mm_add_epi32(v[10], rnding);
-    v[10] = _mm_srai_epi32(v[10], bit);
-
-    v[13] = _mm_mullo_epi32(u[10], cospim16);
-    x = _mm_mullo_epi32(u[13], cospim48);
-    v[13] = _mm_sub_epi32(v[13], x);
-    v[13] = _mm_add_epi32(v[13], rnding);
-    v[13] = _mm_srai_epi32(v[13], bit);
-
-    v[11] = u[11];
-    v[12] = u[12];
-    v[15] = u[15];
-
-    // stage 5
-    u[0] = v[0];
-    u[1] = v[1];
-    u[2] = v[2];
-    u[3] = v[3];
-
-    u[4] = _mm_mullo_epi32(v[4], cospi56);
-    x = _mm_mullo_epi32(v[7], cospi8);
-    u[4] = _mm_add_epi32(u[4], x);
-    u[4] = _mm_add_epi32(u[4], rnding);
-    u[4] = _mm_srai_epi32(u[4], bit);
-
-    u[7] = _mm_mullo_epi32(v[4], cospi8);
-    x = _mm_mullo_epi32(v[7], cospi56);
-    u[7] = _mm_sub_epi32(x, u[7]);
-    u[7] = _mm_add_epi32(u[7], rnding);
-    u[7] = _mm_srai_epi32(u[7], bit);
-
-    u[5] = _mm_mullo_epi32(v[5], cospi24);
-    x = _mm_mullo_epi32(v[6], cospi40);
-    u[5] = _mm_add_epi32(u[5], x);
-    u[5] = _mm_add_epi32(u[5], rnding);
-    u[5] = _mm_srai_epi32(u[5], bit);
-
-    u[6] = _mm_mullo_epi32(v[5], cospi40);
-    x = _mm_mullo_epi32(v[6], cospi24);
-    u[6] = _mm_sub_epi32(x, u[6]);
-    u[6] = _mm_add_epi32(u[6], rnding);
-    u[6] = _mm_srai_epi32(u[6], bit);
-
-    u[8] = _mm_add_epi32(v[8], v[9]);
-    u[9] = _mm_sub_epi32(v[8], v[9]);
-    u[10] = _mm_sub_epi32(v[11], v[10]);
-    u[11] = _mm_add_epi32(v[11], v[10]);
-    u[12] = _mm_add_epi32(v[12], v[13]);
-    u[13] = _mm_sub_epi32(v[12], v[13]);
-    u[14] = _mm_sub_epi32(v[15], v[14]);
-    u[15] = _mm_add_epi32(v[15], v[14]);
-
-    // stage 6
-    v[0] = u[0];
-    v[1] = u[1];
-    v[2] = u[2];
-    v[3] = u[3];
-    v[4] = u[4];
-    v[5] = u[5];
-    v[6] = u[6];
-    v[7] = u[7];
-
-    v[8] = _mm_mullo_epi32(u[8], cospi60);
-    x = _mm_mullo_epi32(u[15], cospi4);
-    v[8] = _mm_add_epi32(v[8], x);
-    v[8] = _mm_add_epi32(v[8], rnding);
-    v[8] = _mm_srai_epi32(v[8], bit);
-
-    v[15] = _mm_mullo_epi32(u[8], cospi4);
-    x = _mm_mullo_epi32(u[15], cospi60);
-    v[15] = _mm_sub_epi32(x, v[15]);
-    v[15] = _mm_add_epi32(v[15], rnding);
-    v[15] = _mm_srai_epi32(v[15], bit);
-
-    v[9] = _mm_mullo_epi32(u[9], cospi28);
-    x = _mm_mullo_epi32(u[14], cospi36);
-    v[9] = _mm_add_epi32(v[9], x);
-    v[9] = _mm_add_epi32(v[9], rnding);
-    v[9] = _mm_srai_epi32(v[9], bit);
-
-    v[14] = _mm_mullo_epi32(u[9], cospi36);
-    x = _mm_mullo_epi32(u[14], cospi28);
-    v[14] = _mm_sub_epi32(x, v[14]);
-    v[14] = _mm_add_epi32(v[14], rnding);
-    v[14] = _mm_srai_epi32(v[14], bit);
-
-    v[10] = _mm_mullo_epi32(u[10], cospi44);
-    x = _mm_mullo_epi32(u[13], cospi20);
-    v[10] = _mm_add_epi32(v[10], x);
-    v[10] = _mm_add_epi32(v[10], rnding);
-    v[10] = _mm_srai_epi32(v[10], bit);
-
-    v[13] = _mm_mullo_epi32(u[10], cospi20);
-    x = _mm_mullo_epi32(u[13], cospi44);
-    v[13] = _mm_sub_epi32(x, v[13]);
-    v[13] = _mm_add_epi32(v[13], rnding);
-    v[13] = _mm_srai_epi32(v[13], bit);
-
-    v[11] = _mm_mullo_epi32(u[11], cospi12);
-    x = _mm_mullo_epi32(u[12], cospi52);
-    v[11] = _mm_add_epi32(v[11], x);
-    v[11] = _mm_add_epi32(v[11], rnding);
-    v[11] = _mm_srai_epi32(v[11], bit);
-
-    v[12] = _mm_mullo_epi32(u[11], cospi52);
-    x = _mm_mullo_epi32(u[12], cospi12);
-    v[12] = _mm_sub_epi32(x, v[12]);
-    v[12] = _mm_add_epi32(v[12], rnding);
-    v[12] = _mm_srai_epi32(v[12], bit);
-
-    out[0 * col_num + col] = v[0];
-    out[1 * col_num + col] = v[8];
-    out[2 * col_num + col] = v[4];
-    out[3 * col_num + col] = v[12];
-    out[4 * col_num + col] = v[2];
-    out[5 * col_num + col] = v[10];
-    out[6 * col_num + col] = v[6];
-    out[7 * col_num + col] = v[14];
-    out[8 * col_num + col] = v[1];
-    out[9 * col_num + col] = v[9];
-    out[10 * col_num + col] = v[5];
-    out[11 * col_num + col] = v[13];
-    out[12 * col_num + col] = v[3];
-    out[13 * col_num + col] = v[11];
-    out[14 * col_num + col] = v[7];
-    out[15 * col_num + col] = v[15];
-  }
-}
-
-static void fadst16x16_sse4_1(__m128i *in, __m128i *out, int bit,
-                              const int num_cols) {
-  const int32_t *cospi = cospi_arr(bit);
-  const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
-  const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
-  const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
-  const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
-  const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
-  const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
-  const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
-  const __m128i cospim56 = _mm_set1_epi32(-cospi[56]);
-  const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
-  const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
-  const __m128i cospim24 = _mm_set1_epi32(-cospi[24]);
-  const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
-  const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
-  const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
-  const __m128i cospi62 = _mm_set1_epi32(cospi[62]);
-  const __m128i cospim2 = _mm_set1_epi32(-cospi[2]);
-  const __m128i cospi10 = _mm_set1_epi32(cospi[10]);
-  const __m128i cospi54 = _mm_set1_epi32(cospi[54]);
-  const __m128i cospim10 = _mm_set1_epi32(-cospi[10]);
-  const __m128i cospi18 = _mm_set1_epi32(cospi[18]);
-  const __m128i cospi46 = _mm_set1_epi32(cospi[46]);
-  const __m128i cospim18 = _mm_set1_epi32(-cospi[18]);
-  const __m128i cospi26 = _mm_set1_epi32(cospi[26]);
-  const __m128i cospi38 = _mm_set1_epi32(cospi[38]);
-  const __m128i cospim26 = _mm_set1_epi32(-cospi[26]);
-  const __m128i cospi34 = _mm_set1_epi32(cospi[34]);
-  const __m128i cospi30 = _mm_set1_epi32(cospi[30]);
-  const __m128i cospim34 = _mm_set1_epi32(-cospi[34]);
-  const __m128i cospi42 = _mm_set1_epi32(cospi[42]);
-  const __m128i cospi22 = _mm_set1_epi32(cospi[22]);
-  const __m128i cospim42 = _mm_set1_epi32(-cospi[42]);
-  const __m128i cospi50 = _mm_set1_epi32(cospi[50]);
-  const __m128i cospi14 = _mm_set1_epi32(cospi[14]);
-  const __m128i cospim50 = _mm_set1_epi32(-cospi[50]);
-  const __m128i cospi58 = _mm_set1_epi32(cospi[58]);
-  const __m128i cospi6 = _mm_set1_epi32(cospi[6]);
-  const __m128i cospim58 = _mm_set1_epi32(-cospi[58]);
-  const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
-  const __m128i zero = _mm_setzero_si128();
-
-  __m128i u[16], v[16], x, y;
-  int col;
-
-  for (col = 0; col < num_cols; ++col) {
-    // stage 0
-    // stage 1
-    u[0] = in[0 * num_cols + col];
-    u[1] = _mm_sub_epi32(zero, in[15 * num_cols + col]);
-    u[2] = _mm_sub_epi32(zero, in[7 * num_cols + col]);
-    u[3] = in[8 * num_cols + col];
-    u[4] = _mm_sub_epi32(zero, in[3 * num_cols + col]);
-    u[5] = in[12 * num_cols + col];
-    u[6] = in[4 * num_cols + col];
-    u[7] = _mm_sub_epi32(zero, in[11 * num_cols + col]);
-    u[8] = _mm_sub_epi32(zero, in[1 * num_cols + col]);
-    u[9] = in[14 * num_cols + col];
-    u[10] = in[6 * num_cols + col];
-    u[11] = _mm_sub_epi32(zero, in[9 * num_cols + col]);
-    u[12] = in[2 * num_cols + col];
-    u[13] = _mm_sub_epi32(zero, in[13 * num_cols + col]);
-    u[14] = _mm_sub_epi32(zero, in[5 * num_cols + col]);
-    u[15] = in[10 * num_cols + col];
-
-    // stage 2
-    v[0] = u[0];
-    v[1] = u[1];
-
-    x = _mm_mullo_epi32(u[2], cospi32);
-    y = _mm_mullo_epi32(u[3], cospi32);
-    v[2] = _mm_add_epi32(x, y);
-    v[2] = _mm_add_epi32(v[2], rnding);
-    v[2] = _mm_srai_epi32(v[2], bit);
-
-    v[3] = _mm_sub_epi32(x, y);
-    v[3] = _mm_add_epi32(v[3], rnding);
-    v[3] = _mm_srai_epi32(v[3], bit);
-
-    v[4] = u[4];
-    v[5] = u[5];
-
-    x = _mm_mullo_epi32(u[6], cospi32);
-    y = _mm_mullo_epi32(u[7], cospi32);
-    v[6] = _mm_add_epi32(x, y);
-    v[6] = _mm_add_epi32(v[6], rnding);
-    v[6] = _mm_srai_epi32(v[6], bit);
-
-    v[7] = _mm_sub_epi32(x, y);
-    v[7] = _mm_add_epi32(v[7], rnding);
-    v[7] = _mm_srai_epi32(v[7], bit);
-
-    v[8] = u[8];
-    v[9] = u[9];
-
-    x = _mm_mullo_epi32(u[10], cospi32);
-    y = _mm_mullo_epi32(u[11], cospi32);
-    v[10] = _mm_add_epi32(x, y);
-    v[10] = _mm_add_epi32(v[10], rnding);
-    v[10] = _mm_srai_epi32(v[10], bit);
-
-    v[11] = _mm_sub_epi32(x, y);
-    v[11] = _mm_add_epi32(v[11], rnding);
-    v[11] = _mm_srai_epi32(v[11], bit);
-
-    v[12] = u[12];
-    v[13] = u[13];
-
-    x = _mm_mullo_epi32(u[14], cospi32);
-    y = _mm_mullo_epi32(u[15], cospi32);
-    v[14] = _mm_add_epi32(x, y);
-    v[14] = _mm_add_epi32(v[14], rnding);
-    v[14] = _mm_srai_epi32(v[14], bit);
-
-    v[15] = _mm_sub_epi32(x, y);
-    v[15] = _mm_add_epi32(v[15], rnding);
-    v[15] = _mm_srai_epi32(v[15], bit);
-
-    // stage 3
-    u[0] = _mm_add_epi32(v[0], v[2]);
-    u[1] = _mm_add_epi32(v[1], v[3]);
-    u[2] = _mm_sub_epi32(v[0], v[2]);
-    u[3] = _mm_sub_epi32(v[1], v[3]);
-    u[4] = _mm_add_epi32(v[4], v[6]);
-    u[5] = _mm_add_epi32(v[5], v[7]);
-    u[6] = _mm_sub_epi32(v[4], v[6]);
-    u[7] = _mm_sub_epi32(v[5], v[7]);
-    u[8] = _mm_add_epi32(v[8], v[10]);
-    u[9] = _mm_add_epi32(v[9], v[11]);
-    u[10] = _mm_sub_epi32(v[8], v[10]);
-    u[11] = _mm_sub_epi32(v[9], v[11]);
-    u[12] = _mm_add_epi32(v[12], v[14]);
-    u[13] = _mm_add_epi32(v[13], v[15]);
-    u[14] = _mm_sub_epi32(v[12], v[14]);
-    u[15] = _mm_sub_epi32(v[13], v[15]);
-
-    // stage 4
-    v[0] = u[0];
-    v[1] = u[1];
-    v[2] = u[2];
-    v[3] = u[3];
-    v[4] = half_btf_sse4_1(&cospi16, &u[4], &cospi48, &u[5], &rnding, bit);
-    v[5] = half_btf_sse4_1(&cospi48, &u[4], &cospim16, &u[5], &rnding, bit);
-    v[6] = half_btf_sse4_1(&cospim48, &u[6], &cospi16, &u[7], &rnding, bit);
-    v[7] = half_btf_sse4_1(&cospi16, &u[6], &cospi48, &u[7], &rnding, bit);
-    v[8] = u[8];
-    v[9] = u[9];
-    v[10] = u[10];
-    v[11] = u[11];
-    v[12] = half_btf_sse4_1(&cospi16, &u[12], &cospi48, &u[13], &rnding, bit);
-    v[13] = half_btf_sse4_1(&cospi48, &u[12], &cospim16, &u[13], &rnding, bit);
-    v[14] = half_btf_sse4_1(&cospim48, &u[14], &cospi16, &u[15], &rnding, bit);
-    v[15] = half_btf_sse4_1(&cospi16, &u[14], &cospi48, &u[15], &rnding, bit);
-
-    // stage 5
-    u[0] = _mm_add_epi32(v[0], v[4]);
-    u[1] = _mm_add_epi32(v[1], v[5]);
-    u[2] = _mm_add_epi32(v[2], v[6]);
-    u[3] = _mm_add_epi32(v[3], v[7]);
-    u[4] = _mm_sub_epi32(v[0], v[4]);
-    u[5] = _mm_sub_epi32(v[1], v[5]);
-    u[6] = _mm_sub_epi32(v[2], v[6]);
-    u[7] = _mm_sub_epi32(v[3], v[7]);
-    u[8] = _mm_add_epi32(v[8], v[12]);
-    u[9] = _mm_add_epi32(v[9], v[13]);
-    u[10] = _mm_add_epi32(v[10], v[14]);
-    u[11] = _mm_add_epi32(v[11], v[15]);
-    u[12] = _mm_sub_epi32(v[8], v[12]);
-    u[13] = _mm_sub_epi32(v[9], v[13]);
-    u[14] = _mm_sub_epi32(v[10], v[14]);
-    u[15] = _mm_sub_epi32(v[11], v[15]);
-
-    // stage 6
-    v[0] = u[0];
-    v[1] = u[1];
-    v[2] = u[2];
-    v[3] = u[3];
-    v[4] = u[4];
-    v[5] = u[5];
-    v[6] = u[6];
-    v[7] = u[7];
-    v[8] = half_btf_sse4_1(&cospi8, &u[8], &cospi56, &u[9], &rnding, bit);
-    v[9] = half_btf_sse4_1(&cospi56, &u[8], &cospim8, &u[9], &rnding, bit);
-    v[10] = half_btf_sse4_1(&cospi40, &u[10], &cospi24, &u[11], &rnding, bit);
-    v[11] = half_btf_sse4_1(&cospi24, &u[10], &cospim40, &u[11], &rnding, bit);
-    v[12] = half_btf_sse4_1(&cospim56, &u[12], &cospi8, &u[13], &rnding, bit);
-    v[13] = half_btf_sse4_1(&cospi8, &u[12], &cospi56, &u[13], &rnding, bit);
-    v[14] = half_btf_sse4_1(&cospim24, &u[14], &cospi40, &u[15], &rnding, bit);
-    v[15] = half_btf_sse4_1(&cospi40, &u[14], &cospi24, &u[15], &rnding, bit);
-
-    // stage 7
-    u[0] = _mm_add_epi32(v[0], v[8]);
-    u[1] = _mm_add_epi32(v[1], v[9]);
-    u[2] = _mm_add_epi32(v[2], v[10]);
-    u[3] = _mm_add_epi32(v[3], v[11]);
-    u[4] = _mm_add_epi32(v[4], v[12]);
-    u[5] = _mm_add_epi32(v[5], v[13]);
-    u[6] = _mm_add_epi32(v[6], v[14]);
-    u[7] = _mm_add_epi32(v[7], v[15]);
-    u[8] = _mm_sub_epi32(v[0], v[8]);
-    u[9] = _mm_sub_epi32(v[1], v[9]);
-    u[10] = _mm_sub_epi32(v[2], v[10]);
-    u[11] = _mm_sub_epi32(v[3], v[11]);
-    u[12] = _mm_sub_epi32(v[4], v[12]);
-    u[13] = _mm_sub_epi32(v[5], v[13]);
-    u[14] = _mm_sub_epi32(v[6], v[14]);
-    u[15] = _mm_sub_epi32(v[7], v[15]);
-
-    // stage 8
-    v[0] = half_btf_sse4_1(&cospi2, &u[0], &cospi62, &u[1], &rnding, bit);
-    v[1] = half_btf_sse4_1(&cospi62, &u[0], &cospim2, &u[1], &rnding, bit);
-    v[2] = half_btf_sse4_1(&cospi10, &u[2], &cospi54, &u[3], &rnding, bit);
-    v[3] = half_btf_sse4_1(&cospi54, &u[2], &cospim10, &u[3], &rnding, bit);
-    v[4] = half_btf_sse4_1(&cospi18, &u[4], &cospi46, &u[5], &rnding, bit);
-    v[5] = half_btf_sse4_1(&cospi46, &u[4], &cospim18, &u[5], &rnding, bit);
-    v[6] = half_btf_sse4_1(&cospi26, &u[6], &cospi38, &u[7], &rnding, bit);
-    v[7] = half_btf_sse4_1(&cospi38, &u[6], &cospim26, &u[7], &rnding, bit);
-    v[8] = half_btf_sse4_1(&cospi34, &u[8], &cospi30, &u[9], &rnding, bit);
-    v[9] = half_btf_sse4_1(&cospi30, &u[8], &cospim34, &u[9], &rnding, bit);
-    v[10] = half_btf_sse4_1(&cospi42, &u[10], &cospi22, &u[11], &rnding, bit);
-    v[11] = half_btf_sse4_1(&cospi22, &u[10], &cospim42, &u[11], &rnding, bit);
-    v[12] = half_btf_sse4_1(&cospi50, &u[12], &cospi14, &u[13], &rnding, bit);
-    v[13] = half_btf_sse4_1(&cospi14, &u[12], &cospim50, &u[13], &rnding, bit);
-    v[14] = half_btf_sse4_1(&cospi58, &u[14], &cospi6, &u[15], &rnding, bit);
-    v[15] = half_btf_sse4_1(&cospi6, &u[14], &cospim58, &u[15], &rnding, bit);
-
-    // stage 9
-    out[0 * num_cols + col] = v[1];
-    out[1 * num_cols + col] = v[14];
-    out[2 * num_cols + col] = v[3];
-    out[3 * num_cols + col] = v[12];
-    out[4 * num_cols + col] = v[5];
-    out[5 * num_cols + col] = v[10];
-    out[6 * num_cols + col] = v[7];
-    out[7 * num_cols + col] = v[8];
-    out[8 * num_cols + col] = v[9];
-    out[9 * num_cols + col] = v[6];
-    out[10 * num_cols + col] = v[11];
-    out[11 * num_cols + col] = v[4];
-    out[12 * num_cols + col] = v[13];
-    out[13 * num_cols + col] = v[2];
-    out[14 * num_cols + col] = v[15];
-    out[15 * num_cols + col] = v[0];
-  }
-}
-
-static void col_txfm_16x16_rounding(__m128i *in, int shift) {
-  // Note:
-  //  We split 16x16 rounding into 4 sections of 8x8 rounding,
-  //  instead of 4 columns
-  col_txfm_8x8_rounding(&in[0], shift);
-  col_txfm_8x8_rounding(&in[16], shift);
-  col_txfm_8x8_rounding(&in[32], shift);
-  col_txfm_8x8_rounding(&in[48], shift);
-}
-
-static void col_txfm_8x16_rounding(__m128i *in, int shift) {
-  col_txfm_8x8_rounding(&in[0], shift);
-  col_txfm_8x8_rounding(&in[16], shift);
-}
-
-static void write_buffer_16x16(const __m128i *in, int32_t *output) {
-  const int size_8x8 = 16 * 4;
-  write_buffer_8x8(&in[0], output);
-  output += size_8x8;
-  write_buffer_8x8(&in[16], output);
-  output += size_8x8;
-  write_buffer_8x8(&in[32], output);
-  output += size_8x8;
-  write_buffer_8x8(&in[48], output);
-}
-
-void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *coeff,
-                                 int stride, TX_TYPE tx_type, int bd) {
-  __m128i in[64], out[64];
-  const int8_t *shift = fwd_txfm_shift_ls[TX_16X16];
-  const int txw_idx = get_txw_idx(TX_16X16);
-  const int txh_idx = get_txh_idx(TX_16X16);
-  const int col_num = 4;
-  switch (tx_type) {
-    case DCT_DCT:
-      load_buffer_16x16(input, in, stride, 0, 0, shift[0]);
-      fdct16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
-      col_txfm_16x16_rounding(out, -shift[1]);
-      transpose_16x16(out, in);
-      fdct16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
-      transpose_16x16(out, in);
-      write_buffer_16x16(in, coeff);
-      break;
-    case ADST_DCT:
-      load_buffer_16x16(input, in, stride, 0, 0, shift[0]);
-      fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
-      col_txfm_16x16_rounding(out, -shift[1]);
-      transpose_16x16(out, in);
-      fdct16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
-      transpose_16x16(out, in);
-      write_buffer_16x16(in, coeff);
-      break;
-    case DCT_ADST:
-      load_buffer_16x16(input, in, stride, 0, 0, shift[0]);
-      fdct16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
-      col_txfm_16x16_rounding(out, -shift[1]);
-      transpose_16x16(out, in);
-      fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
-      transpose_16x16(out, in);
-      write_buffer_16x16(in, coeff);
-      break;
-    case ADST_ADST:
-      load_buffer_16x16(input, in, stride, 0, 0, shift[0]);
-      fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
-      col_txfm_16x16_rounding(out, -shift[1]);
-      transpose_16x16(out, in);
-      fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
-      transpose_16x16(out, in);
-      write_buffer_16x16(in, coeff);
-      break;
-    case FLIPADST_DCT:
-      load_buffer_16x16(input, in, stride, 1, 0, shift[0]);
-      fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
-      col_txfm_16x16_rounding(out, -shift[1]);
-      transpose_16x16(out, in);
-      fdct16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
-      transpose_16x16(out, in);
-      write_buffer_16x16(in, coeff);
-      break;
-    case DCT_FLIPADST:
-      load_buffer_16x16(input, in, stride, 0, 1, shift[0]);
-      fdct16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
-      col_txfm_16x16_rounding(out, -shift[1]);
-      transpose_16x16(out, in);
-      fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
-      transpose_16x16(out, in);
-      write_buffer_16x16(in, coeff);
-      break;
-    case FLIPADST_FLIPADST:
-      load_buffer_16x16(input, in, stride, 1, 1, shift[0]);
-      fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
-      col_txfm_16x16_rounding(out, -shift[1]);
-      transpose_16x16(out, in);
-      fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
-      transpose_16x16(out, in);
-      write_buffer_16x16(in, coeff);
-      break;
-    case ADST_FLIPADST:
-      load_buffer_16x16(input, in, stride, 0, 1, shift[0]);
-      fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
-      col_txfm_16x16_rounding(out, -shift[1]);
-      transpose_16x16(out, in);
-      fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
-      transpose_16x16(out, in);
-      write_buffer_16x16(in, coeff);
-      break;
-    case FLIPADST_ADST:
-      load_buffer_16x16(input, in, stride, 1, 0, shift[0]);
-      fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num);
-      col_txfm_16x16_rounding(out, -shift[1]);
-      transpose_16x16(out, in);
-      fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num);
-      transpose_16x16(out, in);
-      write_buffer_16x16(in, coeff);
-      break;
-    default: assert(0);
-  }
-  (void)bd;
-}
-
-static INLINE void flip_buf_sse4_1(__m128i *in, __m128i *out, int size) {
-  for (int i = 0; i < size; i += 2) in[30 - i] = out[i];
-  for (int i = 1; i < size; i += 2) in[size - i] = out[i];
-}
-
-static const fwd_transform_1d_sse4_1 col_highbd_txfm8x8_arr[TX_TYPES] = {
-  fdct8x8_sse4_1,   // DCT_DCT
-  fadst8x8_sse4_1,  // ADST_DCT
-  fdct8x8_sse4_1,   // DCT_ADST
-  fadst8x8_sse4_1,  // ADST_ADST
-  fadst8x8_sse4_1,  // FLIPADST_DCT
-  fdct8x8_sse4_1,   // DCT_FLIPADST
-  fadst8x8_sse4_1,  // FLIPADST_FLIPADST
-  fadst8x8_sse4_1,  // ADST_FLIPADST
-  fadst8x8_sse4_1,  // FLIPADST_ADST
-  NULL,             // IDTX
-  NULL,             // V_DCT
-  NULL,             // H_DCT
-  NULL,             // V_ADST
-  NULL,             // H_ADST
-  NULL,             // V_FLIPADST
-  NULL              // H_FLIPADST
-};
-
-static const fwd_transform_1d_sse4_1 row_highbd_txfm8x16_arr[TX_TYPES] = {
-  fdct16x16_sse4_1,   // DCT_DCT
-  fdct16x16_sse4_1,   // ADST_DCT
-  fadst16x16_sse4_1,  // DCT_ADST
-  fadst16x16_sse4_1,  // ADST_ADST
-  fdct16x16_sse4_1,   // FLIPADST_DCT
-  fadst16x16_sse4_1,  // DCT_FLIPADST
-  fadst16x16_sse4_1,  // FLIPADST_FLIPADST
-  fadst16x16_sse4_1,  // ADST_FLIPADST
-  fadst16x16_sse4_1,  // FLIPADST_ADST
-  NULL,               // IDTX
-  NULL,               // V_DCT
-  NULL,               // H_DCT
-  NULL,               // V_ADST
-  NULL,               // H_ADST
-  NULL,               // V_FLIPADST
-  NULL                // H_FLIPADST
-};
-
-static const fwd_transform_1d_sse4_1 col_highbd_txfm8x16_arr[TX_TYPES] = {
-  fdct16x16_sse4_1,   // DCT_DCT
-  fadst16x16_sse4_1,  // ADST_DCT
-  fdct16x16_sse4_1,   // DCT_ADST
-  fadst16x16_sse4_1,  // ADST_ADST
-  fadst16x16_sse4_1,  // FLIPADST_DCT
-  fdct16x16_sse4_1,   // DCT_FLIPADST
-  fadst16x16_sse4_1,  // FLIPADST_FLIPADST
-  fadst16x16_sse4_1,  // ADST_FLIPADST
-  fadst16x16_sse4_1,  // FLIPADST_ADST
-  NULL,               // IDTX
-  NULL,               // V_DCT
-  NULL,               // H_DCT
-  NULL,               // V_ADST
-  NULL,               // H_ADST
-  NULL,               // V_FLIPADST
-  NULL                // H_FLIPADST
-};
-static const fwd_transform_1d_sse4_1 row_highbd_txfm8x8_arr[TX_TYPES] = {
-  fdct8x8_sse4_1,   // DCT_DCT
-  fdct8x8_sse4_1,   // ADST_DCT
-  fadst8x8_sse4_1,  // DCT_ADST
-  fadst8x8_sse4_1,  // ADST_ADST
-  fdct8x8_sse4_1,   // FLIPADST_DCT
-  fadst8x8_sse4_1,  // DCT_FLIPADST
-  fadst8x8_sse4_1,  // FLIPADST_FLIPADST
-  fadst8x8_sse4_1,  // ADST_FLIPADST
-  fadst8x8_sse4_1,  // FLIPADST_ADST
-  NULL,             // IDTX
-  NULL,             // V_DCT
-  NULL,             // H_DCT
-  NULL,             // V_ADST
-  NULL,             // H_ADST
-  NULL,             // V_FLIPADST
-  NULL              // H_FLIPADST
-};
-
-void av1_fwd_txfm2d_16x8_sse4_1(const int16_t *input, int32_t *coeff,
-                                int stride, TX_TYPE tx_type, int bd) {
-  __m128i in[32], out[32];
-  const int8_t *shift = fwd_txfm_shift_ls[TX_16X8];
-  const int txw_idx = get_txw_idx(TX_16X8);
-  const int txh_idx = get_txh_idx(TX_16X8);
-  const fwd_transform_1d_sse4_1 col_txfm = col_highbd_txfm8x8_arr[tx_type];
-  const fwd_transform_1d_sse4_1 row_txfm = row_highbd_txfm8x16_arr[tx_type];
-  int bit = fwd_cos_bit_col[txw_idx][txh_idx];
-  int ud_flip, lr_flip;
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
-  for (int i = 0; i < 2; i++) {
-    load_buffer_8x8(input + i * 8, in, stride, ud_flip, 0, shift[0]);
-    col_txfm(in, in, bit, 0);
-    col_txfm_8x8_rounding(in, -shift[1]);
-    transpose_8x8(in, out + i * 16);
-  }
-
-  if (lr_flip) {
-    flip_buf_sse4_1(in, out, 32);
-    row_txfm(in, out, bit, 2);
-  } else {
-    row_txfm(out, out, bit, 2);
-  }
-
-  for (int i = 0; i < 2; i++) {
-    transpose_8x8(out + i * 16, in);
-    av1_round_shift_rect_array_32_sse4_1(in, in, 16, -shift[2], NewSqrt2);
-    write_buffer_16x8(in, coeff + i * 8, 16);
-  }
-
-  (void)bd;
-}
-
-void av1_fwd_txfm2d_8x16_sse4_1(const int16_t *input, int32_t *coeff,
-                                int stride, TX_TYPE tx_type, int bd) {
-  __m128i in[32], out[32];
-  const int8_t *shift = fwd_txfm_shift_ls[TX_8X16];
-  const int txw_idx = get_txw_idx(TX_8X16);
-  const int txh_idx = get_txh_idx(TX_8X16);
-  const fwd_transform_1d_sse4_1 col_txfm = col_highbd_txfm8x16_arr[tx_type];
-  const fwd_transform_1d_sse4_1 row_txfm = row_highbd_txfm8x8_arr[tx_type];
-  int bit = fwd_cos_bit_col[txw_idx][txh_idx];
-  int ud_flip, lr_flip;
-  get_flip_cfg(tx_type, &ud_flip, &lr_flip);
-
-  load_buffer_8x16(input, in, stride, ud_flip, lr_flip, shift[0]);
-  col_txfm(in, in, bit, 2);
-  col_txfm_8x16_rounding(in, -shift[1]);
-  transpose_8x8(in, out);
-  transpose_8x8(in + 16, out + 16);
-
-  for (int i = 0; i < 2; i++) {
-    row_txfm(out + i * 16, out, bit, 0);
-    transpose_8x8(out, in);
-    av1_round_shift_rect_array_32_sse4_1(in, in, 16, -shift[2], NewSqrt2);
-    write_buffer_8x8(in, coeff + i * 64);
-  }
-
-  (void)bd;
-}
diff --git a/third_party/aom/av1/encoder/x86/pickrst_avx2.c b/third_party/aom/av1/encoder/x86/pickrst_avx2.c
deleted file mode 100644
index 06aaaa7ee..000000000
--- a/third_party/aom/av1/encoder/x86/pickrst_avx2.c
+++ /dev/null
@@ -1,403 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <immintrin.h>  // AVX2
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86/synonyms_avx2.h"
-#include "aom_dsp/x86/transpose_sse2.h"
-
-#include "config/av1_rtcd.h"
-#include "av1/common/restoration.h"
-#include "av1/encoder/pickrst.h"
-
-static INLINE void acc_stat_avx2(int32_t *dst, const uint8_t *src,
-                                 const __m128i *shuffle, const __m256i *kl) {
-  const __m128i s = _mm_shuffle_epi8(xx_loadu_128(src), *shuffle);
-  const __m256i d0 = _mm256_madd_epi16(*kl, _mm256_cvtepu8_epi16(s));
-  const __m256i dst0 = yy_loadu_256(dst);
-  const __m256i r0 = _mm256_add_epi32(dst0, d0);
-  yy_storeu_256(dst, r0);
-}
-
-static INLINE void acc_stat_win7_one_line_avx2(
-    const uint8_t *dgd, const uint8_t *src, int h_start, int h_end,
-    int dgd_stride, const __m128i *shuffle, int32_t *sumX,
-    int32_t sumY[WIENER_WIN][WIENER_WIN], int32_t M_int[WIENER_WIN][WIENER_WIN],
-    int32_t H_int[WIENER_WIN2][WIENER_WIN * 8]) {
-  int j, k, l;
-  const int wiener_win = WIENER_WIN;
-  for (j = h_start; j < h_end; j += 2) {
-    const uint8_t X1 = src[j];
-    const uint8_t X2 = src[j + 1];
-    *sumX += X1 + X2;
-    const uint8_t *dgd_ij = dgd + j;
-    for (k = 0; k < wiener_win; k++) {
-      const uint8_t *dgd_ijk = dgd_ij + k * dgd_stride;
-      for (l = 0; l < wiener_win; l++) {
-        int32_t *H_ = &H_int[(l * wiener_win + k)][0];
-        const uint8_t D1 = dgd_ijk[l];
-        const uint8_t D2 = dgd_ijk[l + 1];
-        sumY[k][l] += D1 + D2;
-        M_int[k][l] += D1 * X1 + D2 * X2;
-
-        const __m256i kl =
-            _mm256_cvtepu8_epi16(_mm_set1_epi16(*((uint16_t *)(dgd_ijk + l))));
-        acc_stat_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
-        acc_stat_avx2(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
-        acc_stat_avx2(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
-        acc_stat_avx2(H_ + 3 * 8, dgd_ij + 3 * dgd_stride, shuffle, &kl);
-        acc_stat_avx2(H_ + 4 * 8, dgd_ij + 4 * dgd_stride, shuffle, &kl);
-        acc_stat_avx2(H_ + 5 * 8, dgd_ij + 5 * dgd_stride, shuffle, &kl);
-        acc_stat_avx2(H_ + 6 * 8, dgd_ij + 6 * dgd_stride, shuffle, &kl);
-      }
-    }
-  }
-}
-
-static INLINE void compute_stats_win7_opt_avx2(
-    const uint8_t *dgd, const uint8_t *src, int h_start, int h_end, int v_start,
-    int v_end, int dgd_stride, int src_stride, double *M, double *H) {
-  int i, j, k, l, m, n;
-  const int wiener_win = WIENER_WIN;
-  const int pixel_count = (h_end - h_start) * (v_end - v_start);
-  const int wiener_win2 = wiener_win * wiener_win;
-  const int wiener_halfwin = (wiener_win >> 1);
-  const double avg =
-      find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
-
-  int32_t M_int32[WIENER_WIN][WIENER_WIN] = { { 0 } };
-  int64_t M_int64[WIENER_WIN][WIENER_WIN] = { { 0 } };
-  int32_t H_int32[WIENER_WIN2][WIENER_WIN * 8] = { { 0 } };
-  int64_t H_int64[WIENER_WIN2][WIENER_WIN * 8] = { { 0 } };
-  int32_t sumY[WIENER_WIN][WIENER_WIN] = { { 0 } };
-  int32_t sumX = 0;
-  const uint8_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin;
-
-  const __m128i shuffle = xx_loadu_128(g_shuffle_stats_data);
-  for (j = v_start; j < v_end; j += 64) {
-    const int vert_end = AOMMIN(64, v_end - j) + j;
-    for (i = j; i < vert_end; i++) {
-      acc_stat_win7_one_line_avx2(
-          dgd_win + i * dgd_stride, src + i * src_stride, h_start, h_end,
-          dgd_stride, &shuffle, &sumX, sumY, M_int32, H_int32);
-    }
-    for (k = 0; k < wiener_win; ++k) {
-      for (l = 0; l < wiener_win; ++l) {
-        M_int64[k][l] += M_int32[k][l];
-        M_int32[k][l] = 0;
-      }
-    }
-    for (k = 0; k < WIENER_WIN2; ++k) {
-      for (l = 0; l < WIENER_WIN * 8; ++l) {
-        H_int64[k][l] += H_int32[k][l];
-        H_int32[k][l] = 0;
-      }
-    }
-  }
-
-  const double avg_square_sum = avg * avg * pixel_count;
-  for (k = 0; k < wiener_win; k++) {
-    for (l = 0; l < wiener_win; l++) {
-      const int32_t idx0 = l * wiener_win + k;
-      M[idx0] = M_int64[k][l] + avg_square_sum - avg * (sumX + sumY[k][l]);
-      double *H_ = H + idx0 * wiener_win2;
-      int64_t *H_int_ = &H_int64[idx0][0];
-      for (m = 0; m < wiener_win; m++) {
-        for (n = 0; n < wiener_win; n++) {
-          H_[m * wiener_win + n] = H_int_[n * 8 + m] + avg_square_sum -
-                                   avg * (sumY[k][l] + sumY[n][m]);
-        }
-      }
-    }
-  }
-}
-
-static INLINE void acc_stat_win5_one_line_avx2(
-    const uint8_t *dgd, const uint8_t *src, int h_start, int h_end,
-    int dgd_stride, const __m128i *shuffle, int32_t *sumX,
-    int32_t sumY[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA],
-    int32_t M_int[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA],
-    int32_t H_int[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8]) {
-  int j, k, l;
-  const int wiener_win = WIENER_WIN_CHROMA;
-  for (j = h_start; j < h_end; j += 2) {
-    const uint8_t X1 = src[j];
-    const uint8_t X2 = src[j + 1];
-    *sumX += X1 + X2;
-    const uint8_t *dgd_ij = dgd + j;
-    for (k = 0; k < wiener_win; k++) {
-      const uint8_t *dgd_ijk = dgd_ij + k * dgd_stride;
-      for (l = 0; l < wiener_win; l++) {
-        int32_t *H_ = &H_int[(l * wiener_win + k)][0];
-        const uint8_t D1 = dgd_ijk[l];
-        const uint8_t D2 = dgd_ijk[l + 1];
-        sumY[k][l] += D1 + D2;
-        M_int[k][l] += D1 * X1 + D2 * X2;
-
-        const __m256i kl =
-            _mm256_cvtepu8_epi16(_mm_set1_epi16(*((uint16_t *)(dgd_ijk + l))));
-        acc_stat_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
-        acc_stat_avx2(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
-        acc_stat_avx2(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
-        acc_stat_avx2(H_ + 3 * 8, dgd_ij + 3 * dgd_stride, shuffle, &kl);
-        acc_stat_avx2(H_ + 4 * 8, dgd_ij + 4 * dgd_stride, shuffle, &kl);
-      }
-    }
-  }
-}
-
-static INLINE void compute_stats_win5_opt_avx2(
-    const uint8_t *dgd, const uint8_t *src, int h_start, int h_end, int v_start,
-    int v_end, int dgd_stride, int src_stride, double *M, double *H) {
-  int i, j, k, l, m, n;
-  const int wiener_win = WIENER_WIN_CHROMA;
-  const int pixel_count = (h_end - h_start) * (v_end - v_start);
-  const int wiener_win2 = wiener_win * wiener_win;
-  const int wiener_halfwin = (wiener_win >> 1);
-  const double avg =
-      find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
-
-  int32_t M_int32[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } };
-  int64_t M_int64[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } };
-  int32_t H_int32[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8] = { { 0 } };
-  int64_t H_int64[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8] = { { 0 } };
-  int32_t sumY[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } };
-  int32_t sumX = 0;
-  const uint8_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin;
-
-  const __m128i shuffle = xx_loadu_128(g_shuffle_stats_data);
-  for (j = v_start; j < v_end; j += 64) {
-    const int vert_end = AOMMIN(64, v_end - j) + j;
-    for (i = j; i < vert_end; i++) {
-      acc_stat_win5_one_line_avx2(
-          dgd_win + i * dgd_stride, src + i * src_stride, h_start, h_end,
-          dgd_stride, &shuffle, &sumX, sumY, M_int32, H_int32);
-    }
-    for (k = 0; k < wiener_win; ++k) {
-      for (l = 0; l < wiener_win; ++l) {
-        M_int64[k][l] += M_int32[k][l];
-        M_int32[k][l] = 0;
-      }
-    }
-    for (k = 0; k < WIENER_WIN2_CHROMA; ++k) {
-      for (l = 0; l < WIENER_WIN_CHROMA * 8; ++l) {
-        H_int64[k][l] += H_int32[k][l];
-        H_int32[k][l] = 0;
-      }
-    }
-  }
-
-  const double avg_square_sum = avg * avg * pixel_count;
-  for (k = 0; k < wiener_win; k++) {
-    for (l = 0; l < wiener_win; l++) {
-      const int32_t idx0 = l * wiener_win + k;
-      M[idx0] = M_int64[k][l] + avg_square_sum - avg * (sumX + sumY[k][l]);
-      double *H_ = H + idx0 * wiener_win2;
-      int64_t *H_int_ = &H_int64[idx0][0];
-      for (m = 0; m < wiener_win; m++) {
-        for (n = 0; n < wiener_win; n++) {
-          H_[m * wiener_win + n] = H_int_[n * 8 + m] + avg_square_sum -
-                                   avg * (sumY[k][l] + sumY[n][m]);
-        }
-      }
-    }
-  }
-}
-
-void av1_compute_stats_avx2(int wiener_win, const uint8_t *dgd,
-                            const uint8_t *src, int h_start, int h_end,
-                            int v_start, int v_end, int dgd_stride,
-                            int src_stride, double *M, double *H) {
-  if (wiener_win == WIENER_WIN) {
-    compute_stats_win7_opt_avx2(dgd, src, h_start, h_end, v_start, v_end,
-                                dgd_stride, src_stride, M, H);
-  } else if (wiener_win == WIENER_WIN_CHROMA) {
-    compute_stats_win5_opt_avx2(dgd, src, h_start, h_end, v_start, v_end,
-                                dgd_stride, src_stride, M, H);
-  } else {
-    av1_compute_stats_c(wiener_win, dgd, src, h_start, h_end, v_start, v_end,
-                        dgd_stride, src_stride, M, H);
-  }
-}
-
-static INLINE __m256i pair_set_epi16(uint16_t a, uint16_t b) {
-  return _mm256_set1_epi32(
-      (int32_t)(((uint16_t)(a)) | (((uint32_t)(b)) << 16)));
-}
-
-int64_t av1_lowbd_pixel_proj_error_avx2(
-    const uint8_t *src8, int width, int height, int src_stride,
-    const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride,
-    int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params) {
-  int i, j, k;
-  const int32_t shift = SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS;
-  const __m256i rounding = _mm256_set1_epi32(1 << (shift - 1));
-  __m256i sum64 = _mm256_setzero_si256();
-  const uint8_t *src = src8;
-  const uint8_t *dat = dat8;
-  int64_t err = 0;
-  if (params->r[0] > 0 && params->r[1] > 0) {
-    __m256i xq_coeff = pair_set_epi16(xq[0], xq[1]);
-    for (i = 0; i < height; ++i) {
-      __m256i sum32 = _mm256_setzero_si256();
-      for (j = 0; j <= width - 16; j += 16) {
-        const __m256i d0 = _mm256_cvtepu8_epi16(xx_loadu_128(dat + j));
-        const __m256i s0 = _mm256_cvtepu8_epi16(xx_loadu_128(src + j));
-        const __m256i flt0_16b = _mm256_permute4x64_epi64(
-            _mm256_packs_epi32(yy_loadu_256(flt0 + j),
-                               yy_loadu_256(flt0 + j + 8)),
-            0xd8);
-        const __m256i flt1_16b = _mm256_permute4x64_epi64(
-            _mm256_packs_epi32(yy_loadu_256(flt1 + j),
-                               yy_loadu_256(flt1 + j + 8)),
-            0xd8);
-        const __m256i u0 = _mm256_slli_epi16(d0, SGRPROJ_RST_BITS);
-        const __m256i flt0_0_sub_u = _mm256_sub_epi16(flt0_16b, u0);
-        const __m256i flt1_0_sub_u = _mm256_sub_epi16(flt1_16b, u0);
-        const __m256i v0 = _mm256_madd_epi16(
-            xq_coeff, _mm256_unpacklo_epi16(flt0_0_sub_u, flt1_0_sub_u));
-        const __m256i v1 = _mm256_madd_epi16(
-            xq_coeff, _mm256_unpackhi_epi16(flt0_0_sub_u, flt1_0_sub_u));
-        const __m256i vr0 =
-            _mm256_srai_epi32(_mm256_add_epi32(v0, rounding), shift);
-        const __m256i vr1 =
-            _mm256_srai_epi32(_mm256_add_epi32(v1, rounding), shift);
-        const __m256i e0 = _mm256_sub_epi16(
-            _mm256_add_epi16(_mm256_packs_epi32(vr0, vr1), d0), s0);
-        const __m256i err0 = _mm256_madd_epi16(e0, e0);
-        sum32 = _mm256_add_epi32(sum32, err0);
-      }
-      for (k = j; k < width; ++k) {
-        const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS);
-        int32_t v = xq[0] * (flt0[k] - u) + xq[1] * (flt1[k] - u);
-        const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k];
-        err += e * e;
-      }
-      dat += dat_stride;
-      src += src_stride;
-      flt0 += flt0_stride;
-      flt1 += flt1_stride;
-      const __m256i sum64_0 =
-          _mm256_cvtepi32_epi64(_mm256_castsi256_si128(sum32));
-      const __m256i sum64_1 =
-          _mm256_cvtepi32_epi64(_mm256_extracti128_si256(sum32, 1));
-      sum64 = _mm256_add_epi64(sum64, sum64_0);
-      sum64 = _mm256_add_epi64(sum64, sum64_1);
-    }
-  } else if (params->r[0] > 0) {
-    __m256i xq_coeff =
-        pair_set_epi16(xq[0], (-xq[0] * (1 << SGRPROJ_RST_BITS)));
-    for (i = 0; i < height; ++i) {
-      __m256i sum32 = _mm256_setzero_si256();
-      for (j = 0; j <= width - 16; j += 16) {
-        const __m256i d0 = _mm256_cvtepu8_epi16(xx_loadu_128(dat + j));
-        const __m256i s0 = _mm256_cvtepu8_epi16(xx_loadu_128(src + j));
-        const __m256i flt0_16b = _mm256_permute4x64_epi64(
-            _mm256_packs_epi32(yy_loadu_256(flt0 + j),
-                               yy_loadu_256(flt0 + j + 8)),
-            0xd8);
-        const __m256i v0 =
-            _mm256_madd_epi16(xq_coeff, _mm256_unpacklo_epi16(flt0_16b, d0));
-        const __m256i v1 =
-            _mm256_madd_epi16(xq_coeff, _mm256_unpackhi_epi16(flt0_16b, d0));
-        const __m256i vr0 =
-            _mm256_srai_epi32(_mm256_add_epi32(v0, rounding), shift);
-        const __m256i vr1 =
-            _mm256_srai_epi32(_mm256_add_epi32(v1, rounding), shift);
-        const __m256i e0 = _mm256_sub_epi16(
-            _mm256_add_epi16(_mm256_packs_epi32(vr0, vr1), d0), s0);
-        const __m256i err0 = _mm256_madd_epi16(e0, e0);
-        sum32 = _mm256_add_epi32(sum32, err0);
-      }
-      for (k = j; k < width; ++k) {
-        const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS);
-        int32_t v = xq[0] * (flt0[k] - u);
-        const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k];
-        err += e * e;
-      }
-      dat += dat_stride;
-      src += src_stride;
-      flt0 += flt0_stride;
-      const __m256i sum64_0 =
-          _mm256_cvtepi32_epi64(_mm256_castsi256_si128(sum32));
-      const __m256i sum64_1 =
-          _mm256_cvtepi32_epi64(_mm256_extracti128_si256(sum32, 1));
-      sum64 = _mm256_add_epi64(sum64, sum64_0);
-      sum64 = _mm256_add_epi64(sum64, sum64_1);
-    }
-  } else if (params->r[1] > 0) {
-    __m256i xq_coeff = pair_set_epi16(xq[1], -(xq[1] << SGRPROJ_RST_BITS));
-    for (i = 0; i < height; ++i) {
-      __m256i sum32 = _mm256_setzero_si256();
-      for (j = 0; j <= width - 16; j += 16) {
-        const __m256i d0 = _mm256_cvtepu8_epi16(xx_loadu_128(dat + j));
-        const __m256i s0 = _mm256_cvtepu8_epi16(xx_loadu_128(src + j));
-        const __m256i flt1_16b = _mm256_permute4x64_epi64(
-            _mm256_packs_epi32(yy_loadu_256(flt1 + j),
-                               yy_loadu_256(flt1 + j + 8)),
-            0xd8);
-        const __m256i v0 =
-            _mm256_madd_epi16(xq_coeff, _mm256_unpacklo_epi16(flt1_16b, d0));
-        const __m256i v1 =
-            _mm256_madd_epi16(xq_coeff, _mm256_unpackhi_epi16(flt1_16b, d0));
-        const __m256i vr0 =
-            _mm256_srai_epi32(_mm256_add_epi32(v0, rounding), shift);
-        const __m256i vr1 =
-            _mm256_srai_epi32(_mm256_add_epi32(v1, rounding), shift);
-        const __m256i e0 = _mm256_sub_epi16(
-            _mm256_add_epi16(_mm256_packs_epi32(vr0, vr1), d0), s0);
-        const __m256i err0 = _mm256_madd_epi16(e0, e0);
-        sum32 = _mm256_add_epi32(sum32, err0);
-      }
-      for (k = j; k < width; ++k) {
-        const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS);
-        int32_t v = xq[1] * (flt1[k] - u);
-        const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k];
-        err += e * e;
-      }
-      dat += dat_stride;
-      src += src_stride;
-      flt1 += flt1_stride;
-      const __m256i sum64_0 =
-          _mm256_cvtepi32_epi64(_mm256_castsi256_si128(sum32));
-      const __m256i sum64_1 =
-          _mm256_cvtepi32_epi64(_mm256_extracti128_si256(sum32, 1));
-      sum64 = _mm256_add_epi64(sum64, sum64_0);
-      sum64 = _mm256_add_epi64(sum64, sum64_1);
-    }
-  } else {
-    __m256i sum32 = _mm256_setzero_si256();
-    for (i = 0; i < height; ++i) {
-      for (j = 0; j <= width - 16; j += 16) {
-        const __m256i d0 = _mm256_cvtepu8_epi16(xx_loadu_128(dat + j));
-        const __m256i s0 = _mm256_cvtepu8_epi16(xx_loadu_128(src + j));
-        const __m256i diff0 = _mm256_sub_epi16(d0, s0);
-        const __m256i err0 = _mm256_madd_epi16(diff0, diff0);
-        sum32 = _mm256_add_epi32(sum32, err0);
-      }
-      for (k = j; k < width; ++k) {
-        const int32_t e = (int32_t)(dat[k]) - src[k];
-        err += e * e;
-      }
-      dat += dat_stride;
-      src += src_stride;
-    }
-    const __m256i sum64_0 =
-        _mm256_cvtepi32_epi64(_mm256_castsi256_si128(sum32));
-    const __m256i sum64_1 =
-        _mm256_cvtepi32_epi64(_mm256_extracti128_si256(sum32, 1));
-    sum64 = _mm256_add_epi64(sum64_0, sum64_1);
-  }
-  int64_t sum[4];
-  yy_storeu_256(sum, sum64);
-  err += sum[0] + sum[1] + sum[2] + sum[3];
-  return err;
-}
diff --git a/third_party/aom/av1/encoder/x86/pickrst_sse4.c b/third_party/aom/av1/encoder/x86/pickrst_sse4.c
deleted file mode 100644
index 04e4d1afc..000000000
--- a/third_party/aom/av1/encoder/x86/pickrst_sse4.c
+++ /dev/null
@@ -1,389 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <emmintrin.h>
-#include "aom_dsp/x86/synonyms.h"
-
-#include "config/av1_rtcd.h"
-#include "av1/common/restoration.h"
-#include "av1/encoder/pickrst.h"
-
-static INLINE void acc_stat_sse41(int32_t *dst, const uint8_t *src,
-                                  const __m128i *shuffle, const __m128i *kl) {
-  const __m128i s = _mm_shuffle_epi8(xx_loadu_128(src), *shuffle);
-  const __m128i d0 = _mm_madd_epi16(*kl, _mm_cvtepu8_epi16(s));
-  const __m128i d1 =
-      _mm_madd_epi16(*kl, _mm_cvtepu8_epi16(_mm_srli_si128(s, 8)));
-  const __m128i dst0 = xx_loadu_128(dst);
-  const __m128i dst1 = xx_loadu_128(dst + 4);
-  const __m128i r0 = _mm_add_epi32(dst0, d0);
-  const __m128i r1 = _mm_add_epi32(dst1, d1);
-  xx_storeu_128(dst, r0);
-  xx_storeu_128(dst + 4, r1);
-}
-
-static INLINE void acc_stat_win7_one_line_sse4_1(
-    const uint8_t *dgd, const uint8_t *src, int h_start, int h_end,
-    int dgd_stride, const __m128i *shuffle, int32_t *sumX,
-    int32_t sumY[WIENER_WIN][WIENER_WIN], int32_t M_int[WIENER_WIN][WIENER_WIN],
-    int32_t H_int[WIENER_WIN2][WIENER_WIN * 8]) {
-  const int wiener_win = 7;
-  int j, k, l;
-  for (j = h_start; j < h_end; j += 2) {
-    const uint8_t *dgd_ij = dgd + j;
-    const uint8_t X1 = src[j];
-    const uint8_t X2 = src[j + 1];
-    *sumX += X1 + X2;
-    for (k = 0; k < wiener_win; k++) {
-      const uint8_t *dgd_ijk = dgd_ij + k * dgd_stride;
-      for (l = 0; l < wiener_win; l++) {
-        int32_t *H_ = &H_int[(l * wiener_win + k)][0];
-        const uint8_t D1 = dgd_ijk[l];
-        const uint8_t D2 = dgd_ijk[l + 1];
-        sumY[k][l] += D1 + D2;
-        M_int[k][l] += D1 * X1 + D2 * X2;
-
-        const __m128i kl =
-            _mm_cvtepu8_epi16(_mm_set1_epi16(*((uint16_t *)(dgd_ijk + l))));
-        acc_stat_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
-        acc_stat_sse41(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
-        acc_stat_sse41(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
-        acc_stat_sse41(H_ + 3 * 8, dgd_ij + 3 * dgd_stride, shuffle, &kl);
-        acc_stat_sse41(H_ + 4 * 8, dgd_ij + 4 * dgd_stride, shuffle, &kl);
-        acc_stat_sse41(H_ + 5 * 8, dgd_ij + 5 * dgd_stride, shuffle, &kl);
-        acc_stat_sse41(H_ + 6 * 8, dgd_ij + 6 * dgd_stride, shuffle, &kl);
-      }
-    }
-  }
-}
-
-static INLINE void compute_stats_win7_opt_sse4_1(
-    const uint8_t *dgd, const uint8_t *src, int h_start, int h_end, int v_start,
-    int v_end, int dgd_stride, int src_stride, double *M, double *H) {
-  int i, j, k, l, m, n;
-  const int wiener_win = WIENER_WIN;
-  const int pixel_count = (h_end - h_start) * (v_end - v_start);
-  const int wiener_win2 = wiener_win * wiener_win;
-  const int wiener_halfwin = (wiener_win >> 1);
-  const double avg =
-      find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
-
-  int32_t M_int32[WIENER_WIN][WIENER_WIN] = { { 0 } };
-  int64_t M_int64[WIENER_WIN][WIENER_WIN] = { { 0 } };
-  int32_t H_int32[WIENER_WIN2][WIENER_WIN * 8] = { { 0 } };
-  int64_t H_int64[WIENER_WIN2][WIENER_WIN * 8] = { { 0 } };
-  int32_t sumY[WIENER_WIN][WIENER_WIN] = { { 0 } };
-  int32_t sumX = 0;
-  const uint8_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin;
-
-  const __m128i shuffle = xx_loadu_128(g_shuffle_stats_data);
-  for (j = v_start; j < v_end; j += 64) {
-    const int vert_end = AOMMIN(64, v_end - j) + j;
-    for (i = j; i < vert_end; i++) {
-      acc_stat_win7_one_line_sse4_1(
-          dgd_win + i * dgd_stride, src + i * src_stride, h_start, h_end,
-          dgd_stride, &shuffle, &sumX, sumY, M_int32, H_int32);
-    }
-    for (k = 0; k < wiener_win; ++k) {
-      for (l = 0; l < wiener_win; ++l) {
-        M_int64[k][l] += M_int32[k][l];
-        M_int32[k][l] = 0;
-      }
-    }
-    for (k = 0; k < WIENER_WIN2; ++k) {
-      for (l = 0; l < WIENER_WIN * 8; ++l) {
-        H_int64[k][l] += H_int32[k][l];
-        H_int32[k][l] = 0;
-      }
-    }
-  }
-
-  const double avg_square_sum = avg * avg * pixel_count;
-  for (k = 0; k < wiener_win; k++) {
-    for (l = 0; l < wiener_win; l++) {
-      const int32_t idx0 = l * wiener_win + k;
-      M[idx0] = M_int64[k][l] + avg_square_sum - avg * (sumX + sumY[k][l]);
-      double *H_ = H + idx0 * wiener_win2;
-      int64_t *H_int_ = &H_int64[idx0][0];
-      for (m = 0; m < wiener_win; m++) {
-        for (n = 0; n < wiener_win; n++) {
-          H_[m * wiener_win + n] = H_int_[n * 8 + m] + avg_square_sum -
-                                   avg * (sumY[k][l] + sumY[n][m]);
-        }
-      }
-    }
-  }
-}
-
-static INLINE void acc_stat_win5_one_line_sse4_1(
-    const uint8_t *dgd, const uint8_t *src, int h_start, int h_end,
-    int dgd_stride, const __m128i *shuffle, int32_t *sumX,
-    int32_t sumY[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA],
-    int32_t M_int[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA],
-    int32_t H_int[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8]) {
-  const int wiener_win = WIENER_WIN_CHROMA;
-  int j, k, l;
-  for (j = h_start; j < h_end; j += 2) {
-    const uint8_t *dgd_ij = dgd + j;
-    const uint8_t X1 = src[j];
-    const uint8_t X2 = src[j + 1];
-    *sumX += X1 + X2;
-    for (k = 0; k < wiener_win; k++) {
-      const uint8_t *dgd_ijk = dgd_ij + k * dgd_stride;
-      for (l = 0; l < wiener_win; l++) {
-        int32_t *H_ = &H_int[(l * wiener_win + k)][0];
-        const uint8_t D1 = dgd_ijk[l];
-        const uint8_t D2 = dgd_ijk[l + 1];
-        sumY[k][l] += D1 + D2;
-        M_int[k][l] += D1 * X1 + D2 * X2;
-
-        const __m128i kl =
-            _mm_cvtepu8_epi16(_mm_set1_epi16(*((uint16_t *)(dgd_ijk + l))));
-        acc_stat_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
-        acc_stat_sse41(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
-        acc_stat_sse41(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
-        acc_stat_sse41(H_ + 3 * 8, dgd_ij + 3 * dgd_stride, shuffle, &kl);
-        acc_stat_sse41(H_ + 4 * 8, dgd_ij + 4 * dgd_stride, shuffle, &kl);
-      }
-    }
-  }
-}
-
-static INLINE void compute_stats_win5_opt_sse4_1(
-    const uint8_t *dgd, const uint8_t *src, int h_start, int h_end, int v_start,
-    int v_end, int dgd_stride, int src_stride, double *M, double *H) {
-  int i, j, k, l, m, n;
-  const int wiener_win = WIENER_WIN_CHROMA;
-  const int pixel_count = (h_end - h_start) * (v_end - v_start);
-  const int wiener_win2 = wiener_win * wiener_win;
-  const int wiener_halfwin = (wiener_win >> 1);
-  const double avg =
-      find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
-
-  int32_t M_int32[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } };
-  int64_t M_int64[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } };
-  int32_t H_int32[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8] = { { 0 } };
-  int64_t H_int64[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8] = { { 0 } };
-  int32_t sumY[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } };
-  int32_t sumX = 0;
-  const uint8_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin;
-
-  const __m128i shuffle = xx_loadu_128(g_shuffle_stats_data);
-  for (j = v_start; j < v_end; j += 64) {
-    const int vert_end = AOMMIN(64, v_end - j) + j;
-    for (i = j; i < vert_end; i++) {
-      acc_stat_win5_one_line_sse4_1(
-          dgd_win + i * dgd_stride, src + i * src_stride, h_start, h_end,
-          dgd_stride, &shuffle, &sumX, sumY, M_int32, H_int32);
-    }
-    for (k = 0; k < wiener_win; ++k) {
-      for (l = 0; l < wiener_win; ++l) {
-        M_int64[k][l] += M_int32[k][l];
-        M_int32[k][l] = 0;
-      }
-    }
-    for (k = 0; k < WIENER_WIN_CHROMA * WIENER_WIN_CHROMA; ++k) {
-      for (l = 0; l < WIENER_WIN_CHROMA * 8; ++l) {
-        H_int64[k][l] += H_int32[k][l];
-        H_int32[k][l] = 0;
-      }
-    }
-  }
-
-  const double avg_square_sum = avg * avg * pixel_count;
-  for (k = 0; k < wiener_win; k++) {
-    for (l = 0; l < wiener_win; l++) {
-      const int32_t idx0 = l * wiener_win + k;
-      M[idx0] = M_int64[k][l] + avg_square_sum - avg * (sumX + sumY[k][l]);
-      double *H_ = H + idx0 * wiener_win2;
-      int64_t *H_int_ = &H_int64[idx0][0];
-      for (m = 0; m < wiener_win; m++) {
-        for (n = 0; n < wiener_win; n++) {
-          H_[m * wiener_win + n] = H_int_[n * 8 + m] + avg_square_sum -
-                                   avg * (sumY[k][l] + sumY[n][m]);
-        }
-      }
-    }
-  }
-}
-void av1_compute_stats_sse4_1(int wiener_win, const uint8_t *dgd,
-                              const uint8_t *src, int h_start, int h_end,
-                              int v_start, int v_end, int dgd_stride,
-                              int src_stride, double *M, double *H) {
-  if (wiener_win == WIENER_WIN) {
-    compute_stats_win7_opt_sse4_1(dgd, src, h_start, h_end, v_start, v_end,
-                                  dgd_stride, src_stride, M, H);
-  } else if (wiener_win == WIENER_WIN_CHROMA) {
-    compute_stats_win5_opt_sse4_1(dgd, src, h_start, h_end, v_start, v_end,
-                                  dgd_stride, src_stride, M, H);
-  } else {
-    av1_compute_stats_c(wiener_win, dgd, src, h_start, h_end, v_start, v_end,
-                        dgd_stride, src_stride, M, H);
-  }
-}
-
-static INLINE __m128i pair_set_epi16(uint16_t a, uint16_t b) {
-  return _mm_set1_epi32((int32_t)(((uint16_t)(a)) | (((uint32_t)(b)) << 16)));
-}
-
-int64_t av1_lowbd_pixel_proj_error_sse4_1(
-    const uint8_t *src8, int width, int height, int src_stride,
-    const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride,
-    int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params) {
-  int i, j, k;
-  const int32_t shift = SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS;
-  const __m128i rounding = _mm_set1_epi32(1 << (shift - 1));
-  __m128i sum64 = _mm_setzero_si128();
-  const uint8_t *src = src8;
-  const uint8_t *dat = dat8;
-  int64_t err = 0;
-  if (params->r[0] > 0 && params->r[1] > 0) {
-    __m128i xq_coeff = pair_set_epi16(xq[0], xq[1]);
-    for (i = 0; i < height; ++i) {
-      __m128i sum32 = _mm_setzero_si128();
-      for (j = 0; j < width - 8; j += 8) {
-        const __m128i d0 = _mm_cvtepu8_epi16(xx_loadl_64(dat + j));
-        const __m128i s0 = _mm_cvtepu8_epi16(xx_loadl_64(src + j));
-        const __m128i flt0_16b =
-            _mm_packs_epi32(xx_loadu_128(flt0 + j), xx_loadu_128(flt0 + j + 4));
-        const __m128i flt1_16b =
-            _mm_packs_epi32(xx_loadu_128(flt1 + j), xx_loadu_128(flt1 + j + 4));
-        const __m128i u0 = _mm_slli_epi16(d0, SGRPROJ_RST_BITS);
-        const __m128i flt0_0_sub_u = _mm_sub_epi16(flt0_16b, u0);
-        const __m128i flt1_0_sub_u = _mm_sub_epi16(flt1_16b, u0);
-        const __m128i v0 = _mm_madd_epi16(
-            xq_coeff, _mm_unpacklo_epi16(flt0_0_sub_u, flt1_0_sub_u));
-        const __m128i v1 = _mm_madd_epi16(
-            xq_coeff, _mm_unpackhi_epi16(flt0_0_sub_u, flt1_0_sub_u));
-        const __m128i vr0 = _mm_srai_epi32(_mm_add_epi32(v0, rounding), shift);
-        const __m128i vr1 = _mm_srai_epi32(_mm_add_epi32(v1, rounding), shift);
-        const __m128i e0 =
-            _mm_sub_epi16(_mm_add_epi16(_mm_packs_epi32(vr0, vr1), d0), s0);
-        const __m128i err0 = _mm_madd_epi16(e0, e0);
-        sum32 = _mm_add_epi32(sum32, err0);
-      }
-      for (k = j; k < width; ++k) {
-        const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS);
-        int32_t v = xq[0] * (flt0[k] - u) + xq[1] * (flt1[k] - u);
-        const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k];
-        err += e * e;
-      }
-      dat += dat_stride;
-      src += src_stride;
-      flt0 += flt0_stride;
-      flt1 += flt1_stride;
-      const __m128i sum64_0 = _mm_cvtepi32_epi64(sum32);
-      const __m128i sum64_1 = _mm_cvtepi32_epi64(_mm_srli_si128(sum32, 8));
-      sum64 = _mm_add_epi64(sum64, sum64_0);
-      sum64 = _mm_add_epi64(sum64, sum64_1);
-    }
-  } else if (params->r[0] > 0) {
-    __m128i xq_coeff = pair_set_epi16(xq[0], -(xq[0] << SGRPROJ_RST_BITS));
-    for (i = 0; i < height; ++i) {
-      __m128i sum32 = _mm_setzero_si128();
-      for (j = 0; j < width - 8; j += 8) {
-        const __m128i d0 = _mm_cvtepu8_epi16(xx_loadl_64(dat + j));
-        const __m128i s0 = _mm_cvtepu8_epi16(xx_loadl_64(src + j));
-        const __m128i flt0_16b =
-            _mm_packs_epi32(xx_loadu_128(flt0 + j), xx_loadu_128(flt0 + j + 4));
-        const __m128i v0 =
-            _mm_madd_epi16(xq_coeff, _mm_unpacklo_epi16(flt0_16b, d0));
-        const __m128i v1 =
-            _mm_madd_epi16(xq_coeff, _mm_unpackhi_epi16(flt0_16b, d0));
-        const __m128i vr0 = _mm_srai_epi32(_mm_add_epi32(v0, rounding), shift);
-        const __m128i vr1 = _mm_srai_epi32(_mm_add_epi32(v1, rounding), shift);
-        const __m128i e0 =
-            _mm_sub_epi16(_mm_add_epi16(_mm_packs_epi32(vr0, vr1), d0), s0);
-        const __m128i err0 = _mm_madd_epi16(e0, e0);
-        sum32 = _mm_add_epi32(sum32, err0);
-      }
-      for (k = j; k < width; ++k) {
-        const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS);
-        int32_t v = xq[0] * (flt0[k] - u);
-        const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k];
-        err += e * e;
-      }
-      dat += dat_stride;
-      src += src_stride;
-      flt0 += flt0_stride;
-      const __m128i sum64_0 = _mm_cvtepi32_epi64(sum32);
-      const __m128i sum64_1 = _mm_cvtepi32_epi64(_mm_srli_si128(sum32, 8));
-      sum64 = _mm_add_epi64(sum64, sum64_0);
-      sum64 = _mm_add_epi64(sum64, sum64_1);
-    }
-  } else if (params->r[1] > 0) {
-    __m128i xq_coeff = pair_set_epi16(xq[1], -(xq[1] << SGRPROJ_RST_BITS));
-    for (i = 0; i < height; ++i) {
-      __m128i sum32 = _mm_setzero_si128();
-      for (j = 0; j < width - 8; j += 8) {
-        const __m128i d0 = _mm_cvtepu8_epi16(xx_loadl_64(dat + j));
-        const __m128i s0 = _mm_cvtepu8_epi16(xx_loadl_64(src + j));
-        const __m128i flt1_16b =
-            _mm_packs_epi32(xx_loadu_128(flt1 + j), xx_loadu_128(flt1 + j + 4));
-        const __m128i v0 =
-            _mm_madd_epi16(xq_coeff, _mm_unpacklo_epi16(flt1_16b, d0));
-        const __m128i v1 =
-            _mm_madd_epi16(xq_coeff, _mm_unpackhi_epi16(flt1_16b, d0));
-        const __m128i vr0 = _mm_srai_epi32(_mm_add_epi32(v0, rounding), shift);
-        const __m128i vr1 = _mm_srai_epi32(_mm_add_epi32(v1, rounding), shift);
-        const __m128i e0 =
-            _mm_sub_epi16(_mm_add_epi16(_mm_packs_epi32(vr0, vr1), d0), s0);
-        const __m128i err0 = _mm_madd_epi16(e0, e0);
-        sum32 = _mm_add_epi32(sum32, err0);
-      }
-      for (k = j; k < width; ++k) {
-        const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS);
-        int32_t v = xq[1] * (flt1[k] - u);
-        const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k];
-        err += e * e;
-      }
-      dat += dat_stride;
-      src += src_stride;
-      flt1 += flt1_stride;
-      const __m128i sum64_0 = _mm_cvtepi32_epi64(sum32);
-      const __m128i sum64_1 = _mm_cvtepi32_epi64(_mm_srli_si128(sum32, 8));
-      sum64 = _mm_add_epi64(sum64, sum64_0);
-      sum64 = _mm_add_epi64(sum64, sum64_1);
-    }
-  } else {
-    __m128i sum32 = _mm_setzero_si128();
-    for (i = 0; i < height; ++i) {
-      for (j = 0; j < width - 16; j += 16) {
-        const __m128i d = xx_loadu_128(dat + j);
-        const __m128i s = xx_loadu_128(src + j);
-        const __m128i d0 = _mm_cvtepu8_epi16(d);
-        const __m128i d1 = _mm_cvtepu8_epi16(_mm_srli_si128(d, 8));
-        const __m128i s0 = _mm_cvtepu8_epi16(s);
-        const __m128i s1 = _mm_cvtepu8_epi16(_mm_srli_si128(s, 8));
-        const __m128i diff0 = _mm_sub_epi16(d0, s0);
-        const __m128i diff1 = _mm_sub_epi16(d1, s1);
-        const __m128i err0 = _mm_madd_epi16(diff0, diff0);
-        const __m128i err1 = _mm_madd_epi16(diff1, diff1);
-        sum32 = _mm_add_epi32(sum32, err0);
-        sum32 = _mm_add_epi32(sum32, err1);
-      }
-      for (k = j; k < width; ++k) {
-        const int32_t e = (int32_t)(dat[k]) - src[k];
-        err += e * e;
-      }
-      dat += dat_stride;
-      src += src_stride;
-    }
-    const __m128i sum64_0 = _mm_cvtepi32_epi64(sum32);
-    const __m128i sum64_1 = _mm_cvtepi32_epi64(_mm_srli_si128(sum32, 8));
-    sum64 = _mm_add_epi64(sum64_0, sum64_1);
-  }
-  int64_t sum[2];
-  xx_storeu_128(sum, sum64);
-  err += sum[0] + sum[1];
-  return err;
-}
diff --git a/third_party/aom/av1/encoder/x86/temporal_filter_apply_sse2.asm b/third_party/aom/av1/encoder/x86/temporal_filter_apply_sse2.asm
deleted file mode 100644
index 30983d1c1..000000000
--- a/third_party/aom/av1/encoder/x86/temporal_filter_apply_sse2.asm
+++ /dev/null
@@ -1,217 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-
-%include "aom_ports/x86_abi_support.asm"
-
-SECTION .text
-
-; void av1_temporal_filter_apply_sse2 | arg
-;  (unsigned char  *frame1,           |  0
-;   unsigned int    stride,           |  1
-;   unsigned char  *frame2,           |  2
-;   unsigned int    block_width,      |  3
-;   unsigned int    block_height,     |  4
-;   int             strength,         |  5
-;   int             filter_weight,    |  6
-;   unsigned int   *accumulator,      |  7
-;   unsigned short *count)            |  8
-global sym(av1_temporal_filter_apply_sse2) PRIVATE
-sym(av1_temporal_filter_apply_sse2):
-
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 9
-    SAVE_XMM 7
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ALIGN_STACK 16, rax
-    %define block_width    0
-    %define block_height  16
-    %define strength      32
-    %define filter_weight 48
-    %define rounding_bit  64
-    %define rbp_backup    80
-    %define stack_size    96
-    sub         rsp,           stack_size
-    mov         [rsp + rbp_backup], rbp
-    ; end prolog
-
-        mov         edx,            arg(3)
-        mov         [rsp + block_width], rdx
-        mov         edx,            arg(4)
-        mov         [rsp + block_height], rdx
-        movd        xmm6,           arg(5)
-        movdqa      [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read
-
-        ; calculate the rounding bit outside the loop
-        ; 0x8000 >> (16 - strength)
-        mov         rdx,            16
-        sub         rdx,            arg(5) ; 16 - strength
-        movq        xmm4,           rdx    ; can't use rdx w/ shift
-        movdqa      xmm5,           [GLOBAL(_const_top_bit)]
-        psrlw       xmm5,           xmm4
-        movdqa      [rsp + rounding_bit], xmm5
-
-        mov         rsi,            arg(0) ; src/frame1
-        mov         rdx,            arg(2) ; predictor frame
-        mov         rdi,            arg(7) ; accumulator
-        mov         rax,            arg(8) ; count
-
-        ; dup the filter weight and store for later
-        movd        xmm0,           arg(6) ; filter_weight
-        pshuflw     xmm0,           xmm0, 0
-        punpcklwd   xmm0,           xmm0
-        movdqa      [rsp + filter_weight], xmm0
-
-        mov         rbp,            arg(1) ; stride
-        pxor        xmm7,           xmm7   ; zero for extraction
-
-        mov         rcx,            [rsp + block_width]
-        imul        rcx,            [rsp + block_height]
-        add         rcx,            rdx
-        cmp         dword ptr [rsp + block_width], 8
-        jne         .temporal_filter_apply_load_16
-
-.temporal_filter_apply_load_8:
-        movq        xmm0,           [rsi]  ; first row
-        lea         rsi,            [rsi + rbp] ; += stride
-        punpcklbw   xmm0,           xmm7   ; src[ 0- 7]
-        movq        xmm1,           [rsi]  ; second row
-        lea         rsi,            [rsi + rbp] ; += stride
-        punpcklbw   xmm1,           xmm7   ; src[ 8-15]
-        jmp         .temporal_filter_apply_load_finished
-
-.temporal_filter_apply_load_16:
-        movdqa      xmm0,           [rsi]  ; src (frame1)
-        lea         rsi,            [rsi + rbp] ; += stride
-        movdqa      xmm1,           xmm0
-        punpcklbw   xmm0,           xmm7   ; src[ 0- 7]
-        punpckhbw   xmm1,           xmm7   ; src[ 8-15]
-
-.temporal_filter_apply_load_finished:
-        movdqa      xmm2,           [rdx]  ; predictor (frame2)
-        movdqa      xmm3,           xmm2
-        punpcklbw   xmm2,           xmm7   ; pred[ 0- 7]
-        punpckhbw   xmm3,           xmm7   ; pred[ 8-15]
-
-        ; modifier = src_byte - pixel_value
-        psubw       xmm0,           xmm2   ; src - pred[ 0- 7]
-        psubw       xmm1,           xmm3   ; src - pred[ 8-15]
-
-        ; modifier *= modifier
-        pmullw      xmm0,           xmm0   ; modifer[ 0- 7]^2
-        pmullw      xmm1,           xmm1   ; modifer[ 8-15]^2
-
-        ; modifier *= 3
-        pmullw      xmm0,           [GLOBAL(_const_3w)]
-        pmullw      xmm1,           [GLOBAL(_const_3w)]
-
-        ; modifer += 0x8000 >> (16 - strength)
-        paddw       xmm0,           [rsp + rounding_bit]
-        paddw       xmm1,           [rsp + rounding_bit]
-
-        ; modifier >>= strength
-        psrlw       xmm0,           [rsp + strength]
-        psrlw       xmm1,           [rsp + strength]
-
-        ; modifier = 16 - modifier
-        ; saturation takes care of modifier > 16
-        movdqa      xmm3,           [GLOBAL(_const_16w)]
-        movdqa      xmm2,           [GLOBAL(_const_16w)]
-        psubusw     xmm3,           xmm1
-        psubusw     xmm2,           xmm0
-
-        ; modifier *= filter_weight
-        pmullw      xmm2,           [rsp + filter_weight]
-        pmullw      xmm3,           [rsp + filter_weight]
-
-        ; count
-        movdqa      xmm4,           [rax]
-        movdqa      xmm5,           [rax+16]
-        ; += modifier
-        paddw       xmm4,           xmm2
-        paddw       xmm5,           xmm3
-        ; write back
-        movdqa      [rax],          xmm4
-        movdqa      [rax+16],       xmm5
-        lea         rax,            [rax + 16*2] ; count += 16*(sizeof(short))
-
-        ; load and extract the predictor up to shorts
-        pxor        xmm7,           xmm7
-        movdqa      xmm0,           [rdx]
-        lea         rdx,            [rdx + 16*1] ; pred += 16*(sizeof(char))
-        movdqa      xmm1,           xmm0
-        punpcklbw   xmm0,           xmm7   ; pred[ 0- 7]
-        punpckhbw   xmm1,           xmm7   ; pred[ 8-15]
-
-        ; modifier *= pixel_value
-        pmullw      xmm0,           xmm2
-        pmullw      xmm1,           xmm3
-
-        ; expand to double words
-        movdqa      xmm2,           xmm0
-        punpcklwd   xmm0,           xmm7   ; [ 0- 3]
-        punpckhwd   xmm2,           xmm7   ; [ 4- 7]
-        movdqa      xmm3,           xmm1
-        punpcklwd   xmm1,           xmm7   ; [ 8-11]
-        punpckhwd   xmm3,           xmm7   ; [12-15]
-
-        ; accumulator
-        movdqa      xmm4,           [rdi]
-        movdqa      xmm5,           [rdi+16]
-        movdqa      xmm6,           [rdi+32]
-        movdqa      xmm7,           [rdi+48]
-        ; += modifier
-        paddd       xmm4,           xmm0
-        paddd       xmm5,           xmm2
-        paddd       xmm6,           xmm1
-        paddd       xmm7,           xmm3
-        ; write back
-        movdqa      [rdi],          xmm4
-        movdqa      [rdi+16],       xmm5
-        movdqa      [rdi+32],       xmm6
-        movdqa      [rdi+48],       xmm7
-        lea         rdi,            [rdi + 16*4] ; accumulator += 16*(sizeof(int))
-
-        cmp         rdx,            rcx
-        je          .temporal_filter_apply_epilog
-        pxor        xmm7,           xmm7   ; zero for extraction
-        cmp         dword ptr [rsp + block_width], 16
-        je          .temporal_filter_apply_load_16
-        jmp         .temporal_filter_apply_load_8
-
-.temporal_filter_apply_epilog:
-    ; begin epilog
-    mov         rbp,            [rsp + rbp_backup]
-    add         rsp,            stack_size
-    pop         rsp
-    pop         rdi
-    pop         rsi
-    RESTORE_GOT
-    RESTORE_XMM
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-SECTION_RODATA
-align 16
-_const_3w:
-    times 8 dw 3
-align 16
-_const_top_bit:
-    times 8 dw 1<<15
-align 16
-_const_16w:
-    times 8 dw 16
diff --git a/third_party/aom/av1/encoder/x86/wedge_utils_avx2.c b/third_party/aom/av1/encoder/x86/wedge_utils_avx2.c
deleted file mode 100644
index 2a792f14e..000000000
--- a/third_party/aom/av1/encoder/x86/wedge_utils_avx2.c
+++ /dev/null
@@ -1,215 +0,0 @@
-/*
- * Copyright (c) 2018, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <immintrin.h>
-#include <smmintrin.h>
-
-#include "aom_dsp/x86/synonyms.h"
-#include "aom_dsp/x86/synonyms_avx2.h"
-#include "aom/aom_integer.h"
-
-#include "av1/common/reconinter.h"
-
-#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
-
-/**
- * See av1_wedge_sse_from_residuals_c
- */
-uint64_t av1_wedge_sse_from_residuals_avx2(const int16_t *r1, const int16_t *d,
-                                           const uint8_t *m, int N) {
-  int n = -N;
-
-  uint64_t csse;
-
-  const __m256i v_mask_max_w = _mm256_set1_epi16(MAX_MASK_VALUE);
-  const __m256i v_zext_q = yy_set1_64_from_32i(0xffffffff);
-
-  __m256i v_acc0_q = _mm256_setzero_si256();
-
-  assert(N % 64 == 0);
-
-  r1 += N;
-  d += N;
-  m += N;
-
-  do {
-    const __m256i v_r0_w = _mm256_lddqu_si256((__m256i *)(r1 + n));
-    const __m256i v_d0_w = _mm256_lddqu_si256((__m256i *)(d + n));
-    const __m128i v_m01_b = _mm_lddqu_si128((__m128i *)(m + n));
-
-    const __m256i v_rd0l_w = _mm256_unpacklo_epi16(v_d0_w, v_r0_w);
-    const __m256i v_rd0h_w = _mm256_unpackhi_epi16(v_d0_w, v_r0_w);
-    const __m256i v_m0_w = _mm256_cvtepu8_epi16(v_m01_b);
-
-    const __m256i v_m0l_w = _mm256_unpacklo_epi16(v_m0_w, v_mask_max_w);
-    const __m256i v_m0h_w = _mm256_unpackhi_epi16(v_m0_w, v_mask_max_w);
-
-    const __m256i v_t0l_d = _mm256_madd_epi16(v_rd0l_w, v_m0l_w);
-    const __m256i v_t0h_d = _mm256_madd_epi16(v_rd0h_w, v_m0h_w);
-
-    const __m256i v_t0_w = _mm256_packs_epi32(v_t0l_d, v_t0h_d);
-
-    const __m256i v_sq0_d = _mm256_madd_epi16(v_t0_w, v_t0_w);
-
-    const __m256i v_sum0_q = _mm256_add_epi64(
-        _mm256_and_si256(v_sq0_d, v_zext_q), _mm256_srli_epi64(v_sq0_d, 32));
-
-    v_acc0_q = _mm256_add_epi64(v_acc0_q, v_sum0_q);
-
-    n += 16;
-  } while (n);
-
-  v_acc0_q = _mm256_add_epi64(v_acc0_q, _mm256_srli_si256(v_acc0_q, 8));
-  __m128i v_acc_q_0 = _mm256_castsi256_si128(v_acc0_q);
-  __m128i v_acc_q_1 = _mm256_extracti128_si256(v_acc0_q, 1);
-  v_acc_q_0 = _mm_add_epi64(v_acc_q_0, v_acc_q_1);
-#if ARCH_X86_64
-  csse = (uint64_t)_mm_extract_epi64(v_acc_q_0, 0);
-#else
-  xx_storel_64(&csse, v_acc_q_0);
-#endif
-
-  return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS);
-}
-
-/**
- * See av1_wedge_sign_from_residuals_c
- */
-int av1_wedge_sign_from_residuals_avx2(const int16_t *ds, const uint8_t *m,
-                                       int N, int64_t limit) {
-  int64_t acc;
-  __m256i v_acc0_d = _mm256_setzero_si256();
-
-  // Input size limited to 8192 by the use of 32 bit accumulators and m
-  // being between [0, 64]. Overflow might happen at larger sizes,
-  // though it is practically impossible on real video input.
-  assert(N < 8192);
-  assert(N % 64 == 0);
-
-  do {
-    const __m256i v_m01_b = _mm256_lddqu_si256((__m256i *)(m));
-    const __m256i v_m23_b = _mm256_lddqu_si256((__m256i *)(m + 32));
-
-    const __m256i v_d0_w = _mm256_lddqu_si256((__m256i *)(ds));
-    const __m256i v_d1_w = _mm256_lddqu_si256((__m256i *)(ds + 16));
-    const __m256i v_d2_w = _mm256_lddqu_si256((__m256i *)(ds + 32));
-    const __m256i v_d3_w = _mm256_lddqu_si256((__m256i *)(ds + 48));
-
-    const __m256i v_m0_w =
-        _mm256_cvtepu8_epi16(_mm256_castsi256_si128(v_m01_b));
-    const __m256i v_m1_w =
-        _mm256_cvtepu8_epi16(_mm256_extracti128_si256(v_m01_b, 1));
-    const __m256i v_m2_w =
-        _mm256_cvtepu8_epi16(_mm256_castsi256_si128(v_m23_b));
-    const __m256i v_m3_w =
-        _mm256_cvtepu8_epi16(_mm256_extracti128_si256(v_m23_b, 1));
-
-    const __m256i v_p0_d = _mm256_madd_epi16(v_d0_w, v_m0_w);
-    const __m256i v_p1_d = _mm256_madd_epi16(v_d1_w, v_m1_w);
-    const __m256i v_p2_d = _mm256_madd_epi16(v_d2_w, v_m2_w);
-    const __m256i v_p3_d = _mm256_madd_epi16(v_d3_w, v_m3_w);
-
-    const __m256i v_p01_d = _mm256_add_epi32(v_p0_d, v_p1_d);
-    const __m256i v_p23_d = _mm256_add_epi32(v_p2_d, v_p3_d);
-
-    const __m256i v_p0123_d = _mm256_add_epi32(v_p01_d, v_p23_d);
-
-    v_acc0_d = _mm256_add_epi32(v_acc0_d, v_p0123_d);
-
-    ds += 64;
-    m += 64;
-
-    N -= 64;
-  } while (N);
-
-  __m256i v_sign_d = _mm256_srai_epi32(v_acc0_d, 31);
-  v_acc0_d = _mm256_add_epi64(_mm256_unpacklo_epi32(v_acc0_d, v_sign_d),
-                              _mm256_unpackhi_epi32(v_acc0_d, v_sign_d));
-
-  __m256i v_acc_q = _mm256_add_epi64(v_acc0_d, _mm256_srli_si256(v_acc0_d, 8));
-
-  __m128i v_acc_q_0 = _mm256_castsi256_si128(v_acc_q);
-  __m128i v_acc_q_1 = _mm256_extracti128_si256(v_acc_q, 1);
-  v_acc_q_0 = _mm_add_epi64(v_acc_q_0, v_acc_q_1);
-
-#if ARCH_X86_64
-  acc = (uint64_t)_mm_extract_epi64(v_acc_q_0, 0);
-#else
-  xx_storel_64(&acc, v_acc_q_0);
-#endif
-
-  return acc > limit;
-}
-
-/**
- * av1_wedge_compute_delta_squares_c
- */
-void av1_wedge_compute_delta_squares_avx2(int16_t *d, const int16_t *a,
-                                          const int16_t *b, int N) {
-  const __m256i v_neg_w = _mm256_set1_epi32(0xffff0001);
-
-  assert(N % 64 == 0);
-
-  do {
-    const __m256i v_a0_w = _mm256_lddqu_si256((__m256i *)(a));
-    const __m256i v_b0_w = _mm256_lddqu_si256((__m256i *)(b));
-    const __m256i v_a1_w = _mm256_lddqu_si256((__m256i *)(a + 16));
-    const __m256i v_b1_w = _mm256_lddqu_si256((__m256i *)(b + 16));
-    const __m256i v_a2_w = _mm256_lddqu_si256((__m256i *)(a + 32));
-    const __m256i v_b2_w = _mm256_lddqu_si256((__m256i *)(b + 32));
-    const __m256i v_a3_w = _mm256_lddqu_si256((__m256i *)(a + 48));
-    const __m256i v_b3_w = _mm256_lddqu_si256((__m256i *)(b + 48));
-
-    const __m256i v_ab0l_w = _mm256_unpacklo_epi16(v_a0_w, v_b0_w);
-    const __m256i v_ab0h_w = _mm256_unpackhi_epi16(v_a0_w, v_b0_w);
-    const __m256i v_ab1l_w = _mm256_unpacklo_epi16(v_a1_w, v_b1_w);
-    const __m256i v_ab1h_w = _mm256_unpackhi_epi16(v_a1_w, v_b1_w);
-    const __m256i v_ab2l_w = _mm256_unpacklo_epi16(v_a2_w, v_b2_w);
-    const __m256i v_ab2h_w = _mm256_unpackhi_epi16(v_a2_w, v_b2_w);
-    const __m256i v_ab3l_w = _mm256_unpacklo_epi16(v_a3_w, v_b3_w);
-    const __m256i v_ab3h_w = _mm256_unpackhi_epi16(v_a3_w, v_b3_w);
-
-    // Negate top word of pairs
-    const __m256i v_abl0n_w = _mm256_sign_epi16(v_ab0l_w, v_neg_w);
-    const __m256i v_abh0n_w = _mm256_sign_epi16(v_ab0h_w, v_neg_w);
-    const __m256i v_abl1n_w = _mm256_sign_epi16(v_ab1l_w, v_neg_w);
-    const __m256i v_abh1n_w = _mm256_sign_epi16(v_ab1h_w, v_neg_w);
-    const __m256i v_abl2n_w = _mm256_sign_epi16(v_ab2l_w, v_neg_w);
-    const __m256i v_abh2n_w = _mm256_sign_epi16(v_ab2h_w, v_neg_w);
-    const __m256i v_abl3n_w = _mm256_sign_epi16(v_ab3l_w, v_neg_w);
-    const __m256i v_abh3n_w = _mm256_sign_epi16(v_ab3h_w, v_neg_w);
-
-    const __m256i v_r0l_w = _mm256_madd_epi16(v_ab0l_w, v_abl0n_w);
-    const __m256i v_r0h_w = _mm256_madd_epi16(v_ab0h_w, v_abh0n_w);
-    const __m256i v_r1l_w = _mm256_madd_epi16(v_ab1l_w, v_abl1n_w);
-    const __m256i v_r1h_w = _mm256_madd_epi16(v_ab1h_w, v_abh1n_w);
-    const __m256i v_r2l_w = _mm256_madd_epi16(v_ab2l_w, v_abl2n_w);
-    const __m256i v_r2h_w = _mm256_madd_epi16(v_ab2h_w, v_abh2n_w);
-    const __m256i v_r3l_w = _mm256_madd_epi16(v_ab3l_w, v_abl3n_w);
-    const __m256i v_r3h_w = _mm256_madd_epi16(v_ab3h_w, v_abh3n_w);
-
-    const __m256i v_r0_w = _mm256_packs_epi32(v_r0l_w, v_r0h_w);
-    const __m256i v_r1_w = _mm256_packs_epi32(v_r1l_w, v_r1h_w);
-    const __m256i v_r2_w = _mm256_packs_epi32(v_r2l_w, v_r2h_w);
-    const __m256i v_r3_w = _mm256_packs_epi32(v_r3l_w, v_r3h_w);
-
-    _mm256_store_si256((__m256i *)(d), v_r0_w);
-    _mm256_store_si256((__m256i *)(d + 16), v_r1_w);
-    _mm256_store_si256((__m256i *)(d + 32), v_r2_w);
-    _mm256_store_si256((__m256i *)(d + 48), v_r3_w);
-
-    a += 64;
-    b += 64;
-    d += 64;
-    N -= 64;
-  } while (N);
-}
diff --git a/third_party/aom/av1/encoder/x86/wedge_utils_sse2.c b/third_party/aom/av1/encoder/x86/wedge_utils_sse2.c
deleted file mode 100644
index 4d2e99f25..000000000
--- a/third_party/aom/av1/encoder/x86/wedge_utils_sse2.c
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <immintrin.h>
-
-#include "aom_dsp/x86/synonyms.h"
-
-#include "aom/aom_integer.h"
-
-#include "av1/common/reconinter.h"
-
-#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
-
-/**
- * See av1_wedge_sse_from_residuals_c
- */
-uint64_t av1_wedge_sse_from_residuals_sse2(const int16_t *r1, const int16_t *d,
-                                           const uint8_t *m, int N) {
-  int n = -N;
-  int n8 = n + 8;
-
-  uint64_t csse;
-
-  const __m128i v_mask_max_w = _mm_set1_epi16(MAX_MASK_VALUE);
-  const __m128i v_zext_q = xx_set1_64_from_32i(0xffffffff);
-
-  __m128i v_acc0_q = _mm_setzero_si128();
-
-  assert(N % 64 == 0);
-
-  r1 += N;
-  d += N;
-  m += N;
-
-  do {
-    const __m128i v_r0_w = xx_load_128(r1 + n);
-    const __m128i v_r1_w = xx_load_128(r1 + n8);
-    const __m128i v_d0_w = xx_load_128(d + n);
-    const __m128i v_d1_w = xx_load_128(d + n8);
-    const __m128i v_m01_b = xx_load_128(m + n);
-
-    const __m128i v_rd0l_w = _mm_unpacklo_epi16(v_d0_w, v_r0_w);
-    const __m128i v_rd0h_w = _mm_unpackhi_epi16(v_d0_w, v_r0_w);
-    const __m128i v_rd1l_w = _mm_unpacklo_epi16(v_d1_w, v_r1_w);
-    const __m128i v_rd1h_w = _mm_unpackhi_epi16(v_d1_w, v_r1_w);
-    const __m128i v_m0_w = _mm_unpacklo_epi8(v_m01_b, _mm_setzero_si128());
-    const __m128i v_m1_w = _mm_unpackhi_epi8(v_m01_b, _mm_setzero_si128());
-
-    const __m128i v_m0l_w = _mm_unpacklo_epi16(v_m0_w, v_mask_max_w);
-    const __m128i v_m0h_w = _mm_unpackhi_epi16(v_m0_w, v_mask_max_w);
-    const __m128i v_m1l_w = _mm_unpacklo_epi16(v_m1_w, v_mask_max_w);
-    const __m128i v_m1h_w = _mm_unpackhi_epi16(v_m1_w, v_mask_max_w);
-
-    const __m128i v_t0l_d = _mm_madd_epi16(v_rd0l_w, v_m0l_w);
-    const __m128i v_t0h_d = _mm_madd_epi16(v_rd0h_w, v_m0h_w);
-    const __m128i v_t1l_d = _mm_madd_epi16(v_rd1l_w, v_m1l_w);
-    const __m128i v_t1h_d = _mm_madd_epi16(v_rd1h_w, v_m1h_w);
-
-    const __m128i v_t0_w = _mm_packs_epi32(v_t0l_d, v_t0h_d);
-    const __m128i v_t1_w = _mm_packs_epi32(v_t1l_d, v_t1h_d);
-
-    const __m128i v_sq0_d = _mm_madd_epi16(v_t0_w, v_t0_w);
-    const __m128i v_sq1_d = _mm_madd_epi16(v_t1_w, v_t1_w);
-
-    const __m128i v_sum0_q = _mm_add_epi64(_mm_and_si128(v_sq0_d, v_zext_q),
-                                           _mm_srli_epi64(v_sq0_d, 32));
-    const __m128i v_sum1_q = _mm_add_epi64(_mm_and_si128(v_sq1_d, v_zext_q),
-                                           _mm_srli_epi64(v_sq1_d, 32));
-
-    v_acc0_q = _mm_add_epi64(v_acc0_q, v_sum0_q);
-    v_acc0_q = _mm_add_epi64(v_acc0_q, v_sum1_q);
-
-    n8 += 16;
-    n += 16;
-  } while (n);
-
-  v_acc0_q = _mm_add_epi64(v_acc0_q, _mm_srli_si128(v_acc0_q, 8));
-
-#if ARCH_X86_64
-  csse = (uint64_t)_mm_cvtsi128_si64(v_acc0_q);
-#else
-  xx_storel_64(&csse, v_acc0_q);
-#endif
-
-  return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS);
-}
-
-/**
- * See av1_wedge_sign_from_residuals_c
- */
-int av1_wedge_sign_from_residuals_sse2(const int16_t *ds, const uint8_t *m,
-                                       int N, int64_t limit) {
-  int64_t acc;
-
-  __m128i v_sign_d;
-  __m128i v_acc0_d = _mm_setzero_si128();
-  __m128i v_acc1_d = _mm_setzero_si128();
-  __m128i v_acc_q;
-
-  // Input size limited to 8192 by the use of 32 bit accumulators and m
-  // being between [0, 64]. Overflow might happen at larger sizes,
-  // though it is practically impossible on real video input.
-  assert(N < 8192);
-  assert(N % 64 == 0);
-
-  do {
-    const __m128i v_m01_b = xx_load_128(m);
-    const __m128i v_m23_b = xx_load_128(m + 16);
-    const __m128i v_m45_b = xx_load_128(m + 32);
-    const __m128i v_m67_b = xx_load_128(m + 48);
-
-    const __m128i v_d0_w = xx_load_128(ds);
-    const __m128i v_d1_w = xx_load_128(ds + 8);
-    const __m128i v_d2_w = xx_load_128(ds + 16);
-    const __m128i v_d3_w = xx_load_128(ds + 24);
-    const __m128i v_d4_w = xx_load_128(ds + 32);
-    const __m128i v_d5_w = xx_load_128(ds + 40);
-    const __m128i v_d6_w = xx_load_128(ds + 48);
-    const __m128i v_d7_w = xx_load_128(ds + 56);
-
-    const __m128i v_m0_w = _mm_unpacklo_epi8(v_m01_b, _mm_setzero_si128());
-    const __m128i v_m1_w = _mm_unpackhi_epi8(v_m01_b, _mm_setzero_si128());
-    const __m128i v_m2_w = _mm_unpacklo_epi8(v_m23_b, _mm_setzero_si128());
-    const __m128i v_m3_w = _mm_unpackhi_epi8(v_m23_b, _mm_setzero_si128());
-    const __m128i v_m4_w = _mm_unpacklo_epi8(v_m45_b, _mm_setzero_si128());
-    const __m128i v_m5_w = _mm_unpackhi_epi8(v_m45_b, _mm_setzero_si128());
-    const __m128i v_m6_w = _mm_unpacklo_epi8(v_m67_b, _mm_setzero_si128());
-    const __m128i v_m7_w = _mm_unpackhi_epi8(v_m67_b, _mm_setzero_si128());
-
-    const __m128i v_p0_d = _mm_madd_epi16(v_d0_w, v_m0_w);
-    const __m128i v_p1_d = _mm_madd_epi16(v_d1_w, v_m1_w);
-    const __m128i v_p2_d = _mm_madd_epi16(v_d2_w, v_m2_w);
-    const __m128i v_p3_d = _mm_madd_epi16(v_d3_w, v_m3_w);
-    const __m128i v_p4_d = _mm_madd_epi16(v_d4_w, v_m4_w);
-    const __m128i v_p5_d = _mm_madd_epi16(v_d5_w, v_m5_w);
-    const __m128i v_p6_d = _mm_madd_epi16(v_d6_w, v_m6_w);
-    const __m128i v_p7_d = _mm_madd_epi16(v_d7_w, v_m7_w);
-
-    const __m128i v_p01_d = _mm_add_epi32(v_p0_d, v_p1_d);
-    const __m128i v_p23_d = _mm_add_epi32(v_p2_d, v_p3_d);
-    const __m128i v_p45_d = _mm_add_epi32(v_p4_d, v_p5_d);
-    const __m128i v_p67_d = _mm_add_epi32(v_p6_d, v_p7_d);
-
-    const __m128i v_p0123_d = _mm_add_epi32(v_p01_d, v_p23_d);
-    const __m128i v_p4567_d = _mm_add_epi32(v_p45_d, v_p67_d);
-
-    v_acc0_d = _mm_add_epi32(v_acc0_d, v_p0123_d);
-    v_acc1_d = _mm_add_epi32(v_acc1_d, v_p4567_d);
-
-    ds += 64;
-    m += 64;
-
-    N -= 64;
-  } while (N);
-
-  v_sign_d = _mm_cmplt_epi32(v_acc0_d, _mm_setzero_si128());
-  v_acc0_d = _mm_add_epi64(_mm_unpacklo_epi32(v_acc0_d, v_sign_d),
-                           _mm_unpackhi_epi32(v_acc0_d, v_sign_d));
-
-  v_sign_d = _mm_cmplt_epi32(v_acc1_d, _mm_setzero_si128());
-  v_acc1_d = _mm_add_epi64(_mm_unpacklo_epi32(v_acc1_d, v_sign_d),
-                           _mm_unpackhi_epi32(v_acc1_d, v_sign_d));
-
-  v_acc_q = _mm_add_epi64(v_acc0_d, v_acc1_d);
-
-  v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8));
-
-#if ARCH_X86_64
-  acc = (uint64_t)_mm_cvtsi128_si64(v_acc_q);
-#else
-  xx_storel_64(&acc, v_acc_q);
-#endif
-
-  return acc > limit;
-}
-
-// Negate under mask
-static INLINE __m128i negm_epi16(__m128i v_v_w, __m128i v_mask_w) {
-  return _mm_sub_epi16(_mm_xor_si128(v_v_w, v_mask_w), v_mask_w);
-}
-
-/**
- * av1_wedge_compute_delta_squares_c
- */
-void av1_wedge_compute_delta_squares_sse2(int16_t *d, const int16_t *a,
-                                          const int16_t *b, int N) {
-  const __m128i v_neg_w =
-      _mm_set_epi16(0xffff, 0, 0xffff, 0, 0xffff, 0, 0xffff, 0);
-
-  assert(N % 64 == 0);
-
-  do {
-    const __m128i v_a0_w = xx_load_128(a);
-    const __m128i v_b0_w = xx_load_128(b);
-    const __m128i v_a1_w = xx_load_128(a + 8);
-    const __m128i v_b1_w = xx_load_128(b + 8);
-    const __m128i v_a2_w = xx_load_128(a + 16);
-    const __m128i v_b2_w = xx_load_128(b + 16);
-    const __m128i v_a3_w = xx_load_128(a + 24);
-    const __m128i v_b3_w = xx_load_128(b + 24);
-
-    const __m128i v_ab0l_w = _mm_unpacklo_epi16(v_a0_w, v_b0_w);
-    const __m128i v_ab0h_w = _mm_unpackhi_epi16(v_a0_w, v_b0_w);
-    const __m128i v_ab1l_w = _mm_unpacklo_epi16(v_a1_w, v_b1_w);
-    const __m128i v_ab1h_w = _mm_unpackhi_epi16(v_a1_w, v_b1_w);
-    const __m128i v_ab2l_w = _mm_unpacklo_epi16(v_a2_w, v_b2_w);
-    const __m128i v_ab2h_w = _mm_unpackhi_epi16(v_a2_w, v_b2_w);
-    const __m128i v_ab3l_w = _mm_unpacklo_epi16(v_a3_w, v_b3_w);
-    const __m128i v_ab3h_w = _mm_unpackhi_epi16(v_a3_w, v_b3_w);
-
-    // Negate top word of pairs
-    const __m128i v_abl0n_w = negm_epi16(v_ab0l_w, v_neg_w);
-    const __m128i v_abh0n_w = negm_epi16(v_ab0h_w, v_neg_w);
-    const __m128i v_abl1n_w = negm_epi16(v_ab1l_w, v_neg_w);
-    const __m128i v_abh1n_w = negm_epi16(v_ab1h_w, v_neg_w);
-    const __m128i v_abl2n_w = negm_epi16(v_ab2l_w, v_neg_w);
-    const __m128i v_abh2n_w = negm_epi16(v_ab2h_w, v_neg_w);
-    const __m128i v_abl3n_w = negm_epi16(v_ab3l_w, v_neg_w);
-    const __m128i v_abh3n_w = negm_epi16(v_ab3h_w, v_neg_w);
-
-    const __m128i v_r0l_w = _mm_madd_epi16(v_ab0l_w, v_abl0n_w);
-    const __m128i v_r0h_w = _mm_madd_epi16(v_ab0h_w, v_abh0n_w);
-    const __m128i v_r1l_w = _mm_madd_epi16(v_ab1l_w, v_abl1n_w);
-    const __m128i v_r1h_w = _mm_madd_epi16(v_ab1h_w, v_abh1n_w);
-    const __m128i v_r2l_w = _mm_madd_epi16(v_ab2l_w, v_abl2n_w);
-    const __m128i v_r2h_w = _mm_madd_epi16(v_ab2h_w, v_abh2n_w);
-    const __m128i v_r3l_w = _mm_madd_epi16(v_ab3l_w, v_abl3n_w);
-    const __m128i v_r3h_w = _mm_madd_epi16(v_ab3h_w, v_abh3n_w);
-
-    const __m128i v_r0_w = _mm_packs_epi32(v_r0l_w, v_r0h_w);
-    const __m128i v_r1_w = _mm_packs_epi32(v_r1l_w, v_r1h_w);
-    const __m128i v_r2_w = _mm_packs_epi32(v_r2l_w, v_r2h_w);
-    const __m128i v_r3_w = _mm_packs_epi32(v_r3l_w, v_r3h_w);
-
-    xx_store_128(d, v_r0_w);
-    xx_store_128(d + 8, v_r1_w);
-    xx_store_128(d + 16, v_r2_w);
-    xx_store_128(d + 24, v_r3_w);
-
-    a += 32;
-    b += 32;
-    d += 32;
-    N -= 32;
-  } while (N);
-}