Diffstat (limited to 'third_party/aom/av1/encoder')
-rw-r--r--  third_party/aom/av1/encoder/aq_cyclicrefresh.c  5
-rw-r--r--  third_party/aom/av1/encoder/av1_quantize.c  45
-rw-r--r--  third_party/aom/av1/encoder/av1_quantize.h  2
-rw-r--r--  third_party/aom/av1/encoder/bgsprite.c  748
-rw-r--r--  third_party/aom/av1/encoder/bgsprite.h  30
-rw-r--r--  third_party/aom/av1/encoder/bitstream.c  2604
-rw-r--r--  third_party/aom/av1/encoder/bitstream.h  9
-rw-r--r--  third_party/aom/av1/encoder/block.h  14
-rw-r--r--  third_party/aom/av1/encoder/context_tree.c  26
-rw-r--r--  third_party/aom/av1/encoder/context_tree.h  4
-rw-r--r--  third_party/aom/av1/encoder/cost.c  18
-rw-r--r--  third_party/aom/av1/encoder/cost.h  10
-rw-r--r--  third_party/aom/av1/encoder/dct.c  606
-rw-r--r--  third_party/aom/av1/encoder/encodeframe.c  1362
-rw-r--r--  third_party/aom/av1/encoder/encodeframe.h  2
-rw-r--r--  third_party/aom/av1/encoder/encodemb.c  978
-rw-r--r--  third_party/aom/av1/encoder/encodemb.h  15
-rw-r--r--  third_party/aom/av1/encoder/encodemv.c  143
-rw-r--r--  third_party/aom/av1/encoder/encodemv.h  5
-rw-r--r--  third_party/aom/av1/encoder/encoder.c  1211
-rw-r--r--  third_party/aom/av1/encoder/encoder.h  119
-rw-r--r--  third_party/aom/av1/encoder/encodetxb.c  345
-rw-r--r--  third_party/aom/av1/encoder/encodetxb.h  17
-rw-r--r--  third_party/aom/av1/encoder/ethread.c  14
-rw-r--r--  third_party/aom/av1/encoder/firstpass.c  134
-rw-r--r--  third_party/aom/av1/encoder/firstpass.h  23
-rw-r--r--  third_party/aom/av1/encoder/global_motion.c  26
-rw-r--r--  third_party/aom/av1/encoder/global_motion.h  3
-rw-r--r--  third_party/aom/av1/encoder/hybrid_fwd_txfm.c  421
-rw-r--r--  third_party/aom/av1/encoder/hybrid_fwd_txfm.h  17
-rw-r--r--  third_party/aom/av1/encoder/mcomp.c  245
-rw-r--r--  third_party/aom/av1/encoder/mcomp.h  17
-rw-r--r--  third_party/aom/av1/encoder/palette.c  48
-rw-r--r--  third_party/aom/av1/encoder/palette.h  8
-rw-r--r--  third_party/aom/av1/encoder/pickcdef.c  102
-rw-r--r--  third_party/aom/av1/encoder/picklpf.c  111
-rw-r--r--  third_party/aom/av1/encoder/picklpf.h  5
-rw-r--r--  third_party/aom/av1/encoder/pickrst.c  75
-rw-r--r--  third_party/aom/av1/encoder/ransac.c  18
-rw-r--r--  third_party/aom/av1/encoder/ratectrl.c  69
-rw-r--r--  third_party/aom/av1/encoder/ratectrl.h  5
-rw-r--r--  third_party/aom/av1/encoder/rd.c  196
-rw-r--r--  third_party/aom/av1/encoder/rd.h  150
-rw-r--r--  third_party/aom/av1/encoder/rdopt.c  3417
-rw-r--r--  third_party/aom/av1/encoder/rdopt.h  25
-rw-r--r--  third_party/aom/av1/encoder/segmentation.c  14
-rw-r--r--  third_party/aom/av1/encoder/speed_features.c  34
-rw-r--r--  third_party/aom/av1/encoder/speed_features.h  31
-rw-r--r--  third_party/aom/av1/encoder/temporal_filter.c  83
-rw-r--r--  third_party/aom/av1/encoder/temporal_filter.h  6
-rw-r--r--  third_party/aom/av1/encoder/tokenize.c  107
-rw-r--r--  third_party/aom/av1/encoder/tokenize.h  3
-rw-r--r--  third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c  143
-rw-r--r--  third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c  11
-rw-r--r--  third_party/aom/av1/encoder/x86/av1_quantize_avx2.c  289
-rw-r--r--  third_party/aom/av1/encoder/x86/dct_intrin_sse2.c  60
-rw-r--r--  third_party/aom/av1/encoder/x86/error_intrin_avx2.c  25
-rw-r--r--  third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c  18
-rw-r--r--  third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c  73
59 files changed, 9090 insertions, 5254 deletions
diff --git a/third_party/aom/av1/encoder/aq_cyclicrefresh.c b/third_party/aom/av1/encoder/aq_cyclicrefresh.c
index b2b410617..05aa28c9f 100644
--- a/third_party/aom/av1/encoder/aq_cyclicrefresh.c
+++ b/third_party/aom/av1/encoder/aq_cyclicrefresh.c
@@ -352,10 +352,7 @@ void av1_cyclic_refresh_check_golden_update(AV1_COMP *const cpi) {
// For video conference clips, if the background has high motion in current
// frame because of the camera movement, set this frame as the golden frame.
// Use 70% and 5% as the thresholds for golden frame refreshing.
- // Also, force this frame as a golden update frame if this frame will change
- // the resolution (av1_resize_pending != 0).
- if (av1_resize_pending(cpi) ||
- (cnt1 * 10 > (70 * rows * cols) && cnt2 * 20 < cnt1)) {
+ if (cnt1 * 10 > (70 * rows * cols) && cnt2 * 20 < cnt1) {
av1_cyclic_refresh_set_golden_update(cpi);
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
diff --git a/third_party/aom/av1/encoder/av1_quantize.c b/third_party/aom/av1/encoder/av1_quantize.c
index 63727df1f..dd53d4223 100644
--- a/third_party/aom/av1/encoder/av1_quantize.c
+++ b/third_party/aom/av1/encoder/av1_quantize.c
@@ -845,7 +845,6 @@ void av1_quantize_dc_nuq_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
}
#endif // CONFIG_NEW_QUANT
-#if CONFIG_HIGHBITDEPTH
void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr,
intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
tran_low_t *qcoeff_ptr,
@@ -899,14 +898,29 @@ void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
switch (qparam->log_scale) {
case 0:
- aom_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
- pd->dequant, eob_ptr, sc->scan, sc->iscan
+ if (LIKELY(n_coeffs >= 8)) {
+ aom_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round, p->quant, p->quant_shift, qcoeff_ptr,
+ dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
+ sc->iscan
#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
+ ,
+ qm_ptr, iqm_ptr
#endif
- );
+ );
+ } else {
+ // TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size
+ // quantization
+ aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round, p->quant, p->quant_shift, qcoeff_ptr,
+ dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
+ sc->iscan
+#if CONFIG_AOM_QM
+ ,
+ qm_ptr, iqm_ptr
+#endif
+ );
+ }
break;
case 1:
aom_highbd_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin,
@@ -936,7 +950,6 @@ void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
}
}
-#if CONFIG_HIGHBITDEPTH
static INLINE void highbd_quantize_dc(
const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr,
@@ -958,14 +971,13 @@ static INLINE void highbd_quantize_dc(
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int64_t tmp = abs_coeff + round_ptr[0];
- const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> (16 - log_scale));
+ const int abs_qcoeff = (int)((tmp * quant) >> (16 - log_scale));
qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / (1 << log_scale);
if (abs_qcoeff) eob = 0;
}
*eob_ptr = eob + 1;
}
-#endif // CONFIG_HIGHBITDEPTH
void av1_highbd_quantize_dc_facade(const tran_low_t *coeff_ptr,
intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
@@ -1504,9 +1516,7 @@ void av1_highbd_quantize_dc_nuq_facade(
}
}
#endif // CONFIG_NEW_QUANT
-#endif // CONFIG_HIGHBITDEPTH
-#if CONFIG_HIGHBITDEPTH
void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr,
@@ -1547,15 +1557,14 @@ void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
#endif
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + round_ptr[rc != 0];
+ const int64_t tmp = abs_coeff + (round_ptr[rc != 0] >> log_scale);
#if CONFIG_AOM_QM
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
+ const int abs_qcoeff =
+ (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / scale;
#else
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp * quant_ptr[rc != 0]) >> shift);
+ const int abs_qcoeff = (int)((tmp * quant_ptr[rc != 0]) >> shift);
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / scale;
#endif
@@ -1565,8 +1574,6 @@ void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
*eob_ptr = eob + 1;
}
-#endif // CONFIG_HIGHBITDEPTH
-
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
uint32_t t;
int l, m;
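
Reviewer note (not part of the patch): the hunks above drop the uint32_t cast on abs_qcoeff in favor of int, and in av1_highbd_quantize_fp_c additionally shift the rounding term right by log_scale. Below is a minimal standalone sketch of the per-coefficient step, mirroring highbd_quantize_dc() in this file; the function name quantize_one_coeff is illustrative only, and the assumption behind the cast change is that the quantized magnitude always fits in a signed int.

#include <stdint.h>

typedef int32_t tran_low_t;

/* Mirrors the per-coefficient math of highbd_quantize_dc() above:
 * magnitude = (abs(coeff) + round) * quant >> (16 - log_scale),
 * then the sign is reapplied and the dequantized value is rescaled. */
static tran_low_t quantize_one_coeff(tran_low_t coeff, int round, int quant,
                                     int dequant, int log_scale,
                                     tran_low_t *dqcoeff) {
  const int coeff_sign = (coeff >> 31);
  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
  const int64_t tmp = abs_coeff + round;
  const int abs_qcoeff = (int)((tmp * quant) >> (16 - log_scale));
  const tran_low_t qcoeff = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
  *dqcoeff = qcoeff * dequant / (1 << log_scale);
  return qcoeff;
}
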
diff --git a/third_party/aom/av1/encoder/av1_quantize.h b/third_party/aom/av1/encoder/av1_quantize.h
index 4bc9cccc2..e5fc8b528 100644
--- a/third_party/aom/av1/encoder/av1_quantize.h
+++ b/third_party/aom/av1/encoder/av1_quantize.h
@@ -146,7 +146,6 @@ void av1_quantize_dc_nuq_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const QUANT_PARAM *qparam);
#endif // CONFIG_NEW_QUANT
-#if CONFIG_HIGHBITDEPTH
void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr,
intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
tran_low_t *qcoeff_ptr,
@@ -190,7 +189,6 @@ void av1_highbd_quantize_dc_nuq_facade(
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const SCAN_ORDER *sc,
const QUANT_PARAM *qparam);
#endif // CONFIG_NEW_QUANT
-#endif // CONFIG_HIGHBITDEPTH
#ifdef __cplusplus
} // extern "C"
diff --git a/third_party/aom/av1/encoder/bgsprite.c b/third_party/aom/av1/encoder/bgsprite.c
new file mode 100644
index 000000000..64deade06
--- /dev/null
+++ b/third_party/aom/av1/encoder/bgsprite.c
@@ -0,0 +1,748 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#define _POSIX_C_SOURCE 200112L // rand_r()
+#include <assert.h>
+#include <float.h>
+#include <limits.h>
+#include <math.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "av1/encoder/bgsprite.h"
+
+#include "aom_mem/aom_mem.h"
+#include "./aom_scale_rtcd.h"
+#include "av1/common/mv.h"
+#include "av1/common/warped_motion.h"
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/global_motion.h"
+#include "av1/encoder/mathutils.h"
+#include "av1/encoder/temporal_filter.h"
+
+/* Blending Modes:
+ * 0 = Median
+ * 1 = Mean
+ */
+#define BGSPRITE_BLENDING_MODE 1
+
+/* Interpolation for panorama alignment sampling:
+ * 0 = Nearest neighbor
+ * 1 = Bilinear
+ */
+#define BGSPRITE_INTERPOLATION 0
+
+#define TRANSFORM_MAT_DIM 3
+
+typedef struct {
+#if CONFIG_HIGHBITDEPTH
+ uint16_t y;
+ uint16_t u;
+ uint16_t v;
+#else
+ uint8_t y;
+ uint8_t u;
+ uint8_t v;
+#endif // CONFIG_HIGHBITDEPTH
+} YuvPixel;
+
+// Maps to convert from matrix form to param vector form.
+static const int params_to_matrix_map[] = { 2, 3, 0, 4, 5, 1, 6, 7 };
+static const int matrix_to_params_map[] = { 2, 5, 0, 1, 3, 4, 6, 7 };
+
+// Convert the parameter array to a 3x3 matrix form.
+static void params_to_matrix(const double *const params, double *target) {
+ for (int i = 0; i < MAX_PARAMDIM - 1; i++) {
+ assert(params_to_matrix_map[i] < MAX_PARAMDIM - 1);
+ target[i] = params[params_to_matrix_map[i]];
+ }
+ target[8] = 1;
+}
+
+// Convert a 3x3 matrix to a parameter array form.
+static void matrix_to_params(const double *const matrix, double *target) {
+ for (int i = 0; i < MAX_PARAMDIM - 1; i++) {
+ assert(matrix_to_params_map[i] < MAX_PARAMDIM - 1);
+ target[i] = matrix[matrix_to_params_map[i]];
+ }
+}
+
+// Do matrix multiplication on params.
+static void multiply_params(double *const m1, double *const m2,
+ double *target) {
+ double m1_matrix[MAX_PARAMDIM];
+ double m2_matrix[MAX_PARAMDIM];
+ double result[MAX_PARAMDIM];
+
+ params_to_matrix(m1, m1_matrix);
+ params_to_matrix(m2, m2_matrix);
+ multiply_mat(m2_matrix, m1_matrix, result, TRANSFORM_MAT_DIM,
+ TRANSFORM_MAT_DIM, TRANSFORM_MAT_DIM);
+ matrix_to_params(result, target);
+}
+
+// Finds x and y limits of a single transformed image.
+// Width and height are the size of the input video.
+static void find_frame_limit(int width, int height,
+ const double *const transform, int *x_min,
+ int *x_max, int *y_min, int *y_max) {
+ double transform_matrix[MAX_PARAMDIM];
+ double xy_matrix[3] = { 0, 0, 1 };
+ double uv_matrix[3] = { 0 };
+// Macro used to update frame limits based on transformed coordinates.
+#define UPDATELIMITS(u, v, x_min, x_max, y_min, y_max) \
+ { \
+ if ((int)ceil(u) > *x_max) { \
+ *x_max = (int)ceil(u); \
+ } \
+ if ((int)floor(u) < *x_min) { \
+ *x_min = (int)floor(u); \
+ } \
+ if ((int)ceil(v) > *y_max) { \
+ *y_max = (int)ceil(v); \
+ } \
+ if ((int)floor(v) < *y_min) { \
+ *y_min = (int)floor(v); \
+ } \
+ }
+
+ params_to_matrix(transform, transform_matrix);
+ xy_matrix[0] = 0;
+ xy_matrix[1] = 0;
+ multiply_mat(transform_matrix, xy_matrix, uv_matrix, TRANSFORM_MAT_DIM,
+ TRANSFORM_MAT_DIM, 1);
+ *x_max = (int)ceil(uv_matrix[0]);
+ *x_min = (int)floor(uv_matrix[0]);
+ *y_max = (int)ceil(uv_matrix[1]);
+ *y_min = (int)floor(uv_matrix[1]);
+
+ xy_matrix[0] = width;
+ xy_matrix[1] = 0;
+ multiply_mat(transform_matrix, xy_matrix, uv_matrix, TRANSFORM_MAT_DIM,
+ TRANSFORM_MAT_DIM, 1);
+ UPDATELIMITS(uv_matrix[0], uv_matrix[1], x_min, x_max, y_min, y_max);
+
+ xy_matrix[0] = width;
+ xy_matrix[1] = height;
+ multiply_mat(transform_matrix, xy_matrix, uv_matrix, TRANSFORM_MAT_DIM,
+ TRANSFORM_MAT_DIM, 1);
+ UPDATELIMITS(uv_matrix[0], uv_matrix[1], x_min, x_max, y_min, y_max);
+
+ xy_matrix[0] = 0;
+ xy_matrix[1] = height;
+ multiply_mat(transform_matrix, xy_matrix, uv_matrix, TRANSFORM_MAT_DIM,
+ TRANSFORM_MAT_DIM, 1);
+ UPDATELIMITS(uv_matrix[0], uv_matrix[1], x_min, x_max, y_min, y_max);
+
+#undef UPDATELIMITS
+}
+
+// Finds x and y limits for arrays. Also finds the overall maximums and minimums
+static void find_limits(int width, int height, const double **const params,
+ int num_frames, int *x_min, int *x_max, int *y_min,
+ int *y_max, int *pano_x_min, int *pano_x_max,
+ int *pano_y_min, int *pano_y_max) {
+ *pano_x_max = INT_MIN;
+ *pano_x_min = INT_MAX;
+ *pano_y_max = INT_MIN;
+ *pano_y_min = INT_MAX;
+ for (int i = 0; i < num_frames; ++i) {
+ find_frame_limit(width, height, (const double *const)params[i], &x_min[i],
+ &x_max[i], &y_min[i], &y_max[i]);
+ if (x_max[i] > *pano_x_max) {
+ *pano_x_max = x_max[i];
+ }
+ if (x_min[i] < *pano_x_min) {
+ *pano_x_min = x_min[i];
+ }
+ if (y_max[i] > *pano_y_max) {
+ *pano_y_max = y_max[i];
+ }
+ if (y_min[i] < *pano_y_min) {
+ *pano_y_min = y_min[i];
+ }
+ }
+}
+
+// Inverts a 3x3 matrix that is in the parameter form.
+static void invert_params(const double *const params, double *target) {
+ double temp[MAX_PARAMDIM] = { 0 };
+ params_to_matrix(params, temp);
+
+ // Find determinant of matrix (expansion by minors).
+ const double det = temp[0] * ((temp[4] * temp[8]) - (temp[5] * temp[7])) -
+ temp[1] * ((temp[3] * temp[8]) - (temp[5] * temp[6])) +
+ temp[2] * ((temp[3] * temp[7]) - (temp[4] * temp[6]));
+ assert(det != 0);
+
+ // inverse is transpose of cofactor * 1/det.
+ double inverse[MAX_PARAMDIM] = { 0 };
+ inverse[0] = (temp[4] * temp[8] - temp[7] * temp[5]) / det;
+ inverse[1] = (temp[2] * temp[7] - temp[1] * temp[8]) / det;
+ inverse[2] = (temp[1] * temp[5] - temp[2] * temp[4]) / det;
+ inverse[3] = (temp[5] * temp[6] - temp[3] * temp[8]) / det;
+ inverse[4] = (temp[0] * temp[8] - temp[2] * temp[6]) / det;
+ inverse[5] = (temp[3] * temp[2] - temp[0] * temp[5]) / det;
+ inverse[6] = (temp[3] * temp[7] - temp[6] * temp[4]) / det;
+ inverse[7] = (temp[6] * temp[1] - temp[0] * temp[7]) / det;
+ inverse[8] = (temp[0] * temp[4] - temp[3] * temp[1]) / det;
+
+ matrix_to_params(inverse, target);
+}
+
+#if BGSPRITE_BLENDING_MODE == 0
+// swaps two YuvPixels.
+static void swap_yuv(YuvPixel *a, YuvPixel *b) {
+ const YuvPixel temp = *b;
+ *b = *a;
+ *a = temp;
+}
+
+// Partitions array to find pivot index in qselect.
+static int partition(YuvPixel arr[], int left, int right, int pivot_idx) {
+ YuvPixel pivot = arr[pivot_idx];
+
+ // Move pivot to the end.
+ swap_yuv(&arr[pivot_idx], &arr[right]);
+
+ int p_idx = left;
+ for (int i = left; i < right; ++i) {
+ if (arr[i].y <= pivot.y) {
+ swap_yuv(&arr[i], &arr[p_idx]);
+ p_idx++;
+ }
+ }
+
+ swap_yuv(&arr[p_idx], &arr[right]);
+
+ return p_idx;
+}
+
+// Returns the kth element in array, partially sorted in place (quickselect).
+static YuvPixel qselect(YuvPixel arr[], int left, int right, int k) {
+ if (left >= right) {
+ return arr[left];
+ }
+ unsigned int seed = (int)time(NULL);
+ int pivot_idx = left + rand_r(&seed) % (right - left + 1);
+ pivot_idx = partition(arr, left, right, pivot_idx);
+
+ if (k == pivot_idx) {
+ return arr[k];
+ } else if (k < pivot_idx) {
+ return qselect(arr, left, pivot_idx - 1, k);
+ } else {
+ return qselect(arr, pivot_idx + 1, right, k);
+ }
+}
+#endif // BGSPRITE_BLENDING_MODE == 0
+
+// Stitches images together to create ARF and stores it in 'panorama'.
+static void stitch_images(YV12_BUFFER_CONFIG **const frames,
+ const int num_frames, const int center_idx,
+ const double **const params, const int *const x_min,
+ const int *const x_max, const int *const y_min,
+ const int *const y_max, int pano_x_min,
+ int pano_x_max, int pano_y_min, int pano_y_max,
+ YV12_BUFFER_CONFIG *panorama) {
+ const int width = pano_x_max - pano_x_min + 1;
+ const int height = pano_y_max - pano_y_min + 1;
+
+ // Create temp_pano[y][x][num_frames] stack of pixel values
+ YuvPixel ***temp_pano = aom_malloc(height * sizeof(*temp_pano));
+ for (int i = 0; i < height; ++i) {
+ temp_pano[i] = aom_malloc(width * sizeof(**temp_pano));
+ for (int j = 0; j < width; ++j) {
+ temp_pano[i][j] = aom_malloc(num_frames * sizeof(***temp_pano));
+ }
+ }
+ // Create count[y][x] to count how many values in stack for median filtering
+ int **count = aom_malloc(height * sizeof(*count));
+ for (int i = 0; i < height; ++i) {
+ count[i] = aom_calloc(width, sizeof(**count)); // counts initialized to 0
+ }
+
+ // Re-sample images onto panorama (pre-median filtering).
+ const int x_offset = -pano_x_min;
+ const int y_offset = -pano_y_min;
+ const int frame_width = frames[0]->y_width;
+ const int frame_height = frames[0]->y_height;
+ for (int i = 0; i < num_frames; ++i) {
+ // Find transforms from panorama coordinate system back to single image
+ // coordinate system for sampling.
+ int transformed_width = x_max[i] - x_min[i] + 1;
+ int transformed_height = y_max[i] - y_min[i] + 1;
+
+ double transform_matrix[MAX_PARAMDIM];
+ double transform_params[MAX_PARAMDIM - 1];
+ invert_params(params[i], transform_params);
+ params_to_matrix(transform_params, transform_matrix);
+
+#if CONFIG_HIGHBITDEPTH
+ const uint16_t *y_buffer16 = CONVERT_TO_SHORTPTR(frames[i]->y_buffer);
+ const uint16_t *u_buffer16 = CONVERT_TO_SHORTPTR(frames[i]->u_buffer);
+ const uint16_t *v_buffer16 = CONVERT_TO_SHORTPTR(frames[i]->v_buffer);
+#endif // CONFIG_HIGHBITDEPTH
+
+ for (int y = 0; y < transformed_height; ++y) {
+ for (int x = 0; x < transformed_width; ++x) {
+ // Do transform.
+ double xy_matrix[3] = { x + x_min[i], y + y_min[i], 1 };
+ double uv_matrix[3] = { 0 };
+ multiply_mat(transform_matrix, xy_matrix, uv_matrix, TRANSFORM_MAT_DIM,
+ TRANSFORM_MAT_DIM, 1);
+
+ // Coordinates used for nearest neighbor interpolation.
+ int image_x = (int)round(uv_matrix[0]);
+ int image_y = (int)round(uv_matrix[1]);
+
+ // Temporary values for bilinear interpolation
+ double interpolated_yvalue = 0.0;
+ double interpolated_uvalue = 0.0;
+ double interpolated_vvalue = 0.0;
+ double interpolated_fraction = 0.0;
+ int interpolation_count = 0;
+
+#if BGSPRITE_INTERPOLATION == 1
+          // Coordinates used for bilinear interpolation.
+ double x_base;
+ double y_base;
+ double x_decimal = modf(uv_matrix[0], &x_base);
+ double y_decimal = modf(uv_matrix[1], &y_base);
+
+ if ((x_decimal > 0.2 && x_decimal < 0.8) ||
+ (y_decimal > 0.2 && y_decimal < 0.8)) {
+ for (int u = 0; u < 2; ++u) {
+ for (int v = 0; v < 2; ++v) {
+ int interp_x = (int)x_base + u;
+ int interp_y = (int)y_base + v;
+ if (interp_x >= 0 && interp_x < frame_width && interp_y >= 0 &&
+ interp_y < frame_height) {
+ interpolation_count++;
+
+ interpolated_fraction +=
+ fabs(u - x_decimal) * fabs(v - y_decimal);
+ int ychannel_idx = interp_y * frames[i]->y_stride + interp_x;
+ int uvchannel_idx = (interp_y >> frames[i]->subsampling_y) *
+ frames[i]->uv_stride +
+ (interp_x >> frames[i]->subsampling_x);
+#if CONFIG_HIGHBITDEPTH
+ if (frames[i]->flags & YV12_FLAG_HIGHBITDEPTH) {
+ interpolated_yvalue += (1 - fabs(u - x_decimal)) *
+ (1 - fabs(v - y_decimal)) *
+ y_buffer16[ychannel_idx];
+ interpolated_uvalue += (1 - fabs(u - x_decimal)) *
+ (1 - fabs(v - y_decimal)) *
+ u_buffer16[uvchannel_idx];
+ interpolated_vvalue += (1 - fabs(u - x_decimal)) *
+ (1 - fabs(v - y_decimal)) *
+ v_buffer16[uvchannel_idx];
+ } else {
+#endif // CONFIG_HIGHBITDEPTH
+ interpolated_yvalue += (1 - fabs(u - x_decimal)) *
+ (1 - fabs(v - y_decimal)) *
+ frames[i]->y_buffer[ychannel_idx];
+ interpolated_uvalue += (1 - fabs(u - x_decimal)) *
+ (1 - fabs(v - y_decimal)) *
+ frames[i]->u_buffer[uvchannel_idx];
+ interpolated_vvalue += (1 - fabs(u - x_decimal)) *
+ (1 - fabs(v - y_decimal)) *
+ frames[i]->v_buffer[uvchannel_idx];
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ }
+ }
+ }
+ }
+#endif // BGSPRITE_INTERPOLATION == 1
+
+ if (BGSPRITE_INTERPOLATION && interpolation_count > 2) {
+ if (interpolation_count != 4) {
+ interpolated_yvalue /= interpolated_fraction;
+ interpolated_uvalue /= interpolated_fraction;
+ interpolated_vvalue /= interpolated_fraction;
+ }
+ int pano_x = x + x_min[i] + x_offset;
+ int pano_y = y + y_min[i] + y_offset;
+
+#if CONFIG_HIGHBITDEPTH
+ if (frames[i]->flags & YV12_FLAG_HIGHBITDEPTH) {
+ temp_pano[pano_y][pano_x][count[pano_y][pano_x]].y =
+ (uint16_t)interpolated_yvalue;
+ temp_pano[pano_y][pano_x][count[pano_y][pano_x]].u =
+ (uint16_t)interpolated_uvalue;
+ temp_pano[pano_y][pano_x][count[pano_y][pano_x]].v =
+ (uint16_t)interpolated_vvalue;
+ } else {
+#endif // CONFIG_HIGHBITDEPTH
+ temp_pano[pano_y][pano_x][count[pano_y][pano_x]].y =
+ (uint8_t)interpolated_yvalue;
+ temp_pano[pano_y][pano_x][count[pano_y][pano_x]].u =
+ (uint8_t)interpolated_uvalue;
+ temp_pano[pano_y][pano_x][count[pano_y][pano_x]].v =
+ (uint8_t)interpolated_vvalue;
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ ++count[pano_y][pano_x];
+ } else if (image_x >= 0 && image_x < frame_width && image_y >= 0 &&
+ image_y < frame_height) {
+ // Place in panorama stack.
+ int pano_x = x + x_min[i] + x_offset;
+ int pano_y = y + y_min[i] + y_offset;
+
+ int ychannel_idx = image_y * frames[i]->y_stride + image_x;
+ int uvchannel_idx =
+ (image_y >> frames[i]->subsampling_y) * frames[i]->uv_stride +
+ (image_x >> frames[i]->subsampling_x);
+#if CONFIG_HIGHBITDEPTH
+ if (frames[i]->flags & YV12_FLAG_HIGHBITDEPTH) {
+ temp_pano[pano_y][pano_x][count[pano_y][pano_x]].y =
+ y_buffer16[ychannel_idx];
+ temp_pano[pano_y][pano_x][count[pano_y][pano_x]].u =
+ u_buffer16[uvchannel_idx];
+ temp_pano[pano_y][pano_x][count[pano_y][pano_x]].v =
+ v_buffer16[uvchannel_idx];
+ } else {
+#endif // CONFIG_HIGHBITDEPTH
+ temp_pano[pano_y][pano_x][count[pano_y][pano_x]].y =
+ frames[i]->y_buffer[ychannel_idx];
+ temp_pano[pano_y][pano_x][count[pano_y][pano_x]].u =
+ frames[i]->u_buffer[uvchannel_idx];
+ temp_pano[pano_y][pano_x][count[pano_y][pano_x]].v =
+ frames[i]->v_buffer[uvchannel_idx];
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ ++count[pano_y][pano_x];
+ }
+ }
+ }
+ }
+
+#if BGSPRITE_BLENDING_MODE == 1
+ // Apply mean filtering and store result in temp_pano[y][x][0].
+ for (int y = 0; y < height; ++y) {
+ for (int x = 0; x < width; ++x) {
+ if (count[y][x] == 0) {
+ // Just make the pixel black.
+ // TODO(toddnguyen): Color the pixel with nearest neighbor
+ } else {
+        // Find the mean of the pixel value stack for this position.
+ uint32_t y_sum = 0;
+ uint32_t u_sum = 0;
+ uint32_t v_sum = 0;
+ for (int i = 0; i < count[y][x]; ++i) {
+ y_sum += temp_pano[y][x][i].y;
+ u_sum += temp_pano[y][x][i].u;
+ v_sum += temp_pano[y][x][i].v;
+ }
+
+ const uint32_t unsigned_count = (uint32_t)count[y][x];
+
+#if CONFIG_HIGHBITDEPTH
+ if (panorama->flags & YV12_FLAG_HIGHBITDEPTH) {
+ temp_pano[y][x][0].y = (uint16_t)OD_DIVU(y_sum, unsigned_count);
+ temp_pano[y][x][0].u = (uint16_t)OD_DIVU(u_sum, unsigned_count);
+ temp_pano[y][x][0].v = (uint16_t)OD_DIVU(v_sum, unsigned_count);
+ } else {
+#endif // CONFIG_HIGHBITDEPTH
+ temp_pano[y][x][0].y = (uint8_t)OD_DIVU(y_sum, unsigned_count);
+ temp_pano[y][x][0].u = (uint8_t)OD_DIVU(u_sum, unsigned_count);
+ temp_pano[y][x][0].v = (uint8_t)OD_DIVU(v_sum, unsigned_count);
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ }
+ }
+ }
+#else
+ // Apply median filtering using quickselect.
+ for (int y = 0; y < height; ++y) {
+ for (int x = 0; x < width; ++x) {
+ if (count[y][x] == 0) {
+ // Just make the pixel black.
+ // TODO(toddnguyen): Color the pixel with nearest neighbor
+ } else {
+        // Find the median of the pixel value stack for this position.
+ const int median_idx = (int)floor(count[y][x] / 2);
+ YuvPixel median =
+ qselect(temp_pano[y][x], 0, count[y][x] - 1, median_idx);
+
+ // Make the median value the 0th index for UV subsampling later
+ temp_pano[y][x][0] = median;
+ assert(median.y == temp_pano[y][x][0].y &&
+ median.u == temp_pano[y][x][0].u &&
+ median.v == temp_pano[y][x][0].v);
+ }
+ }
+ }
+#endif // BGSPRITE_BLENDING_MODE == 1
+
+ // NOTE(toddnguyen): Right now the ARF in the cpi struct is fixed size at
+ // the same size as the frames. For now, we crop the generated panorama.
+ // assert(panorama->y_width < width && panorama->y_height < height);
+ const int crop_x_offset = x_min[center_idx] + x_offset;
+ const int crop_y_offset = y_min[center_idx] + y_offset;
+
+#if CONFIG_HIGHBITDEPTH
+ if (panorama->flags & YV12_FLAG_HIGHBITDEPTH) {
+ // Use median Y value.
+ uint16_t *pano_y_buffer16 = CONVERT_TO_SHORTPTR(panorama->y_buffer);
+ for (int y = 0; y < panorama->y_height; ++y) {
+ for (int x = 0; x < panorama->y_width; ++x) {
+ const int ychannel_idx = y * panorama->y_stride + x;
+ if (count[y + crop_y_offset][x + crop_x_offset] > 0) {
+ pano_y_buffer16[ychannel_idx] =
+ temp_pano[y + crop_y_offset][x + crop_x_offset][0].y;
+ } else {
+ pano_y_buffer16[ychannel_idx] = 0;
+ }
+ }
+ }
+
+ // UV subsampling with median UV values
+ uint16_t *pano_u_buffer16 = CONVERT_TO_SHORTPTR(panorama->u_buffer);
+ uint16_t *pano_v_buffer16 = CONVERT_TO_SHORTPTR(panorama->v_buffer);
+
+ for (int y = 0; y < panorama->uv_height; ++y) {
+ for (int x = 0; x < panorama->uv_width; ++x) {
+ uint32_t avg_count = 0;
+ uint32_t u_sum = 0;
+ uint32_t v_sum = 0;
+
+ // Look at surrounding pixels for subsampling
+ for (int s_x = 0; s_x < panorama->subsampling_x + 1; ++s_x) {
+ for (int s_y = 0; s_y < panorama->subsampling_y + 1; ++s_y) {
+ int y_sample = crop_y_offset + (y << panorama->subsampling_y) + s_y;
+ int x_sample = crop_x_offset + (x << panorama->subsampling_x) + s_x;
+ if (y_sample > 0 && y_sample < height && x_sample > 0 &&
+ x_sample < width && count[y_sample][x_sample] > 0) {
+ u_sum += temp_pano[y_sample][x_sample][0].u;
+ v_sum += temp_pano[y_sample][x_sample][0].v;
+ avg_count++;
+ }
+ }
+ }
+
+ const int uvchannel_idx = y * panorama->uv_stride + x;
+ if (avg_count != 0) {
+ pano_u_buffer16[uvchannel_idx] = (uint16_t)OD_DIVU(u_sum, avg_count);
+ pano_v_buffer16[uvchannel_idx] = (uint16_t)OD_DIVU(v_sum, avg_count);
+ } else {
+ pano_u_buffer16[uvchannel_idx] = 0;
+ pano_v_buffer16[uvchannel_idx] = 0;
+ }
+ }
+ }
+ } else {
+#endif // CONFIG_HIGHBITDEPTH
+ // Use median Y value.
+ for (int y = 0; y < panorama->y_height; ++y) {
+ for (int x = 0; x < panorama->y_width; ++x) {
+ const int ychannel_idx = y * panorama->y_stride + x;
+ if (count[y + crop_y_offset][x + crop_x_offset] > 0) {
+ panorama->y_buffer[ychannel_idx] =
+ temp_pano[y + crop_y_offset][x + crop_x_offset][0].y;
+ } else {
+ panorama->y_buffer[ychannel_idx] = 0;
+ }
+ }
+ }
+
+ // UV subsampling with median UV values
+ for (int y = 0; y < panorama->uv_height; ++y) {
+ for (int x = 0; x < panorama->uv_width; ++x) {
+ uint16_t avg_count = 0;
+ uint16_t u_sum = 0;
+ uint16_t v_sum = 0;
+
+ // Look at surrounding pixels for subsampling
+ for (int s_x = 0; s_x < panorama->subsampling_x + 1; ++s_x) {
+ for (int s_y = 0; s_y < panorama->subsampling_y + 1; ++s_y) {
+ int y_sample = crop_y_offset + (y << panorama->subsampling_y) + s_y;
+ int x_sample = crop_x_offset + (x << panorama->subsampling_x) + s_x;
+ if (y_sample > 0 && y_sample < height && x_sample > 0 &&
+ x_sample < width && count[y_sample][x_sample] > 0) {
+ u_sum += temp_pano[y_sample][x_sample][0].u;
+ v_sum += temp_pano[y_sample][x_sample][0].v;
+ avg_count++;
+ }
+ }
+ }
+
+ const int uvchannel_idx = y * panorama->uv_stride + x;
+ if (avg_count != 0) {
+ panorama->u_buffer[uvchannel_idx] =
+ (uint8_t)OD_DIVU(u_sum, avg_count);
+ panorama->v_buffer[uvchannel_idx] =
+ (uint8_t)OD_DIVU(v_sum, avg_count);
+ } else {
+ panorama->u_buffer[uvchannel_idx] = 0;
+ panorama->v_buffer[uvchannel_idx] = 0;
+ }
+ }
+ }
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+
+ for (int i = 0; i < height; ++i) {
+ for (int j = 0; j < width; ++j) {
+ aom_free(temp_pano[i][j]);
+ }
+ aom_free(temp_pano[i]);
+ aom_free(count[i]);
+ }
+ aom_free(count);
+ aom_free(temp_pano);
+}
+
+int av1_background_sprite(AV1_COMP *cpi, int distance) {
+ YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
+ static const double identity_params[MAX_PARAMDIM - 1] = {
+ 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0
+ };
+
+ const int frames_after_arf =
+ av1_lookahead_depth(cpi->lookahead) - distance - 1;
+ int frames_fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1;
+ int frames_bwd;
+
+ // Define the forward and backwards filter limits for this arnr group.
+ if (frames_fwd > frames_after_arf) frames_fwd = frames_after_arf;
+ if (frames_fwd > distance) frames_fwd = distance;
+ frames_bwd = frames_fwd;
+
+#if CONFIG_EXT_REFS
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW) {
+ cpi->alt_ref_buffer = av1_lookahead_peek(cpi->lookahead, distance)->img;
+ cpi->is_arf_filter_off[gf_group->arf_update_idx[gf_group->index]] = 1;
+ frames_fwd = 0;
+ frames_bwd = 0;
+ } else {
+ cpi->is_arf_filter_off[gf_group->arf_update_idx[gf_group->index]] = 0;
+ }
+#endif // CONFIG_EXT_REFS
+
+ const int start_frame = distance + frames_fwd;
+ const int frames_to_stitch = frames_bwd + 1 + frames_fwd;
+
+ // Get frames to be included in background sprite.
+ for (int frame = 0; frame < frames_to_stitch; ++frame) {
+ const int which_buffer = start_frame - frame;
+ struct lookahead_entry *buf =
+ av1_lookahead_peek(cpi->lookahead, which_buffer);
+ frames[frames_to_stitch - 1 - frame] = &buf->img;
+ }
+
+ YV12_BUFFER_CONFIG temp_bg;
+ memset(&temp_bg, 0, sizeof(temp_bg));
+ aom_alloc_frame_buffer(&temp_bg, frames[0]->y_width, frames[0]->y_height,
+ frames[0]->subsampling_x, frames[0]->subsampling_y,
+#if CONFIG_HIGHBITDEPTH
+ frames[0]->flags & YV12_FLAG_HIGHBITDEPTH,
+#endif
+ frames[0]->border, 0);
+ aom_yv12_copy_frame(frames[0], &temp_bg);
+ temp_bg.bit_depth = frames[0]->bit_depth;
+
+ // Allocate empty arrays for parameters between frames.
+ double **params = aom_malloc(frames_to_stitch * sizeof(*params));
+ for (int i = 0; i < frames_to_stitch; ++i) {
+ params[i] = aom_malloc(sizeof(identity_params));
+ memcpy(params[i], identity_params, sizeof(identity_params));
+ }
+
+ // Use global motion to find affine transformations between frames.
+ // params[i] will have the transform from frame[i] to frame[i-1].
+ // params[0] will have the identity matrix because it has no previous frame.
+ TransformationType model = AFFINE;
+ int inliers_by_motion[RANSAC_NUM_MOTIONS];
+ for (int frame = 0; frame < frames_to_stitch - 1; ++frame) {
+ const int global_motion_ret = compute_global_motion_feature_based(
+ model, frames[frame + 1], frames[frame],
+#if CONFIG_HIGHBITDEPTH
+ cpi->common.bit_depth,
+#endif // CONFIG_HIGHBITDEPTH
+ inliers_by_motion, params[frame + 1], RANSAC_NUM_MOTIONS);
+
+ // Quit if global motion had an error.
+ if (global_motion_ret == 0) {
+ for (int i = 0; i < frames_to_stitch; ++i) {
+ aom_free(params[i]);
+ }
+ aom_free(params);
+ return 1;
+ }
+ }
+
+ // Compound the transformation parameters.
+ for (int i = 1; i < frames_to_stitch; ++i) {
+ multiply_params(params[i - 1], params[i], params[i]);
+ }
+
+ // Compute frame limits for final stitched images.
+ int pano_x_max = INT_MIN;
+ int pano_x_min = INT_MAX;
+ int pano_y_max = INT_MIN;
+ int pano_y_min = INT_MAX;
+ int *x_max = aom_malloc(frames_to_stitch * sizeof(*x_max));
+ int *x_min = aom_malloc(frames_to_stitch * sizeof(*x_min));
+ int *y_max = aom_malloc(frames_to_stitch * sizeof(*y_max));
+ int *y_min = aom_malloc(frames_to_stitch * sizeof(*y_min));
+
+ find_limits(cpi->initial_width, cpi->initial_height,
+ (const double **const)params, frames_to_stitch, x_min, x_max,
+ y_min, y_max, &pano_x_min, &pano_x_max, &pano_y_min, &pano_y_max);
+
+ // Center panorama on the ARF.
+ const int center_idx = frames_bwd;
+ assert(center_idx >= 0 && center_idx < frames_to_stitch);
+
+ // Recompute transformations to adjust to center image.
+ // Invert center image's transform.
+ double inverse[MAX_PARAMDIM - 1] = { 0 };
+ invert_params(params[center_idx], inverse);
+
+ // Multiply the inverse to all transformation parameters.
+ for (int i = 0; i < frames_to_stitch; ++i) {
+ multiply_params(inverse, params[i], params[i]);
+ }
+
+ // Recompute frame limits for new adjusted center.
+ find_limits(cpi->initial_width, cpi->initial_height,
+ (const double **const)params, frames_to_stitch, x_min, x_max,
+ y_min, y_max, &pano_x_min, &pano_x_max, &pano_y_min, &pano_y_max);
+
+ // Stitch Images.
+ stitch_images(frames, frames_to_stitch, center_idx,
+ (const double **const)params, x_min, x_max, y_min, y_max,
+ pano_x_min, pano_x_max, pano_y_min, pano_y_max, &temp_bg);
+
+ // Apply temporal filter.
+ av1_temporal_filter(cpi, &temp_bg, distance);
+
+ // Free memory.
+ aom_free_frame_buffer(&temp_bg);
+ for (int i = 0; i < frames_to_stitch; ++i) {
+ aom_free(params[i]);
+ }
+ aom_free(params);
+ aom_free(x_max);
+ aom_free(x_min);
+ aom_free(y_max);
+ aom_free(y_min);
+
+ return 0;
+}
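
Reviewer note (not part of the patch): throughout bgsprite.c a motion model is carried as an 8-entry parameter vector and converted to a 3x3 matrix by params_to_matrix() (map { 2, 3, 0, 4, 5, 1, 6, 7 }, last entry fixed to 1). The sketch below shows the resulting matrix layout and how find_frame_limit() projects a frame corner; with the AFFINE model used by av1_background_sprite(), params[6] and params[7] are zero, so the homogeneous coordinate stays 1 and no normalization is performed. The helper name project_point is illustrative only.

/* Matrix built by params_to_matrix():
 *   | p2  p3  p0 |
 *   | p4  p5  p1 |
 *   | p6  p7   1 |
 * find_frame_limit() feeds the four corners (0,0), (w,0), (w,h), (0,h)
 * through this transform and takes floor/ceil of the results to get the
 * per-frame bounding box; find_limits() then takes the union over frames. */
static void project_point(const double params[8], double x, double y,
                          double *u, double *v) {
  *u = params[2] * x + params[3] * y + params[0];
  *v = params[4] * x + params[5] * y + params[1];
}
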
diff --git a/third_party/aom/av1/encoder/bgsprite.h b/third_party/aom/av1/encoder/bgsprite.h
new file mode 100644
index 000000000..711b00e40
--- /dev/null
+++ b/third_party/aom/av1/encoder/bgsprite.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AV1_ENCODER_BGSPRITE_H_
+#define AV1_ENCODER_BGSPRITE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "av1/encoder/encoder.h"
+
+// Creates alternate reference frame starting from source image + frames up to
+// 'distance' past source frame.
+// Returns 0 on success and 1 on failure.
+int av1_background_sprite(AV1_COMP *cpi, int distance);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // AV1_ENCODER_BGSPRITE_H_
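
Reviewer note (not part of the patch): av1_background_sprite() builds the panorama, runs av1_temporal_filter() on it, and returns 0 on success or 1 on failure (for example when compute_global_motion_feature_based() fails), so callers need a fallback. The sketch below is a hypothetical call site, not code from this patch; build_arf and arf_src_index are placeholders, and it assumes av1/encoder/encoder.h and av1/encoder/bgsprite.h are included.

/* Hypothetical caller: returns 1 if the background-sprite ARF was built. */
static int build_arf(AV1_COMP *cpi, int arf_src_index) {
  if (av1_background_sprite(cpi, arf_src_index)) {
    /* Panorama construction failed (e.g. global motion estimation did not
     * converge); the caller should fall back to its regular ARF filtering. */
    return 0;
  }
  return 1;
}
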
diff --git a/third_party/aom/av1/encoder/bitstream.c b/third_party/aom/av1/encoder/bitstream.c
index f8378b14d..2e0abc186 100644
--- a/third_party/aom/av1/encoder/bitstream.c
+++ b/third_party/aom/av1/encoder/bitstream.c
@@ -26,7 +26,6 @@
#if CONFIG_CDEF
#include "av1/common/cdef.h"
-#include "av1/common/clpf.h"
#endif // CONFIG_CDEF
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
@@ -61,21 +60,12 @@
#include "av1/encoder/pvq_encoder.h"
#endif
-static struct av1_token intra_mode_encodings[INTRA_MODES];
-static struct av1_token switchable_interp_encodings[SWITCHABLE_FILTERS];
-static struct av1_token partition_encodings[PARTITION_TYPES];
-#if CONFIG_EXT_INTER
-static const struct av1_token
- inter_compound_mode_encodings[INTER_COMPOUND_MODES] = {
- { 2, 2 }, { 12, 4 }, { 52, 6 }, { 53, 6 },
- { 54, 6 }, { 55, 6 }, { 0, 1 }, { 7, 3 }
- };
-#endif // CONFIG_EXT_INTER
-#if CONFIG_PALETTE
-static struct av1_token palette_size_encodings[PALETTE_SIZES];
-static struct av1_token palette_color_index_encodings[PALETTE_SIZES]
- [PALETTE_COLORS];
-#endif // CONFIG_PALETTE
+#define ENC_MISMATCH_DEBUG 0
+
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+static struct av1_token
+ inter_singleref_comp_mode_encodings[INTER_SINGLEREF_COMP_MODES];
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
static INLINE void write_uniform(aom_writer *w, int n, int v) {
@@ -97,9 +87,6 @@ static struct av1_token ext_tx_intra_encodings[EXT_TX_SETS_INTRA][TX_TYPES];
#else
static struct av1_token ext_tx_encodings[TX_TYPES];
#endif // CONFIG_EXT_TX
-#if CONFIG_GLOBAL_MOTION
-static struct av1_token global_motion_types_encodings[GLOBAL_TRANS_TYPES];
-#endif // CONFIG_GLOBAL_MOTION
#if CONFIG_EXT_INTRA
#if CONFIG_INTRA_INTERP
static struct av1_token intra_filter_encodings[INTRA_FILTERS];
@@ -114,7 +101,9 @@ static struct av1_token compound_type_encodings[COMPOUND_TYPES];
#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-static struct av1_token motion_mode_encodings[MOTION_MODES];
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+static struct av1_token ncobmc_mode_encodings[MAX_NCOBMC_MODES];
+#endif
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#if CONFIG_LOOP_RESTORATION
static struct av1_token switchable_restore_encodings[RESTORE_SWITCHABLE_TYPES];
@@ -129,9 +118,9 @@ static int remux_tiles(const AV1_COMMON *const cm, uint8_t *dst,
int *const tile_col_size_bytes);
void av1_encode_token_init(void) {
-#if CONFIG_EXT_TX || CONFIG_PALETTE
+#if CONFIG_EXT_TX
int s;
-#endif // CONFIG_EXT_TX || CONFIG_PALETTE
+#endif // CONFIG_EXT_TX
#if CONFIG_EXT_TX
for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
av1_tokens_from_tree(ext_tx_inter_encodings[s], av1_ext_tx_inter_tree[s]);
@@ -142,17 +131,6 @@ void av1_encode_token_init(void) {
#else
av1_tokens_from_tree(ext_tx_encodings, av1_ext_tx_tree);
#endif // CONFIG_EXT_TX
- av1_tokens_from_tree(intra_mode_encodings, av1_intra_mode_tree);
- av1_tokens_from_tree(switchable_interp_encodings, av1_switchable_interp_tree);
- av1_tokens_from_tree(partition_encodings, av1_partition_tree);
-
-#if CONFIG_PALETTE
- av1_tokens_from_tree(palette_size_encodings, av1_palette_size_tree);
- for (s = 0; s < PALETTE_SIZES; ++s) {
- av1_tokens_from_tree(palette_color_index_encodings[s],
- av1_palette_color_index_tree[s]);
- }
-#endif // CONFIG_PALETTE
#if CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
av1_tokens_from_tree(intra_filter_encodings, av1_intra_filter_tree);
@@ -161,17 +139,19 @@ void av1_encode_token_init(void) {
#if CONFIG_INTERINTRA
av1_tokens_from_tree(interintra_mode_encodings, av1_interintra_mode_tree);
#endif // CONFIG_INTERINTRA
+#if CONFIG_COMPOUND_SINGLEREF
+ av1_tokens_from_tree(inter_singleref_comp_mode_encodings,
+ av1_inter_singleref_comp_mode_tree);
+#endif // CONFIG_COMPOUND_SINGLEREF
#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
av1_tokens_from_tree(compound_type_encodings, av1_compound_type_tree);
#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- av1_tokens_from_tree(motion_mode_encodings, av1_motion_mode_tree);
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ av1_tokens_from_tree(ncobmc_mode_encodings, av1_ncobmc_mode_tree);
+#endif
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-#if CONFIG_GLOBAL_MOTION
- av1_tokens_from_tree(global_motion_types_encodings,
- av1_global_motion_types_tree);
-#endif // CONFIG_GLOBAL_MOTION
#if CONFIG_LOOP_RESTORATION
av1_tokens_from_tree(switchable_restore_encodings,
av1_switchable_restore_tree);
@@ -195,10 +175,6 @@ void av1_encode_token_init(void) {
#else
av1_indices_from_tree(av1_ext_tx_ind, av1_ext_tx_inv, av1_ext_tx_tree);
#endif
- av1_indices_from_tree(av1_intra_mode_ind, av1_intra_mode_inv,
- av1_intra_mode_tree);
- av1_indices_from_tree(av1_inter_mode_ind, av1_inter_mode_inv,
- av1_inter_mode_tree);
}
static void write_intra_mode_kf(const AV1_COMMON *cm, FRAME_CONTEXT *frame_ctx,
@@ -214,65 +190,72 @@ static void write_intra_mode_kf(const AV1_COMMON *cm, FRAME_CONTEXT *frame_ctx,
(void)cm;
}
-#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
-static void write_interintra_mode(aom_writer *w, INTERINTRA_MODE mode,
- const aom_prob *probs) {
- av1_write_token(w, av1_interintra_mode_tree, probs,
- &interintra_mode_encodings[mode]);
-}
-#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
-
static void write_inter_mode(aom_writer *w, PREDICTION_MODE mode,
FRAME_CONTEXT *ec_ctx, const int16_t mode_ctx) {
const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK;
- const aom_prob newmv_prob = ec_ctx->newmv_prob[newmv_ctx];
- aom_write(w, mode != NEWMV, newmv_prob);
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(w, mode != NEWMV, ec_ctx->newmv_cdf[newmv_ctx], 2);
+#else
+ aom_write(w, mode != NEWMV, ec_ctx->newmv_prob[newmv_ctx]);
+#endif
if (mode != NEWMV) {
- const int16_t zeromv_ctx = (mode_ctx >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;
- const aom_prob zeromv_prob = ec_ctx->zeromv_prob[zeromv_ctx];
-
if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) {
assert(mode == ZEROMV);
return;
}
- aom_write(w, mode != ZEROMV, zeromv_prob);
+ const int16_t zeromv_ctx = (mode_ctx >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(w, mode != ZEROMV, ec_ctx->zeromv_cdf[zeromv_ctx], 2);
+#else
+ aom_write(w, mode != ZEROMV, ec_ctx->zeromv_prob[zeromv_ctx]);
+#endif
if (mode != ZEROMV) {
int16_t refmv_ctx = (mode_ctx >> REFMV_OFFSET) & REFMV_CTX_MASK;
- aom_prob refmv_prob;
if (mode_ctx & (1 << SKIP_NEARESTMV_OFFSET)) refmv_ctx = 6;
if (mode_ctx & (1 << SKIP_NEARMV_OFFSET)) refmv_ctx = 7;
if (mode_ctx & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) refmv_ctx = 8;
-
- refmv_prob = ec_ctx->refmv_prob[refmv_ctx];
- aom_write(w, mode != NEARESTMV, refmv_prob);
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(w, mode != NEARESTMV, ec_ctx->refmv_cdf[refmv_ctx], 2);
+#else
+ aom_write(w, mode != NEARESTMV, ec_ctx->refmv_prob[refmv_ctx]);
+#endif
}
}
}
-static void write_drl_idx(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi,
+static void write_drl_idx(FRAME_CONTEXT *ec_ctx, const MB_MODE_INFO *mbmi,
const MB_MODE_INFO_EXT *mbmi_ext, aom_writer *w) {
uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
assert(mbmi->ref_mv_idx < 3);
#if CONFIG_EXT_INTER
+#if CONFIG_COMPOUND_SINGLEREF
+ if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV ||
+ mbmi->mode == SR_NEW_NEWMV) {
+#else // !CONFIG_COMPOUND_SINGLEREF
if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
-#else
+#endif // CONFIG_COMPOUND_SINGLEREF
+#else // !CONFIG_EXT_INTER
if (mbmi->mode == NEWMV) {
-#endif
+#endif // CONFIG_EXT_INTER
int idx;
for (idx = 0; idx < 2; ++idx) {
if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
uint8_t drl_ctx =
av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
- aom_prob drl_prob = cm->fc->drl_prob[drl_ctx];
- aom_write(w, mbmi->ref_mv_idx != idx, drl_prob);
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(w, mbmi->ref_mv_idx != idx, ec_ctx->drl_cdf[drl_ctx],
+ 2);
+#else
+ aom_write(w, mbmi->ref_mv_idx != idx, ec_ctx->drl_prob[drl_ctx]);
+#endif
if (mbmi->ref_mv_idx == idx) return;
}
}
@@ -286,9 +269,12 @@ static void write_drl_idx(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi,
if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
uint8_t drl_ctx =
av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
- aom_prob drl_prob = cm->fc->drl_prob[drl_ctx];
-
- aom_write(w, mbmi->ref_mv_idx != (idx - 1), drl_prob);
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(w, mbmi->ref_mv_idx != (idx - 1),
+ ec_ctx->drl_cdf[drl_ctx], 2);
+#else
+ aom_write(w, mbmi->ref_mv_idx != (idx - 1), ec_ctx->drl_prob[drl_ctx]);
+#endif
if (mbmi->ref_mv_idx == (idx - 1)) return;
}
}
@@ -297,16 +283,28 @@ static void write_drl_idx(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi,
}
#if CONFIG_EXT_INTER
-static void write_inter_compound_mode(AV1_COMMON *cm, aom_writer *w,
- PREDICTION_MODE mode,
+static void write_inter_compound_mode(AV1_COMMON *cm, MACROBLOCKD *xd,
+ aom_writer *w, PREDICTION_MODE mode,
const int16_t mode_ctx) {
- const aom_prob *const inter_compound_probs =
- cm->fc->inter_compound_mode_probs[mode_ctx];
-
assert(is_inter_compound_mode(mode));
- av1_write_token(w, av1_inter_compound_mode_tree, inter_compound_probs,
- &inter_compound_mode_encodings[INTER_COMPOUND_OFFSET(mode)]);
+ (void)cm;
+ aom_write_symbol(w, INTER_COMPOUND_OFFSET(mode),
+ xd->tile_ctx->inter_compound_mode_cdf[mode_ctx],
+ INTER_COMPOUND_MODES);
+}
+
+#if CONFIG_COMPOUND_SINGLEREF
+static void write_inter_singleref_comp_mode(MACROBLOCKD *xd, aom_writer *w,
+ PREDICTION_MODE mode,
+ const int16_t mode_ctx) {
+ assert(is_inter_singleref_comp_mode(mode));
+ aom_cdf_prob *const inter_singleref_comp_cdf =
+ xd->tile_ctx->inter_singleref_comp_mode_cdf[mode_ctx];
+
+ aom_write_symbol(w, INTER_SINGLEREF_COMP_OFFSET(mode),
+ inter_singleref_comp_cdf, INTER_SINGLEREF_COMP_MODES);
}
+#endif // CONFIG_COMPOUND_SINGLEREF
#endif // CONFIG_EXT_INTER
static void encode_unsigned_max(struct aom_write_bit_buffer *wb, int data,
@@ -314,11 +312,10 @@ static void encode_unsigned_max(struct aom_write_bit_buffer *wb, int data,
aom_wb_write_literal(wb, data, get_unsigned_bits(max));
}
-#if !CONFIG_EC_ADAPT || \
- (CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION || CONFIG_EXT_INTER)
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
static void prob_diff_update(const aom_tree_index *tree,
aom_prob probs[/*n - 1*/],
- const unsigned int counts[/*n - 1*/], int n,
+ const unsigned int counts[/* n */], int n,
int probwt, aom_writer *w) {
int i;
unsigned int branch_ct[32][2];
@@ -332,31 +329,15 @@ static void prob_diff_update(const aom_tree_index *tree,
}
#endif
-#if CONFIG_EXT_INTER || !CONFIG_EC_ADAPT
-static int prob_diff_update_savings(const aom_tree_index *tree,
- aom_prob probs[/*n - 1*/],
- const unsigned int counts[/*n - 1*/], int n,
- int probwt) {
- int i;
- unsigned int branch_ct[32][2];
- int savings = 0;
-
- // Assuming max number of probabilities <= 32
- assert(n <= 32);
- av1_tree_probs_from_distribution(tree, branch_ct, counts);
- for (i = 0; i < n - 1; ++i) {
- savings +=
- av1_cond_prob_diff_update_savings(&probs[i], branch_ct[i], probwt);
- }
- return savings;
-}
-#endif // CONFIG_EXT_INTER || !CONFIG_EC_ADAPT
-
#if CONFIG_VAR_TX
-static void write_tx_size_vartx(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+static void write_tx_size_vartx(const AV1_COMMON *cm, MACROBLOCKD *xd,
const MB_MODE_INFO *mbmi, TX_SIZE tx_size,
int depth, int blk_row, int blk_col,
aom_writer *w) {
+#if CONFIG_NEW_MULTISYMBOL
+ FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
+ (void)cm;
+#endif
const int tx_row = blk_row >> 1;
const int tx_col = blk_col >> 1;
const int max_blocks_high = max_block_high(xd, mbmi->sb_type, 0);
@@ -374,16 +355,31 @@ static void write_tx_size_vartx(const AV1_COMMON *cm, const MACROBLOCKD *xd,
return;
}
+#if CONFIG_RECT_TX_EXT
+ if (tx_size == mbmi->inter_tx_size[tx_row][tx_col] ||
+ mbmi->tx_size == quarter_txsize_lookup[mbmi->sb_type]) {
+#else
if (tx_size == mbmi->inter_tx_size[tx_row][tx_col]) {
+#endif
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(w, 0, ec_ctx->txfm_partition_cdf[ctx], 2);
+#else
aom_write(w, 0, cm->fc->txfm_partition_prob[ctx]);
+#endif
+
txfm_partition_update(xd->above_txfm_context + blk_col,
xd->left_txfm_context + blk_row, tx_size, tx_size);
+ // TODO(yuec): set correct txfm partition update for qttx
} else {
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
const int bsl = tx_size_wide_unit[sub_txs];
int i;
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(w, 1, ec_ctx->txfm_partition_cdf[ctx], 2);
+#else
aom_write(w, 1, cm->fc->txfm_partition_prob[ctx]);
+#endif
if (tx_size == TX_8X8) {
txfm_partition_update(xd->above_txfm_context + blk_col,
@@ -401,6 +397,7 @@ static void write_tx_size_vartx(const AV1_COMMON *cm, const MACROBLOCKD *xd,
}
}
+#if !CONFIG_NEW_MULTISYMBOL
static void update_txfm_partition_probs(AV1_COMMON *cm, aom_writer *w,
FRAME_COUNTS *counts, int probwt) {
int k;
@@ -408,18 +405,15 @@ static void update_txfm_partition_probs(AV1_COMMON *cm, aom_writer *w,
av1_cond_prob_diff_update(w, &cm->fc->txfm_partition_prob[k],
counts->txfm_partition[k], probwt);
}
+#endif // CONFIG_NEW_MULTISYMBOL
#endif
static void write_selected_tx_size(const AV1_COMMON *cm, const MACROBLOCKD *xd,
aom_writer *w) {
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const BLOCK_SIZE bsize = mbmi->sb_type;
-#if CONFIG_EC_ADAPT
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
(void)cm;
-#else
- FRAME_CONTEXT *ec_ctx = cm->fc;
-#endif
// For sub8x8 blocks the tx_size symbol does not need to be sent
#if CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_EXT_TX) && CONFIG_RECT_TX
if (bsize > BLOCK_4X4) {
@@ -439,22 +433,19 @@ static void write_selected_tx_size(const AV1_COMMON *cm, const MACROBLOCKD *xd,
aom_write_symbol(w, depth, ec_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx],
tx_size_cat + 2);
-#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size)
aom_write(w, tx_size == quarter_txsize_lookup[bsize],
cm->fc->quarter_tx_size_prob);
-#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#endif
}
}
+#if !CONFIG_NEW_MULTISYMBOL
static void update_inter_mode_probs(AV1_COMMON *cm, aom_writer *w,
FRAME_COUNTS *counts) {
int i;
-#if CONFIG_TILE_GROUPS
const int probwt = cm->num_tg;
-#else
- const int probwt = 1;
-#endif
for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i)
av1_cond_prob_diff_update(w, &cm->fc->newmv_prob[i], counts->newmv_mode[i],
probwt);
@@ -468,31 +459,7 @@ static void update_inter_mode_probs(AV1_COMMON *cm, aom_writer *w,
av1_cond_prob_diff_update(w, &cm->fc->drl_prob[i], counts->drl_mode[i],
probwt);
}
-
-#if CONFIG_EXT_INTER
-static void update_inter_compound_mode_probs(AV1_COMMON *cm, int probwt,
- aom_writer *w) {
- const int savings_thresh = av1_cost_one(GROUP_DIFF_UPDATE_PROB) -
- av1_cost_zero(GROUP_DIFF_UPDATE_PROB);
- int i;
- int savings = 0;
- int do_update = 0;
- for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
- savings += prob_diff_update_savings(
- av1_inter_compound_mode_tree, cm->fc->inter_compound_mode_probs[i],
- cm->counts.inter_compound_mode[i], INTER_COMPOUND_MODES, probwt);
- }
- do_update = savings > savings_thresh;
- aom_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
- if (do_update) {
- for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
- prob_diff_update(
- av1_inter_compound_mode_tree, cm->fc->inter_compound_mode_probs[i],
- cm->counts.inter_compound_mode[i], INTER_COMPOUND_MODES, probwt, w);
- }
- }
-}
-#endif // CONFIG_EXT_INTER
+#endif
static int write_skip(const AV1_COMMON *cm, const MACROBLOCKD *xd,
int segment_id, const MODE_INFO *mi, aom_writer *w) {
@@ -500,35 +467,100 @@ static int write_skip(const AV1_COMMON *cm, const MACROBLOCKD *xd,
return 1;
} else {
const int skip = mi->mbmi.skip;
+#if CONFIG_NEW_MULTISYMBOL
+ FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
+ const int ctx = av1_get_skip_context(xd);
+ aom_write_symbol(w, skip, ec_ctx->skip_cdfs[ctx], 2);
+#else
aom_write(w, skip, av1_get_skip_prob(cm, xd));
+#endif
return skip;
}
}
+static void write_is_inter(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+ int segment_id, aom_writer *w, const int is_inter) {
+ if (!segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
+#if CONFIG_NEW_MULTISYMBOL
+ FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
+ const int ctx = av1_get_intra_inter_context(xd);
+ aom_write_symbol(w, is_inter, ec_ctx->intra_inter_cdf[ctx], 2);
+#else
+ aom_write(w, is_inter, av1_get_intra_inter_prob(cm, xd));
+#endif
+ }
+}
+
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-static void write_motion_mode(const AV1_COMMON *cm, const MODE_INFO *mi,
- aom_writer *w) {
+static void write_motion_mode(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ const MODE_INFO *mi, aom_writer *w) {
const MB_MODE_INFO *mbmi = &mi->mbmi;
+
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ MOTION_MODE last_motion_mode_allowed =
+ motion_mode_allowed_wrapper(0,
+#if CONFIG_GLOBAL_MOTION
+ 0, cm->global_motion,
+#endif // CONFIG_GLOBAL_MOTION
+ mi);
+#else
MOTION_MODE last_motion_mode_allowed = motion_mode_allowed(
-#if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
+#if CONFIG_GLOBAL_MOTION
0, cm->global_motion,
-#endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
+#endif // CONFIG_GLOBAL_MOTION
+#if CONFIG_WARPED_MOTION
+ xd,
+#endif
mi);
-
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
if (last_motion_mode_allowed == SIMPLE_TRANSLATION) return;
#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
if (last_motion_mode_allowed == OBMC_CAUSAL) {
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(w, mbmi->motion_mode == OBMC_CAUSAL,
+ xd->tile_ctx->obmc_cdf[mbmi->sb_type], 2);
+#else
aom_write(w, mbmi->motion_mode == OBMC_CAUSAL,
cm->fc->obmc_prob[mbmi->sb_type]);
+#endif
} else {
#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
- av1_write_token(w, av1_motion_mode_tree,
- cm->fc->motion_mode_prob[mbmi->sb_type],
- &motion_mode_encodings[mbmi->motion_mode]);
+ aom_write_symbol(w, mbmi->motion_mode,
+ xd->tile_ctx->motion_mode_cdf[mbmi->sb_type],
+ MOTION_MODES);
#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
}
#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
}
+
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+static void write_ncobmc_mode(MACROBLOCKD *xd, const MODE_INFO *mi,
+ aom_writer *w) {
+ const MB_MODE_INFO *mbmi = &mi->mbmi;
+ ADAPT_OVERLAP_BLOCK ao_block = adapt_overlap_block_lookup[mbmi->sb_type];
+ if (mbmi->motion_mode != NCOBMC_ADAPT_WEIGHT) return;
+
+#ifndef TRAINING_WEIGHTS
+ aom_write_symbol(w, mbmi->ncobmc_mode[0],
+ xd->tile_ctx->ncobmc_mode_cdf[ao_block], MAX_NCOBMC_MODES);
+ if (mi_size_wide[mbmi->sb_type] != mi_size_high[mbmi->sb_type]) {
+ aom_write_symbol(w, mbmi->ncobmc_mode[1],
+ xd->tile_ctx->ncobmc_mode_cdf[ao_block], MAX_NCOBMC_MODES);
+ }
+#else
+ int block;
+ for (block = 0; block < 4; ++block)
+ aom_write_symbol(w, mbmi->ncobmc_mode[0][block],
+ xd->tile_ctx->ncobmc_mode_cdf[ao_block], MAX_NCOBMC_MODES);
+ if (mi_size_wide[mbmi->sb_type] != mi_size_high[mbmi->sb_type]) {
+ for (block = 0; block < 4; ++block)
+ aom_write_symbol(w, mbmi->ncobmc_mode[1][block],
+ xd->tile_ctx->ncobmc_mode_cdf[ao_block],
+ MAX_NCOBMC_MODES);
+ }
+#endif
+}
+#endif
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#if CONFIG_DELTA_Q
@@ -538,13 +570,8 @@ static void write_delta_qindex(const AV1_COMMON *cm, const MACROBLOCKD *xd,
int abs = sign ? -delta_qindex : delta_qindex;
int rem_bits, thr;
int smallval = abs < DELTA_Q_SMALL ? 1 : 0;
-#if CONFIG_EC_ADAPT
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
(void)cm;
-#else
- FRAME_CONTEXT *ec_ctx = cm->fc;
- (void)xd;
-#endif
aom_write_symbol(w, AOMMIN(abs, DELTA_Q_SMALL), ec_ctx->delta_q_cdf,
DELTA_Q_PROBS + 1);
@@ -560,25 +587,6 @@ static void write_delta_qindex(const AV1_COMMON *cm, const MACROBLOCKD *xd,
}
}
-#if !CONFIG_EC_ADAPT
-static void update_delta_q_probs(AV1_COMMON *cm, aom_writer *w,
- FRAME_COUNTS *counts) {
- int k;
-#if CONFIG_TILE_GROUPS
- const int probwt = cm->num_tg;
-#else
- const int probwt = 1;
-#endif
-#if CONFIG_EXT_DELTA_Q
- if (!cm->delta_q_present_flag) return;
-#endif // CONFIG_EXT_DELTA_Q
- for (k = 0; k < DELTA_Q_PROBS; ++k) {
- av1_cond_prob_diff_update(w, &cm->fc->delta_q_prob[k], counts->delta_q[k],
- probwt);
- }
-}
-#endif // CONFIG_EC_ADAPT
-
#if CONFIG_EXT_DELTA_Q
static void write_delta_lflevel(const AV1_COMMON *cm, const MACROBLOCKD *xd,
int delta_lflevel, aom_writer *w) {
@@ -586,13 +594,8 @@ static void write_delta_lflevel(const AV1_COMMON *cm, const MACROBLOCKD *xd,
int abs = sign ? -delta_lflevel : delta_lflevel;
int rem_bits, thr;
int smallval = abs < DELTA_LF_SMALL ? 1 : 0;
-#if CONFIG_EC_ADAPT
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
(void)cm;
-#else
- FRAME_CONTEXT *ec_ctx = cm->fc;
- (void)xd;
-#endif
aom_write_symbol(w, AOMMIN(abs, DELTA_LF_SMALL), ec_ctx->delta_lf_cdf,
DELTA_LF_PROBS + 1);
@@ -607,178 +610,32 @@ static void write_delta_lflevel(const AV1_COMMON *cm, const MACROBLOCKD *xd,
aom_write_bit(w, sign);
}
}
-
-#if !CONFIG_EC_ADAPT
-static void update_delta_lf_probs(AV1_COMMON *cm, aom_writer *w,
- FRAME_COUNTS *counts) {
- int k;
-#if CONFIG_TILE_GROUPS
- const int probwt = cm->num_tg;
-#else
- const int probwt = 1;
-#endif
- if (!cm->delta_lf_present_flag) return;
- for (k = 0; k < DELTA_LF_PROBS; ++k) {
- av1_cond_prob_diff_update(w, &cm->fc->delta_lf_prob[k], counts->delta_lf[k],
- probwt);
- }
-}
-#endif // CONFIG_EC_ADAPT
#endif // CONFIG_EXT_DELTA_Q
#endif // CONFIG_DELTA_Q
+#if !CONFIG_NEW_MULTISYMBOL
static void update_skip_probs(AV1_COMMON *cm, aom_writer *w,
FRAME_COUNTS *counts) {
int k;
-#if CONFIG_TILE_GROUPS
const int probwt = cm->num_tg;
-#else
- const int probwt = 1;
-#endif
for (k = 0; k < SKIP_CONTEXTS; ++k) {
av1_cond_prob_diff_update(w, &cm->fc->skip_probs[k], counts->skip[k],
probwt);
}
}
-
-#if !CONFIG_EC_ADAPT
-static void update_switchable_interp_probs(AV1_COMMON *cm, aom_writer *w,
- FRAME_COUNTS *counts) {
- int j;
- for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) {
-#if CONFIG_TILE_GROUPS
- const int probwt = cm->num_tg;
-#else
- const int probwt = 1;
-#endif
- prob_diff_update(
- av1_switchable_interp_tree, cm->fc->switchable_interp_prob[j],
- counts->switchable_interp[j], SWITCHABLE_FILTERS, probwt, w);
- }
-}
-#endif
-
-#if !CONFIG_EC_ADAPT
-#if CONFIG_EXT_TX
-static void update_ext_tx_probs(AV1_COMMON *cm, aom_writer *w) {
- const int savings_thresh = av1_cost_one(GROUP_DIFF_UPDATE_PROB) -
- av1_cost_zero(GROUP_DIFF_UPDATE_PROB);
- int i, j;
- int s;
-#if CONFIG_TILE_GROUPS
- const int probwt = cm->num_tg;
-#else
- const int probwt = 1;
-#endif
- for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
- int savings = 0;
- int do_update = 0;
- for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
- if (!use_inter_ext_tx_for_txsize[s][i]) continue;
- savings += prob_diff_update_savings(
- av1_ext_tx_inter_tree[s], cm->fc->inter_ext_tx_prob[s][i],
- cm->counts.inter_ext_tx[s][i],
- num_ext_tx_set[ext_tx_set_type_inter[s]], probwt);
- }
- do_update = savings > savings_thresh;
- aom_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
- if (do_update) {
- for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
- if (!use_inter_ext_tx_for_txsize[s][i]) continue;
- prob_diff_update(av1_ext_tx_inter_tree[s],
- cm->fc->inter_ext_tx_prob[s][i],
- cm->counts.inter_ext_tx[s][i],
- num_ext_tx_set[ext_tx_set_type_inter[s]], probwt, w);
- }
- }
- }
-
- for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
- int savings = 0;
- int do_update = 0;
- for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
- if (!use_intra_ext_tx_for_txsize[s][i]) continue;
- for (j = 0; j < INTRA_MODES; ++j)
- savings += prob_diff_update_savings(
- av1_ext_tx_intra_tree[s], cm->fc->intra_ext_tx_prob[s][i][j],
- cm->counts.intra_ext_tx[s][i][j],
- num_ext_tx_set[ext_tx_set_type_intra[s]], probwt);
- }
- do_update = savings > savings_thresh;
- aom_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
- if (do_update) {
- for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
- if (!use_intra_ext_tx_for_txsize[s][i]) continue;
- for (j = 0; j < INTRA_MODES; ++j)
- prob_diff_update(av1_ext_tx_intra_tree[s],
- cm->fc->intra_ext_tx_prob[s][i][j],
- cm->counts.intra_ext_tx[s][i][j],
- num_ext_tx_set[ext_tx_set_type_intra[s]], probwt, w);
- }
- }
- }
-}
-
-#else
-static void update_ext_tx_probs(AV1_COMMON *cm, aom_writer *w) {
- const int savings_thresh = av1_cost_one(GROUP_DIFF_UPDATE_PROB) -
- av1_cost_zero(GROUP_DIFF_UPDATE_PROB);
- int i, j;
-
- int savings = 0;
- int do_update = 0;
-#if CONFIG_TILE_GROUPS
- const int probwt = cm->num_tg;
-#else
- const int probwt = 1;
#endif
- for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
- for (j = 0; j < TX_TYPES; ++j)
- savings += prob_diff_update_savings(
- av1_ext_tx_tree, cm->fc->intra_ext_tx_prob[i][j],
- cm->counts.intra_ext_tx[i][j], TX_TYPES, probwt);
- }
- do_update = savings > savings_thresh;
- aom_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
- if (do_update) {
- for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
- for (j = 0; j < TX_TYPES; ++j) {
- prob_diff_update(av1_ext_tx_tree, cm->fc->intra_ext_tx_prob[i][j],
- cm->counts.intra_ext_tx[i][j], TX_TYPES, probwt, w);
- }
- }
- }
- savings = 0;
- for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
- savings +=
- prob_diff_update_savings(av1_ext_tx_tree, cm->fc->inter_ext_tx_prob[i],
- cm->counts.inter_ext_tx[i], TX_TYPES, probwt);
- }
- do_update = savings > savings_thresh;
- aom_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
- if (do_update) {
- for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
- prob_diff_update(av1_ext_tx_tree, cm->fc->inter_ext_tx_prob[i],
- cm->counts.inter_ext_tx[i], TX_TYPES, probwt, w);
- }
- }
-}
-#endif // CONFIG_EXT_TX
-#endif // !CONFIG_EC_ADAPT
#if CONFIG_PALETTE
static void pack_palette_tokens(aom_writer *w, const TOKENEXTRA **tp, int n,
int num) {
- int i;
const TOKENEXTRA *p = *tp;
-
- for (i = 0; i < num; ++i) {
- av1_write_token(
- w, av1_palette_color_index_tree[n - PALETTE_MIN_SIZE], p->context_tree,
- &palette_color_index_encodings[n - PALETTE_MIN_SIZE][p->token]);
+ write_uniform(w, n, p->token); // The first color index.
+ ++p;
+ --num;
+ for (int i = 0; i < num; ++i) {
+ aom_write_symbol(w, p->token, p->palette_cdf, n);
++p;
}
-
*tp = p;
}
#endif // CONFIG_PALETTE
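With this change the first palette color index is coded with a uniform code over the palette size and every remaining index is coded as a multi-symbol against the CDF carried on its token; the caller now passes the full rows * cols count (see the write_tokens_b hunk further down). A small self-contained sketch of that flow, with put_uniform and put_symbol as stubs rather than the real write_uniform/aom_write_symbol:

#include <stdio.h>

/* Hypothetical stubs standing in for write_uniform() and aom_write_symbol(). */
static void put_uniform(int value, int range) {
  printf("uniform %d in [0,%d)\n", value, range);
}
static void put_symbol(int value, int alphabet_size) {
  printf("symbol  %d (alphabet %d)\n", value, alphabet_size);
}

/* Sketch of the new pack_palette_tokens() ordering: first index uniform-coded
 * over the palette size n, remaining indices CDF-coded one symbol each. */
static void sketch_pack_palette(const int *indices, int num, int n) {
  put_uniform(indices[0], n);      /* first color index of the block */
  for (int i = 1; i < num; ++i)    /* remaining rows * cols - 1 indices */
    put_symbol(indices[i], n);
}

int main(void) {
  const int idx[6] = { 2, 0, 1, 1, 3, 2 };
  sketch_pack_palette(idx, 6, 4);  /* 2x3 block with a 4-color palette */
  return 0;
}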
@@ -930,8 +787,16 @@ static void pack_pvq_tokens(aom_writer *w, MACROBLOCK *const x,
int max_blocks_wide;
int max_blocks_high;
int step = (1 << tx_size);
+
+#if CONFIG_CHROMA_SUB8X8
+ const BLOCK_SIZE plane_bsize =
+ AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
+#elif CONFIG_CB4X4
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+#else
const BLOCK_SIZE plane_bsize =
- get_plane_block_size(AOMMAX(bsize, BLOCK_8X8), pd);
+ get_plane_block_size(AOMMAX(BLOCK_8X8, bsize), pd);
+#endif
adapt = x->daala_enc.state.adapt;
@@ -1030,7 +895,8 @@ static void pack_txb_tokens(aom_writer *w,
uint16_t eob = x->mbmi_ext->eobs[plane][block];
TXB_CTX txb_ctx = { x->mbmi_ext->txb_skip_ctx[plane][block],
x->mbmi_ext->dc_sign_ctx[plane][block] };
- av1_write_coeffs_txb(cm, xd, w, block, plane, tcoeff, eob, &txb_ctx);
+ av1_write_coeffs_txb(cm, xd, w, blk_row, blk_col, block, plane, tx_size,
+ tcoeff, eob, &txb_ctx);
#else
pack_pvq_tokens(w, x, xd, plane, bsize, tx_size);
#endif
@@ -1103,15 +969,30 @@ static void pack_txb_tokens(aom_writer *w, const TOKENEXTRA **tp,
token_stats->cost += tmp_token_stats.cost;
#endif
} else {
+#if CONFIG_RECT_TX_EXT
+ int is_qttx = plane_tx_size == quarter_txsize_lookup[plane_bsize];
+ const TX_SIZE sub_txs = is_qttx ? plane_tx_size : sub_tx_size_map[tx_size];
+#else
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+#endif
const int bsl = tx_size_wide_unit[sub_txs];
int i;
assert(bsl > 0);
for (i = 0; i < 4; ++i) {
+#if CONFIG_RECT_TX_EXT
+ int is_wide_tx = tx_size_wide_unit[sub_txs] > tx_size_high_unit[sub_txs];
+ const int offsetr =
+ is_qttx ? (is_wide_tx ? i * tx_size_high_unit[sub_txs] : 0)
+ : blk_row + (i >> 1) * bsl;
+ const int offsetc =
+ is_qttx ? (is_wide_tx ? 0 : i * tx_size_wide_unit[sub_txs])
+ : blk_col + (i & 0x01) * bsl;
+#else
const int offsetr = blk_row + (i >> 1) * bsl;
const int offsetc = blk_col + (i & 0x01) * bsl;
+#endif
const int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
@@ -1136,6 +1017,14 @@ static void write_segment_id(aom_writer *w, const struct segmentation *seg,
}
}
+#if CONFIG_NEW_MULTISYMBOL
+#define WRITE_REF_BIT(bname, pname) \
+ aom_write_symbol(w, bname, av1_get_pred_cdf_##pname(cm, xd), 2)
+#else
+#define WRITE_REF_BIT(bname, pname) \
+ aom_write(w, bname, av1_get_pred_prob_##pname(cm, xd))
+#endif
+
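The WRITE_REF_BIT macro above picks between a binary CDF-coded symbol and a classic single bit coded against an 8-bit probability, depending on CONFIG_NEW_MULTISYMBOL. A toy illustration of the same shape, with stub writers (put_binary_symbol, put_bool) that only log; none of these names exist in libaom:

#include <stdio.h>

static void put_binary_symbol(int bit) { printf("cdf-coded bit %d\n", bit); }
static void put_bool(int bit, int prob) { printf("bool bit %d (p=%d)\n", bit, prob); }

/* Same selection pattern as WRITE_REF_BIT: one macro name, two backends. */
#define SKETCH_NEW_MULTISYMBOL 1
#if SKETCH_NEW_MULTISYMBOL
#define SKETCH_WRITE_REF_BIT(bit) put_binary_symbol(bit)
#else
#define SKETCH_WRITE_REF_BIT(bit) put_bool(bit, 128)
#endif

int main(void) {
  SKETCH_WRITE_REF_BIT(1); /* multi-symbol path */
  put_bool(1, 128);        /* what the non-multisymbol branch would emit */
  return 0;
}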
// This function encodes the reference frame
static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd,
aom_writer *w) {
@@ -1153,66 +1042,183 @@ static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd,
// does the feature use compound prediction or not
// (if not specified at the frame/segment level)
if (cm->reference_mode == REFERENCE_MODE_SELECT) {
-#if SUB8X8_COMP_REF
- aom_write(w, is_compound, av1_get_reference_mode_prob(cm, xd));
-#else
+#if !SUB8X8_COMP_REF
if (mbmi->sb_type != BLOCK_4X4)
- aom_write(w, is_compound, av1_get_reference_mode_prob(cm, xd));
+#endif
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(w, is_compound, av1_get_reference_mode_cdf(cm, xd), 2);
+#else
+ aom_write(w, is_compound, av1_get_reference_mode_prob(cm, xd));
#endif
} else {
assert((!is_compound) == (cm->reference_mode == SINGLE_REFERENCE));
}
if (is_compound) {
+#if CONFIG_EXT_COMP_REFS
+ const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi)
+ ? UNIDIR_COMP_REFERENCE
+ : BIDIR_COMP_REFERENCE;
+#if USE_UNI_COMP_REFS
+#if CONFIG_VAR_REFS
+ if ((L_OR_L2(cm) || L3_OR_G(cm)) && BWD_OR_ALT(cm))
+ if (L_AND_L2(cm) || L_AND_L3(cm) || L_AND_G(cm) || BWD_AND_ALT(cm))
+#endif // CONFIG_VAR_REFS
+ aom_write(w, comp_ref_type, av1_get_comp_reference_type_prob(cm, xd));
+#if CONFIG_VAR_REFS
+ else
+ assert(comp_ref_type == BIDIR_COMP_REFERENCE);
+ else
+ assert(comp_ref_type == UNIDIR_COMP_REFERENCE);
+#endif // CONFIG_VAR_REFS
+#else // !USE_UNI_COMP_REFS
+ // NOTE: uni-directional comp refs disabled
+ assert(comp_ref_type == BIDIR_COMP_REFERENCE);
+#endif // USE_UNI_COMP_REFS
+
+ if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
+ const int bit = mbmi->ref_frame[0] == BWDREF_FRAME;
+#if CONFIG_VAR_REFS
+ if ((L_AND_L2(cm) || L_AND_L3(cm) || L_AND_G(cm)) && BWD_AND_ALT(cm))
+#endif // CONFIG_VAR_REFS
+ aom_write(w, bit, av1_get_pred_prob_uni_comp_ref_p(cm, xd));
+
+ if (!bit) {
+ assert(mbmi->ref_frame[0] == LAST_FRAME);
+#if CONFIG_VAR_REFS
+ if (L_AND_L2(cm) && (L_AND_L3(cm) || L_AND_G(cm))) {
+#endif // CONFIG_VAR_REFS
+ const int bit1 = mbmi->ref_frame[1] == LAST3_FRAME ||
+ mbmi->ref_frame[1] == GOLDEN_FRAME;
+ aom_write(w, bit1, av1_get_pred_prob_uni_comp_ref_p1(cm, xd));
+
+ if (bit1) {
+#if CONFIG_VAR_REFS
+ if (L_AND_L3(cm) && L_AND_G(cm)) {
+#endif // CONFIG_VAR_REFS
+ const int bit2 = mbmi->ref_frame[1] == GOLDEN_FRAME;
+ aom_write(w, bit2, av1_get_pred_prob_uni_comp_ref_p2(cm, xd));
+#if CONFIG_VAR_REFS
+ }
+#endif // CONFIG_VAR_REFS
+ }
+#if CONFIG_VAR_REFS
+ }
+#endif // CONFIG_VAR_REFS
+ } else {
+ assert(mbmi->ref_frame[1] == ALTREF_FRAME);
+ }
+
+ return;
+ }
+
+ assert(comp_ref_type == BIDIR_COMP_REFERENCE);
+#endif // CONFIG_EXT_COMP_REFS
+
#if CONFIG_EXT_REFS
const int bit = (mbmi->ref_frame[0] == GOLDEN_FRAME ||
mbmi->ref_frame[0] == LAST3_FRAME);
- const int bit_bwd = mbmi->ref_frame[1] == ALTREF_FRAME;
-#else // CONFIG_EXT_REFS
- const int bit = mbmi->ref_frame[0] == GOLDEN_FRAME;
-#endif // CONFIG_EXT_REFS
-
- aom_write(w, bit, av1_get_pred_prob_comp_ref_p(cm, xd));
+#if CONFIG_VAR_REFS
+ // Test need to explicitly code (L,L2) vs (L3,G) branch node in tree
+ if (L_OR_L2(cm) && L3_OR_G(cm))
+#endif // CONFIG_VAR_REFS
+ WRITE_REF_BIT(bit, comp_ref_p);
-#if CONFIG_EXT_REFS
if (!bit) {
- const int bit1 = mbmi->ref_frame[0] == LAST_FRAME;
- aom_write(w, bit1, av1_get_pred_prob_comp_ref_p1(cm, xd));
+#if CONFIG_VAR_REFS
+ // Test need to explicitly code (L) vs (L2) branch node in tree
+ if (L_AND_L2(cm)) {
+#endif // CONFIG_VAR_REFS
+ const int bit1 = mbmi->ref_frame[0] == LAST_FRAME;
+ WRITE_REF_BIT(bit1, comp_ref_p1);
+#if CONFIG_VAR_REFS
+ }
+#endif // CONFIG_VAR_REFS
} else {
- const int bit2 = mbmi->ref_frame[0] == GOLDEN_FRAME;
- aom_write(w, bit2, av1_get_pred_prob_comp_ref_p2(cm, xd));
+#if CONFIG_VAR_REFS
+ // Test need to explicitly code (L3) vs (G) branch node in tree
+ if (L3_AND_G(cm)) {
+#endif // CONFIG_VAR_REFS
+ const int bit2 = mbmi->ref_frame[0] == GOLDEN_FRAME;
+ WRITE_REF_BIT(bit2, comp_ref_p2);
+#if CONFIG_VAR_REFS
+ }
+#endif // CONFIG_VAR_REFS
}
- aom_write(w, bit_bwd, av1_get_pred_prob_comp_bwdref_p(cm, xd));
+
+#if CONFIG_VAR_REFS
+ // Test need to explicitly code (BWD) vs (ALT) branch node in tree
+ if (BWD_AND_ALT(cm)) {
+#endif // CONFIG_VAR_REFS
+ const int bit_bwd = mbmi->ref_frame[1] == ALTREF_FRAME;
+ WRITE_REF_BIT(bit_bwd, comp_bwdref_p);
+#if CONFIG_VAR_REFS
+ }
+#endif // CONFIG_VAR_REFS
+
+#else // !CONFIG_EXT_REFS
+ const int bit = mbmi->ref_frame[0] == GOLDEN_FRAME;
+ WRITE_REF_BIT(bit, comp_ref_p);
#endif // CONFIG_EXT_REFS
} else {
#if CONFIG_EXT_REFS
const int bit0 = (mbmi->ref_frame[0] == ALTREF_FRAME ||
mbmi->ref_frame[0] == BWDREF_FRAME);
- aom_write(w, bit0, av1_get_pred_prob_single_ref_p1(cm, xd));
+#if CONFIG_VAR_REFS
+ // Test need to explicitly code (L,L2,L3,G) vs (BWD,ALT) branch node in
+ // tree
+ if ((L_OR_L2(cm) || L3_OR_G(cm)) && BWD_OR_ALT(cm))
+#endif // CONFIG_VAR_REFS
+ WRITE_REF_BIT(bit0, single_ref_p1);
if (bit0) {
- const int bit1 = mbmi->ref_frame[0] == ALTREF_FRAME;
- aom_write(w, bit1, av1_get_pred_prob_single_ref_p2(cm, xd));
+#if CONFIG_VAR_REFS
+ // Test need to explicitly code (BWD) vs (ALT) branch node in tree
+ if (BWD_AND_ALT(cm)) {
+#endif // CONFIG_VAR_REFS
+ const int bit1 = mbmi->ref_frame[0] == ALTREF_FRAME;
+ WRITE_REF_BIT(bit1, single_ref_p2);
+#if CONFIG_VAR_REFS
+ }
+#endif // CONFIG_VAR_REFS
} else {
const int bit2 = (mbmi->ref_frame[0] == LAST3_FRAME ||
mbmi->ref_frame[0] == GOLDEN_FRAME);
- aom_write(w, bit2, av1_get_pred_prob_single_ref_p3(cm, xd));
+#if CONFIG_VAR_REFS
+ // Test need to explicitly code (L,L2) vs (L3,G) branch node in tree
+ if (L_OR_L2(cm) && L3_OR_G(cm))
+#endif // CONFIG_VAR_REFS
+ WRITE_REF_BIT(bit2, single_ref_p3);
if (!bit2) {
- const int bit3 = mbmi->ref_frame[0] != LAST_FRAME;
- aom_write(w, bit3, av1_get_pred_prob_single_ref_p4(cm, xd));
+#if CONFIG_VAR_REFS
+ // Test need to explicitly code (L) vs (L2) branch node in tree
+ if (L_AND_L2(cm)) {
+#endif // CONFIG_VAR_REFS
+ const int bit3 = mbmi->ref_frame[0] != LAST_FRAME;
+ WRITE_REF_BIT(bit3, single_ref_p4);
+#if CONFIG_VAR_REFS
+ }
+#endif // CONFIG_VAR_REFS
} else {
- const int bit4 = mbmi->ref_frame[0] != LAST3_FRAME;
- aom_write(w, bit4, av1_get_pred_prob_single_ref_p5(cm, xd));
+#if CONFIG_VAR_REFS
+ // Test need to explicitly code (L3) vs (G) branch node in tree
+ if (L3_AND_G(cm)) {
+#endif // CONFIG_VAR_REFS
+ const int bit4 = mbmi->ref_frame[0] != LAST3_FRAME;
+ WRITE_REF_BIT(bit4, single_ref_p5);
+#if CONFIG_VAR_REFS
+ }
+#endif // CONFIG_VAR_REFS
}
}
-#else // CONFIG_EXT_REFS
+#else // !CONFIG_EXT_REFS
const int bit0 = mbmi->ref_frame[0] != LAST_FRAME;
- aom_write(w, bit0, av1_get_pred_prob_single_ref_p1(cm, xd));
+ WRITE_REF_BIT(bit0, single_ref_p1);
if (bit0) {
const int bit1 = mbmi->ref_frame[0] != GOLDEN_FRAME;
- aom_write(w, bit1, av1_get_pred_prob_single_ref_p2(cm, xd));
+ WRITE_REF_BIT(bit1, single_ref_p2);
}
#endif // CONFIG_EXT_REFS
}
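For the single-reference case with extended references, the bits written above form a fixed decision tree: single_ref_p1 splits the backward group {BWDREF, ALTREF} from {LAST, LAST2, LAST3, GOLDEN}, and the later bits walk down to the exact frame. A compact standalone sketch of that tree; the sk_ enum and put() are local stand-ins, not libaom's reference-frame values:

#include <stdio.h>

typedef enum { SK_LAST, SK_LAST2, SK_LAST3, SK_GOLDEN, SK_BWDREF, SK_ALTREF } sk_ref;

static void put(const char *ctx, int bit) { printf("%-13s %d\n", ctx, bit); }

/* Sketch of the single-reference bit tree coded by WRITE_REF_BIT above. */
static void sketch_write_single_ref(sk_ref ref) {
  const int bit0 = (ref == SK_ALTREF || ref == SK_BWDREF);
  put("single_ref_p1", bit0);
  if (bit0) {
    put("single_ref_p2", ref == SK_ALTREF);       /* ALTREF vs BWDREF  */
  } else {
    const int bit2 = (ref == SK_LAST3 || ref == SK_GOLDEN);
    put("single_ref_p3", bit2);
    if (!bit2)
      put("single_ref_p4", ref != SK_LAST);       /* LAST2 vs LAST     */
    else
      put("single_ref_p5", ref != SK_LAST3);      /* GOLDEN vs LAST3   */
  }
}

int main(void) {
  sketch_write_single_ref(SK_GOLDEN); /* emits p1=0, p3=1, p5=1 */
  return 0;
}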
@@ -1250,7 +1256,7 @@ static void write_filter_intra_mode_info(const AV1_COMMON *const cm,
(void)mi_col;
#endif // CONFIG_CB4X4
- if (mbmi->uv_mode == DC_PRED
+ if (mbmi->uv_mode == UV_DC_PRED
#if CONFIG_PALETTE
&& mbmi->palette_mode_info.palette_size[1] == 0
#endif // CONFIG_PALETTE
@@ -1277,7 +1283,7 @@ static void write_intra_angle_info(const MACROBLOCKD *xd,
#endif // CONFIG_INTRA_INTERP
(void)ec_ctx;
- if (bsize < BLOCK_8X8) return;
+ if (!av1_use_angle_delta(bsize)) return;
if (av1_is_directional_mode(mbmi->mode, bsize)) {
write_uniform(w, 2 * MAX_ANGLE_DELTA + 1,
@@ -1292,7 +1298,7 @@ static void write_intra_angle_info(const MACROBLOCKD *xd,
#endif // CONFIG_INTRA_INTERP
}
- if (av1_is_directional_mode(mbmi->uv_mode, bsize)) {
+ if (av1_is_directional_mode(get_uv_mode(mbmi->uv_mode), bsize)) {
write_uniform(w, 2 * MAX_ANGLE_DELTA + 1,
MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
}
@@ -1303,11 +1309,7 @@ static void write_mb_interp_filter(AV1_COMP *cpi, const MACROBLOCKD *xd,
aom_writer *w) {
AV1_COMMON *const cm = &cpi->common;
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
-#if CONFIG_EC_ADAPT
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-#else
- FRAME_CONTEXT *ec_ctx = cm->fc;
-#endif
if (!av1_is_interp_needed(xd)) {
#if CONFIG_DUAL_FILTER
@@ -1485,19 +1487,21 @@ static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd,
if (mbmi->mode == DC_PRED) {
const int n = pmi->palette_size[0];
int palette_y_mode_ctx = 0;
- if (above_mi)
+ if (above_mi) {
palette_y_mode_ctx +=
(above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
- if (left_mi)
+ }
+ if (left_mi) {
palette_y_mode_ctx +=
(left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+ }
aom_write(
w, n > 0,
av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_y_mode_ctx]);
if (n > 0) {
- av1_write_token(w, av1_palette_size_tree,
- av1_default_palette_y_size_prob[bsize - BLOCK_8X8],
- &palette_size_encodings[n - PALETTE_MIN_SIZE]);
+ aom_write_symbol(w, n - PALETTE_MIN_SIZE,
+ xd->tile_ctx->palette_y_size_cdf[bsize - BLOCK_8X8],
+ PALETTE_SIZES);
#if CONFIG_PALETTE_DELTA_ENCODING
write_palette_colors_y(xd, pmi, cm->bit_depth, w);
#else
@@ -1506,18 +1510,17 @@ static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd,
aom_write_literal(w, pmi->palette_colors[i], cm->bit_depth);
}
#endif // CONFIG_PALETTE_DELTA_ENCODING
- write_uniform(w, n, pmi->palette_first_color_idx[0]);
}
}
- if (mbmi->uv_mode == DC_PRED) {
+ if (mbmi->uv_mode == UV_DC_PRED) {
const int n = pmi->palette_size[1];
const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0);
aom_write(w, n > 0, av1_default_palette_uv_mode_prob[palette_uv_mode_ctx]);
if (n > 0) {
- av1_write_token(w, av1_palette_size_tree,
- av1_default_palette_uv_size_prob[bsize - BLOCK_8X8],
- &palette_size_encodings[n - PALETTE_MIN_SIZE]);
+ aom_write_symbol(w, n - PALETTE_MIN_SIZE,
+ xd->tile_ctx->palette_uv_size_cdf[bsize - BLOCK_8X8],
+ PALETTE_SIZES);
#if CONFIG_PALETTE_DELTA_ENCODING
write_palette_colors_uv(xd, pmi, cm->bit_depth, w);
#else
@@ -1532,7 +1535,6 @@ static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd,
cm->bit_depth);
}
#endif // CONFIG_PALETTE_DELTA_ENCODING
- write_uniform(w, n, pmi->palette_first_color_idx[1]);
}
}
}
@@ -1543,21 +1545,20 @@ void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
const int supertx_enabled,
#endif
#if CONFIG_TXK_SEL
- int block, int plane,
+ int blk_row, int blk_col, int block, int plane,
+ TX_SIZE tx_size,
#endif
aom_writer *w) {
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const int is_inter = is_inter_block(mbmi);
+#if !CONFIG_TXK_SEL
#if CONFIG_VAR_TX
const TX_SIZE tx_size = is_inter ? mbmi->min_tx_size : mbmi->tx_size;
#else
const TX_SIZE tx_size = mbmi->tx_size;
#endif // CONFIG_VAR_TX
-#if CONFIG_EC_ADAPT
+#endif // !CONFIG_TXK_SEL
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-#else
- FRAME_CONTEXT *ec_ctx = cm->fc;
-#endif
#if !CONFIG_TXK_SEL
TX_TYPE tx_type = mbmi->tx_type;
@@ -1565,7 +1566,8 @@ void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
// Only y plane's tx_type is transmitted
if (plane > 0) return;
PLANE_TYPE plane_type = get_plane_type(plane);
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ TX_TYPE tx_type =
+ av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
#endif
if (!FIXED_TX_TYPE) {
@@ -1583,21 +1585,20 @@ void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
const int eset =
get_ext_tx_set(tx_size, bsize, is_inter, cm->reduced_tx_set_used);
+ // eset == 0 should correspond to a set with only DCT_DCT and there
+ // is no need to send the tx_type
+ assert(eset > 0);
if (is_inter) {
assert(ext_tx_used_inter[eset][tx_type]);
- if (eset > 0) {
- aom_write_symbol(w, av1_ext_tx_inter_ind[eset][tx_type],
- ec_ctx->inter_ext_tx_cdf[eset][square_tx_size],
- ext_tx_cnt_inter[eset]);
- }
+ aom_write_symbol(w, av1_ext_tx_inter_ind[eset][tx_type],
+ ec_ctx->inter_ext_tx_cdf[eset][square_tx_size],
+ ext_tx_cnt_inter[eset]);
} else if (ALLOW_INTRA_EXT_TX) {
assert(ext_tx_used_intra[eset][tx_type]);
- if (eset > 0) {
- aom_write_symbol(
- w, av1_ext_tx_intra_ind[eset][tx_type],
- ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][mbmi->mode],
- ext_tx_cnt_intra[eset]);
- }
+ aom_write_symbol(
+ w, av1_ext_tx_intra_ind[eset][tx_type],
+ ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][mbmi->mode],
+ ext_tx_cnt_intra[eset]);
}
}
#else
@@ -1632,36 +1633,30 @@ static void write_intra_mode(FRAME_CONTEXT *frame_ctx, BLOCK_SIZE bsize,
}
static void write_intra_uv_mode(FRAME_CONTEXT *frame_ctx,
- PREDICTION_MODE uv_mode, PREDICTION_MODE y_mode,
- aom_writer *w) {
- aom_write_symbol(w, av1_intra_mode_ind[uv_mode],
- frame_ctx->uv_mode_cdf[y_mode], INTRA_MODES);
+ UV_PREDICTION_MODE uv_mode,
+ PREDICTION_MODE y_mode, aom_writer *w) {
+ aom_write_symbol(w, av1_intra_mode_ind[get_uv_mode(uv_mode)],
+ frame_ctx->uv_mode_cdf[y_mode], UV_INTRA_MODES);
}
#if CONFIG_CFL
-static void write_cfl_alphas(FRAME_CONTEXT *const frame_ctx, int skip, int ind,
+static void write_cfl_alphas(FRAME_CONTEXT *const frame_ctx, int ind,
const CFL_SIGN_TYPE signs[CFL_SIGNS],
aom_writer *w) {
- if (skip) {
- assert(ind == 0);
+ // Check for uninitialized signs
+ if (cfl_alpha_codes[ind][CFL_PRED_U] == 0)
assert(signs[CFL_PRED_U] == CFL_SIGN_POS);
+ if (cfl_alpha_codes[ind][CFL_PRED_V] == 0)
assert(signs[CFL_PRED_V] == CFL_SIGN_POS);
- } else {
- // Check for uninitialized signs
- if (cfl_alpha_codes[ind][CFL_PRED_U] == 0)
- assert(signs[CFL_PRED_U] == CFL_SIGN_POS);
- if (cfl_alpha_codes[ind][CFL_PRED_V] == 0)
- assert(signs[CFL_PRED_V] == CFL_SIGN_POS);
-
- // Write a symbol representing a combination of alpha Cb and alpha Cr.
- aom_write_symbol(w, ind, frame_ctx->cfl_alpha_cdf, CFL_ALPHABET_SIZE);
-
- // Signs are only signaled for nonzero codes.
- if (cfl_alpha_codes[ind][CFL_PRED_U] != 0)
- aom_write_bit(w, signs[CFL_PRED_U]);
- if (cfl_alpha_codes[ind][CFL_PRED_V] != 0)
- aom_write_bit(w, signs[CFL_PRED_V]);
- }
+
+ // Write a symbol representing a combination of alpha Cb and alpha Cr.
+ aom_write_symbol(w, ind, frame_ctx->cfl_alpha_cdf, CFL_ALPHABET_SIZE);
+
+ // Signs are only signaled for nonzero codes.
+ if (cfl_alpha_codes[ind][CFL_PRED_U] != 0)
+ aom_write_bit(w, signs[CFL_PRED_U]);
+ if (cfl_alpha_codes[ind][CFL_PRED_V] != 0)
+ aom_write_bit(w, signs[CFL_PRED_V]);
}
#endif
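After this change write_cfl_alphas() always sends one joint symbol for the (Cb, Cr) alpha-magnitude pair and then a sign bit only for each nonzero magnitude; the old skip shortcut is gone. A minimal sketch of that shape, using a made-up 4-entry magnitude table (sk_alpha_codes) in place of libaom's real cfl_alpha_codes:

#include <stdio.h>

/* Hypothetical magnitude table: [i][0] is the Cb magnitude, [i][1] the Cr
 * magnitude for joint index i.  Only the signaling shape matters here. */
static const int sk_alpha_codes[4][2] = { { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } };

static void put_symbol(int v, int n) { printf("symbol %d / %d\n", v, n); }
static void put_bit(int b) { printf("sign bit %d\n", b); }

/* Sketch: one joint index symbol, then signs only where the magnitude is
 * nonzero; zero magnitudes carry an implicit positive sign. */
static void sketch_write_cfl(int ind, int sign_u, int sign_v) {
  put_symbol(ind, 4);
  if (sk_alpha_codes[ind][0] != 0) put_bit(sign_u);
  if (sk_alpha_codes[ind][1] != 0) put_bit(sign_v);
}

int main(void) {
  sketch_write_cfl(1, 1, 0); /* only the Cb sign is coded for this index */
  return 0;
}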
@@ -1672,22 +1667,13 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
#endif
aom_writer *w) {
AV1_COMMON *const cm = &cpi->common;
-#if CONFIG_DELTA_Q || CONFIG_EC_ADAPT
MACROBLOCK *const x = &cpi->td.mb;
MACROBLOCKD *const xd = &x->e_mbd;
-#else
- const MACROBLOCK *x = &cpi->td.mb;
- const MACROBLOCKD *xd = &x->e_mbd;
-#endif
-#if CONFIG_EC_ADAPT
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-#else
- FRAME_CONTEXT *ec_ctx = cm->fc;
-#endif
const MODE_INFO *mi = xd->mi[0];
const struct segmentation *const seg = &cm->seg;
- struct segmentation_probs *const segp = &cm->fc->seg;
+ struct segmentation_probs *const segp = &ec_ctx->seg;
const MB_MODE_INFO *const mbmi = &mi->mbmi;
const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
const PREDICTION_MODE mode = mbmi->mode;
@@ -1708,8 +1694,13 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
if (seg->update_map) {
if (seg->temporal_update) {
const int pred_flag = mbmi->seg_id_predicted;
+#if CONFIG_NEW_MULTISYMBOL
+ aom_cdf_prob *pred_cdf = av1_get_pred_cdf_seg_id(segp, xd);
+ aom_write_symbol(w, pred_flag, pred_cdf, 2);
+#else
aom_prob pred_prob = av1_get_pred_prob_seg_id(segp, xd);
aom_write(w, pred_flag, pred_prob);
+#endif
if (!pred_flag) write_segment_id(w, seg, segp, segment_id);
} else {
write_segment_id(w, seg, segp, segment_id);
@@ -1750,8 +1741,7 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
#if CONFIG_SUPERTX
if (!supertx_enabled)
#endif // CONFIG_SUPERTX
- if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
- aom_write(w, is_inter, av1_get_intra_inter_prob(cm, xd));
+ write_is_inter(cm, xd, mbmi->segment_id, w, is_inter);
if (cm->tx_mode == TX_MODE_SELECT &&
#if CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_RECT_TX)
@@ -1779,6 +1769,15 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
for (idx = 0; idx < width; idx += bw)
write_tx_size_vartx(cm, xd, mbmi, max_tx_size, height != width, idy,
idx, w);
+#if CONFIG_RECT_TX_EXT
+ if (is_quarter_tx_allowed(xd, mbmi, is_inter_block(mbmi)) &&
+ quarter_txsize_lookup[bsize] != max_tx_size &&
+ (mbmi->tx_size == quarter_txsize_lookup[bsize] ||
+ mbmi->tx_size == max_tx_size)) {
+ aom_write(w, mbmi->tx_size != max_tx_size,
+ cm->fc->quarter_tx_size_prob);
+ }
+#endif
} else {
set_txfm_ctxs(mbmi->tx_size, xd->n8_w, xd->n8_h, skip, xd);
write_selected_tx_size(cm, xd, w);
@@ -1813,9 +1812,8 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
#endif // CONFIG_CB4X4
#if CONFIG_CFL
- if (mbmi->uv_mode == DC_PRED) {
- write_cfl_alphas(ec_ctx, mbmi->skip, mbmi->cfl_alpha_idx,
- mbmi->cfl_alpha_signs, w);
+ if (mbmi->uv_mode == UV_DC_PRED) {
+ write_cfl_alphas(ec_ctx, mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, w);
}
#endif
@@ -1838,11 +1836,25 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
int16_t mode_ctx;
write_ref_frames(cm, xd, w);
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
+ // NOTE: Handle single ref comp mode
+ if (!is_compound)
+ aom_write(w, is_inter_singleref_comp_mode(mode),
+ av1_get_inter_mode_prob(cm, xd));
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+
#if CONFIG_EXT_INTER
+#if CONFIG_COMPOUND_SINGLEREF
+ if (is_compound || is_inter_singleref_comp_mode(mode))
+#else // !CONFIG_COMPOUND_SINGLEREF
if (is_compound)
+#endif // CONFIG_COMPOUND_SINGLEREF
mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
else
#endif // CONFIG_EXT_INTER
+
mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
mbmi->ref_frame, bsize, -1);
@@ -1851,18 +1863,25 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
if (bsize >= BLOCK_8X8 || unify_bsize) {
#if CONFIG_EXT_INTER
if (is_inter_compound_mode(mode))
- write_inter_compound_mode(cm, w, mode, mode_ctx);
+ write_inter_compound_mode(cm, xd, w, mode, mode_ctx);
+#if CONFIG_COMPOUND_SINGLEREF
+ else if (is_inter_singleref_comp_mode(mode))
+ write_inter_singleref_comp_mode(xd, w, mode, mode_ctx);
+#endif // CONFIG_COMPOUND_SINGLEREF
else if (is_inter_singleref_mode(mode))
#endif // CONFIG_EXT_INTER
write_inter_mode(w, mode, ec_ctx, mode_ctx);
#if CONFIG_EXT_INTER
if (mode == NEWMV || mode == NEW_NEWMV ||
+#if CONFIG_COMPOUND_SINGLEREF
+ mbmi->mode == SR_NEW_NEWMV ||
+#endif // CONFIG_COMPOUND_SINGLEREF
have_nearmv_in_inter_mode(mode))
-#else
+#else // !CONFIG_EXT_INTER
if (mode == NEARMV || mode == NEWMV)
-#endif
- write_drl_idx(cm, mbmi, mbmi_ext, w);
+#endif // CONFIG_EXT_INTER
+ write_drl_idx(ec_ctx, mbmi, mbmi_ext, w);
else
assert(mbmi->ref_mv_idx == 0);
}
@@ -1873,6 +1892,10 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
#endif // !CONFIG_DUAL_FILTER && !CONFIG_WARPED_MOTION
if (bsize < BLOCK_8X8 && !unify_bsize) {
+#if CONFIG_COMPOUND_SINGLEREF
+ /// NOTE: Single ref comp mode does not support sub8x8.
+ assert(is_compound || !is_inter_singleref_comp_mode(mbmi->mode));
+#endif // CONFIG_COMPOUND_SINGLEREF
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
@@ -1887,7 +1910,7 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
mbmi->ref_frame, bsize, j);
#if CONFIG_EXT_INTER
if (is_inter_compound_mode(b_mode))
- write_inter_compound_mode(cm, w, b_mode, mode_ctx);
+ write_inter_compound_mode(cm, xd, w, b_mode, mode_ctx);
else if (is_inter_singleref_mode(b_mode))
#endif // CONFIG_EXT_INTER
write_inter_mode(w, b_mode, ec_ctx, mode_ctx);
@@ -1969,6 +1992,22 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
av1_encode_mv(cpi, w, &mbmi->mv[0].as_mv,
&mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv, nmvc,
allow_hp);
+#if CONFIG_COMPOUND_SINGLEREF
+ } else if ( // mode == SR_NEAREST_NEWMV ||
+ mode == SR_NEAR_NEWMV || mode == SR_ZERO_NEWMV ||
+ mode == SR_NEW_NEWMV) {
+ int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
+ int nmv_ctx =
+ av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
+ mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx);
+ nmv_context *nmvc = &ec_ctx->nmvc[nmv_ctx];
+ int_mv ref_mv = mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0];
+ if (mode == SR_NEW_NEWMV)
+ av1_encode_mv(cpi, w, &mbmi->mv[0].as_mv, &ref_mv.as_mv, nmvc,
+ allow_hp);
+ av1_encode_mv(cpi, w, &mbmi->mv[1].as_mv, &ref_mv.as_mv, nmvc,
+ allow_hp);
+#endif // CONFIG_COMPOUND_SINGLEREF
#endif // CONFIG_EXT_INTER
}
}
@@ -1981,13 +2020,23 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
cpi->common.allow_interintra_compound && is_interintra_allowed(mbmi)) {
const int interintra = mbmi->ref_frame[1] == INTRA_FRAME;
const int bsize_group = size_group_lookup[bsize];
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(w, interintra, ec_ctx->interintra_cdf[bsize_group], 2);
+#else
aom_write(w, interintra, cm->fc->interintra_prob[bsize_group]);
+#endif
if (interintra) {
- write_interintra_mode(w, mbmi->interintra_mode,
- cm->fc->interintra_mode_prob[bsize_group]);
+ aom_write_symbol(w, mbmi->interintra_mode,
+ ec_ctx->interintra_mode_cdf[bsize_group],
+ INTERINTRA_MODES);
if (is_interintra_wedge_used(bsize)) {
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(w, mbmi->use_wedge_interintra,
+ ec_ctx->wedge_interintra_cdf[bsize], 2);
+#else
aom_write(w, mbmi->use_wedge_interintra,
cm->fc->wedge_interintra_prob[bsize]);
+#endif
if (mbmi->use_wedge_interintra) {
aom_write_literal(w, mbmi->interintra_wedge_index,
get_wedge_bits_lookup(bsize));
@@ -2005,21 +2054,28 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
#if CONFIG_EXT_INTER
if (mbmi->ref_frame[1] != INTRA_FRAME)
#endif // CONFIG_EXT_INTER
- write_motion_mode(cm, mi, w);
+ write_motion_mode(cm, xd, mi, w);
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ write_ncobmc_mode(xd, mi, w);
+#endif
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#if CONFIG_EXT_INTER
- if (cpi->common.reference_mode != SINGLE_REFERENCE &&
- is_inter_compound_mode(mbmi->mode)
+ if (
+#if CONFIG_COMPOUND_SINGLEREF
+ is_inter_anyref_comp_mode(mbmi->mode) &&
+#else // !CONFIG_COMPOUND_SINGLEREF
+ cpi->common.reference_mode != SINGLE_REFERENCE &&
+ is_inter_compound_mode(mbmi->mode) &&
+#endif // CONFIG_COMPOUND_SINGLEREF
#if CONFIG_MOTION_VAR
- && mbmi->motion_mode == SIMPLE_TRANSLATION
+ mbmi->motion_mode == SIMPLE_TRANSLATION &&
#endif // CONFIG_MOTION_VAR
- && is_any_masked_compound_used(bsize)) {
+ is_any_masked_compound_used(bsize)) {
#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
if (cm->allow_masked_compound) {
- av1_write_token(
- w, av1_compound_type_tree, cm->fc->compound_type_prob[bsize],
- &compound_type_encodings[mbmi->interinter_compound_type]);
+ aom_write_symbol(w, mbmi->interinter_compound_type,
+ ec_ctx->compound_type_cdf[bsize], COMPOUND_TYPES);
#if CONFIG_WEDGE
if (mbmi->interinter_compound_type == COMPOUND_WEDGE) {
aom_write_literal(w, mbmi->wedge_index, get_wedge_bits_lookup(bsize));
@@ -2061,8 +2117,9 @@ static void write_mb_modes_kf(AV1_COMMON *cm,
#endif // CONFIG_INTRABC
const int mi_row, const int mi_col,
aom_writer *w) {
+ FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
const struct segmentation *const seg = &cm->seg;
- struct segmentation_probs *const segp = &cm->fc->seg;
+ struct segmentation_probs *const segp = &ec_ctx->seg;
const MODE_INFO *const mi = xd->mi[0];
const MODE_INFO *const above_mi = xd->above_mi;
const MODE_INFO *const left_mi = xd->left_mi;
@@ -2076,12 +2133,6 @@ static void write_mb_modes_kf(AV1_COMMON *cm,
(void)mi_row;
(void)mi_col;
-#if CONFIG_EC_ADAPT
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-#else
- FRAME_CONTEXT *ec_ctx = cm->fc;
-#endif
-
if (seg->update_map) write_segment_id(w, seg, segp, mbmi->segment_id);
#if CONFIG_DELTA_Q
@@ -2110,18 +2161,17 @@ static void write_mb_modes_kf(AV1_COMMON *cm,
write_skip(cm, xd, mbmi->segment_id, mi, w);
#endif
- if (cm->tx_mode == TX_MODE_SELECT &&
+ int enable_tx_size = cm->tx_mode == TX_MODE_SELECT &&
#if CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_RECT_TX)
#if CONFIG_RECT_TX
- bsize > BLOCK_4X4 &&
+ bsize > BLOCK_4X4 &&
#else
- bsize >= BLOCK_8X8 &&
+ bsize >= BLOCK_8X8 &&
#endif // CONFIG_RECT_TX
#else
- bsize >= BLOCK_8X8 &&
+ bsize >= BLOCK_8X8 &&
#endif
- !xd->lossless[mbmi->segment_id])
- write_selected_tx_size(cm, xd, w);
+ !xd->lossless[mbmi->segment_id];
#if CONFIG_INTRABC
if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools) {
@@ -2129,7 +2179,8 @@ static void write_mb_modes_kf(AV1_COMMON *cm,
aom_write(w, use_intrabc, ec_ctx->intrabc_prob);
if (use_intrabc) {
assert(mbmi->mode == DC_PRED);
- assert(mbmi->uv_mode == DC_PRED);
+ assert(mbmi->uv_mode == UV_DC_PRED);
+ if (enable_tx_size && !mbmi->skip) write_selected_tx_size(cm, xd, w);
int_mv dv_ref = mbmi_ext->ref_mvs[INTRA_FRAME][0];
av1_encode_dv(w, &mbmi->mv[0].as_mv, &dv_ref.as_mv, &ec_ctx->ndvc);
#if CONFIG_EXT_TX && !CONFIG_TXK_SEL
@@ -2143,6 +2194,7 @@ static void write_mb_modes_kf(AV1_COMMON *cm,
}
}
#endif // CONFIG_INTRABC
+ if (enable_tx_size) write_selected_tx_size(cm, xd, w);
if (bsize >= BLOCK_8X8 || unify_bsize) {
write_intra_mode_kf(cm, ec_ctx, mi, above_mi, left_mi, 0, mbmi->mode, w);
@@ -2169,9 +2221,8 @@ static void write_mb_modes_kf(AV1_COMMON *cm,
#endif // CONFIG_CB4X4
#if CONFIG_CFL
- if (mbmi->uv_mode == DC_PRED) {
- write_cfl_alphas(ec_ctx, mbmi->skip, mbmi->cfl_alpha_idx,
- mbmi->cfl_alpha_signs, w);
+ if (mbmi->uv_mode == UV_DC_PRED) {
+ write_cfl_alphas(ec_ctx, mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, w);
}
#endif
@@ -2252,6 +2303,89 @@ static int rd_token_stats_mismatch(RD_STATS *rd_stats, TOKEN_STATS *token_stats,
}
#endif
+#if ENC_MISMATCH_DEBUG
+static void enc_dump_logs(AV1_COMP *cpi, int mi_row, int mi_col) {
+ AV1_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+ MODE_INFO *m;
+ xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
+ m = xd->mi[0];
+ if (is_inter_block(&m->mbmi)) {
+#define FRAME_TO_CHECK 1
+ if (cm->current_video_frame == FRAME_TO_CHECK /* && cm->show_frame == 1*/) {
+ const MB_MODE_INFO *const mbmi = &m->mbmi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+
+ int_mv mv[2];
+ int is_comp_ref = has_second_ref(&m->mbmi);
+ int ref;
+
+ for (ref = 0; ref < 1 + is_comp_ref; ++ref)
+ mv[ref].as_mv = m->mbmi.mv[ref].as_mv;
+
+ if (!is_comp_ref) {
+#if CONFIG_COMPOUND_SINGLEREF
+ if (is_inter_singleref_comp_mode(m->mbmi.mode))
+ mv[1].as_mv = m->mbmi.mv[1].as_mv;
+ else
+#endif // CONFIG_COMPOUND_SINGLEREF
+ mv[1].as_int = 0;
+ }
+ int interp_ctx[2] = { -1 };
+ int interp_filter[2] = { cm->interp_filter };
+ if (cm->interp_filter == SWITCHABLE) {
+ int dir;
+ for (dir = 0; dir < 2; ++dir) {
+ if (has_subpel_mv_component(xd->mi[0], xd, dir) ||
+ (mbmi->ref_frame[1] > INTRA_FRAME &&
+ has_subpel_mv_component(xd->mi[0], xd, dir + 2))) {
+ interp_ctx[dir] = av1_get_pred_context_switchable_interp(xd, dir);
+ interp_filter[dir] = mbmi->interp_filter[dir];
+ } else {
+ interp_filter[dir] = EIGHTTAP_REGULAR;
+ }
+ }
+ }
+
+ MACROBLOCK *const x = &cpi->td.mb;
+ const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
+ const int16_t mode_ctx = av1_mode_context_analyzer(
+ mbmi_ext->mode_context, mbmi->ref_frame, bsize, -1);
+ const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK;
+ int16_t zeromv_ctx = -1;
+ int16_t refmv_ctx = -1;
+ if (mbmi->mode != NEWMV) {
+ zeromv_ctx = (mode_ctx >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;
+ if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) {
+ assert(mbmi->mode == ZEROMV);
+ }
+ if (mbmi->mode != ZEROMV) {
+ refmv_ctx = (mode_ctx >> REFMV_OFFSET) & REFMV_CTX_MASK;
+ if (mode_ctx & (1 << SKIP_NEARESTMV_OFFSET)) refmv_ctx = 6;
+ if (mode_ctx & (1 << SKIP_NEARMV_OFFSET)) refmv_ctx = 7;
+ if (mode_ctx & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) refmv_ctx = 8;
+ }
+ }
+
+ int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
+ printf(
+ "=== ENCODER ===: "
+ "Frame=%d, (mi_row,mi_col)=(%d,%d), mode=%d, bsize=%d, "
+ "show_frame=%d, mv[0]=(%d,%d), mv[1]=(%d,%d), ref[0]=%d, "
+ "ref[1]=%d, motion_mode=%d, inter_mode_ctx=%d, mode_ctx=%d, "
+ "interp_ctx=(%d,%d), interp_filter=(%d,%d), newmv_ctx=%d, "
+ "zeromv_ctx=%d, refmv_ctx=%d\n",
+ cm->current_video_frame, mi_row, mi_col, mbmi->mode, bsize,
+ cm->show_frame, mv[0].as_mv.row, mv[0].as_mv.col, mv[1].as_mv.row,
+ mv[1].as_mv.col, mbmi->ref_frame[0], mbmi->ref_frame[1],
+ mbmi->motion_mode, mbmi_ext->mode_context[ref_frame_type], mode_ctx,
+ interp_ctx[0], interp_ctx[1], interp_filter[0], interp_filter[1],
+ newmv_ctx, zeromv_ctx, refmv_ctx);
+ }
+ }
+}
+#endif // ENC_MISMATCH_DEBUG
+
static void write_mbmi_b(AV1_COMP *cpi, const TileInfo *const tile,
aom_writer *w,
#if CONFIG_SUPERTX
@@ -2265,7 +2399,8 @@ static void write_mbmi_b(AV1_COMP *cpi, const TileInfo *const tile,
xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
m = xd->mi[0];
- assert(m->mbmi.sb_type <= cm->sb_size);
+ assert(m->mbmi.sb_type <= cm->sb_size ||
+ (m->mbmi.sb_type >= BLOCK_4X16 && m->mbmi.sb_type <= BLOCK_32X8));
bh = mi_size_high[m->mbmi.sb_type];
bw = mi_size_wide[m->mbmi.sb_type];
@@ -2291,36 +2426,22 @@ static void write_mbmi_b(AV1_COMP *cpi, const TileInfo *const tile,
xd->left_txfm_context = xd->left_txfm_context_buffer +
((mi_row & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2);
#endif
-#if CONFIG_DUAL_FILTER
+#if CONFIG_DUAL_FILTER || CONFIG_WARPED_MOTION
// has_subpel_mv_component needs the ref frame buffers set up to look
// up if they are scaled. has_subpel_mv_component is in turn needed by
// write_switchable_interp_filter, which is called by pack_inter_mode_mvs.
set_ref_ptrs(cm, xd, m->mbmi.ref_frame[0], m->mbmi.ref_frame[1]);
-#endif // CONFIG_DUAL_FILTER
-#if 0
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(&m->mbmi) && is_inter_singleref_comp_mode(m->mbmi.mode))
+ xd->block_refs[1] = xd->block_refs[0];
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_DUAL_FILTER || CONFIG_WARPED_MOTION
+
+#if ENC_MISMATCH_DEBUG
// NOTE(zoeliu): For debug
- if (cm->current_video_frame == FRAME_TO_CHECK && cm->show_frame == 1) {
- const PREDICTION_MODE mode = m->mbmi.mode;
- const int segment_id = m->mbmi.segment_id;
- const BLOCK_SIZE bsize = m->mbmi.sb_type;
-
- // For sub8x8, simply dump out the first sub8x8 block info
- const PREDICTION_MODE b_mode =
- (bsize < BLOCK_8X8) ? m->bmi[0].as_mode : -1;
- const int mv_x = (bsize < BLOCK_8X8) ?
- m->bmi[0].as_mv[0].as_mv.row : m->mbmi.mv[0].as_mv.row;
- const int mv_y = (bsize < BLOCK_8X8) ?
- m->bmi[0].as_mv[0].as_mv.col : m->mbmi.mv[0].as_mv.col;
-
- printf("Before pack_inter_mode_mvs(): "
- "Frame=%d, (mi_row,mi_col)=(%d,%d), "
- "mode=%d, segment_id=%d, bsize=%d, b_mode=%d, "
- "mv[0]=(%d, %d), ref[0]=%d, ref[1]=%d\n",
- cm->current_video_frame, mi_row, mi_col,
- mode, segment_id, bsize, b_mode, mv_x, mv_y,
- m->mbmi.ref_frame[0], m->mbmi.ref_frame[1]);
- }
-#endif // 0
+ enc_dump_logs(cpi, mi_row, mi_col);
+#endif // ENC_MISMATCH_DEBUG
+
pack_inter_mode_mvs(cpi, mi_row, mi_col,
#if CONFIG_SUPERTX
supertx_enabled,
@@ -2335,7 +2456,8 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
int mi_col) {
AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
- MODE_INFO *const m = xd->mi[0];
+ const int mi_offset = mi_row * cm->mi_stride + mi_col;
+ MODE_INFO *const m = *(cm->mi_grid_visible + mi_offset);
MB_MODE_INFO *const mbmi = &m->mbmi;
int plane;
int bh, bw;
@@ -2344,9 +2466,10 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
(void)tok;
(void)tok_end;
#endif
- xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
+ xd->mi = cm->mi_grid_visible + mi_offset;
- assert(mbmi->sb_type <= cm->sb_size);
+ assert(mbmi->sb_type <= cm->sb_size ||
+ (mbmi->sb_type >= BLOCK_4X16 && mbmi->sb_type <= BLOCK_32X8));
bh = mi_size_high[mbmi->sb_type];
bw = mi_size_wide[mbmi->sb_type];
@@ -2371,7 +2494,7 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
av1_get_block_dimensions(mbmi->sb_type, plane, xd, NULL, NULL, &rows,
&cols);
assert(*tok < tok_end);
- pack_palette_tokens(w, tok, palette_size_plane, rows * cols - 1);
+ pack_palette_tokens(w, tok, palette_size_plane, rows * cols);
assert(*tok < tok_end + mbmi->skip);
}
}
@@ -2382,7 +2505,7 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
const struct macroblockd_plane *const pd_y = &xd->plane[0];
const struct macroblockd_plane *const pd_c = &xd->plane[1];
const TX_SIZE tx_log2_y = mbmi->tx_size;
- const TX_SIZE tx_log2_c = get_uv_tx_size(mbmi, pd_c);
+ const TX_SIZE tx_log2_c = av1_get_uv_tx_size(mbmi, pd_c);
const int tx_sz_y = (1 << tx_log2_y);
const int tx_sz_c = (1 << tx_log2_c);
@@ -2469,13 +2592,11 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
#if CONFIG_VAR_TX
const struct macroblockd_plane *const pd = &xd->plane[plane];
BLOCK_SIZE bsize = mbmi->sb_type;
-#if CONFIG_CB4X4
-#if CONFIG_CHROMA_2X2
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
-#else
+#if CONFIG_CHROMA_SUB8X8
const BLOCK_SIZE plane_bsize =
AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
-#endif
+#elif CONFIG_CB4X4
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
#else
const BLOCK_SIZE plane_bsize =
get_plane_block_size(AOMMAX(bsize, BLOCK_8X8), pd);
@@ -2489,6 +2610,15 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
TOKEN_STATS token_stats;
init_token_stats(&token_stats);
+ const BLOCK_SIZE max_unit_bsize = get_plane_block_size(BLOCK_64X64, pd);
+ int mu_blocks_wide =
+ block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
+ int mu_blocks_high =
+ block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
+
+ mu_blocks_wide = AOMMIN(num_4x4_w, mu_blocks_wide);
+ mu_blocks_high = AOMMIN(num_4x4_h, mu_blocks_high);
+
if (is_inter_block(mbmi)) {
const TX_SIZE max_tx_size = get_vartx_max_txsize(mbmi, plane_bsize);
int block = 0;
@@ -2496,19 +2626,27 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
const int bkw = tx_size_wide_unit[max_tx_size];
const int bkh = tx_size_high_unit[max_tx_size];
- for (row = 0; row < num_4x4_h; row += bkh) {
- for (col = 0; col < num_4x4_w; col += bkw) {
- pack_txb_tokens(w,
+ for (row = 0; row < num_4x4_h; row += mu_blocks_high) {
+ const int unit_height = AOMMIN(mu_blocks_high + row, num_4x4_h);
+ for (col = 0; col < num_4x4_w; col += mu_blocks_wide) {
+ int blk_row, blk_col;
+ const int unit_width = AOMMIN(mu_blocks_wide + col, num_4x4_w);
+ for (blk_row = row; blk_row < unit_height; blk_row += bkh) {
+ for (blk_col = col; blk_col < unit_width; blk_col += bkw) {
+ pack_txb_tokens(w,
#if CONFIG_LV_MAP
- cm,
+ cm,
#endif
- tok, tok_end,
+ tok, tok_end,
#if CONFIG_PVQ || CONFIG_LV_MAP
- x,
+ x,
#endif
- xd, mbmi, plane, plane_bsize, cm->bit_depth, block,
- row, col, max_tx_size, &token_stats);
- block += step;
+ xd, mbmi, plane, plane_bsize, cm->bit_depth,
+ block, blk_row, blk_col, max_tx_size,
+ &token_stats);
+ block += step;
+ }
+ }
}
}
#if CONFIG_RD_DEBUG
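The restructured loop above walks the block in processing units capped at 64x64 samples (mu_blocks_wide/high, clamped to the block) and only inside each unit steps through transform blocks. A standalone sketch of that traversal; sizes are in 4x4 units and the concrete numbers in main() are illustrative only:

#include <stdio.h>

static int imin(int a, int b) { return a < b ? a : b; }

/* Sketch of the processing-unit traversal used for token packing above. */
static void sketch_pack_tokens(int num_4x4_w, int num_4x4_h,
                               int bkw, int bkh, int mu_w, int mu_h) {
  mu_w = imin(num_4x4_w, mu_w);
  mu_h = imin(num_4x4_h, mu_h);
  for (int row = 0; row < num_4x4_h; row += mu_h) {
    const int unit_height = imin(mu_h + row, num_4x4_h);
    for (int col = 0; col < num_4x4_w; col += mu_w) {
      const int unit_width = imin(mu_w + col, num_4x4_w);
      for (int r = row; r < unit_height; r += bkh)       /* tx blocks inside */
        for (int c = col; c < unit_width; c += bkw)      /* one 64x64 unit   */
          printf("pack txb at (%d,%d)\n", r, c);
    }
  }
}

int main(void) {
  /* 128x32 block (32x8 in 4x4 units), 16x16 tx blocks, 64x64 units. */
  sketch_pack_tokens(32, 8, 4, 4, 16, 16);
  return 0;
}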
@@ -2522,22 +2660,32 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
#if CONFIG_LV_MAP
av1_write_coeffs_mb(cm, x, w, plane);
#else
- TX_SIZE tx = get_tx_size(plane, xd);
+ const TX_SIZE tx = av1_get_tx_size(plane, xd);
const int bkw = tx_size_wide_unit[tx];
const int bkh = tx_size_high_unit[tx];
- for (row = 0; row < num_4x4_h; row += bkh) {
- for (col = 0; col < num_4x4_w; col += bkw) {
+ int blk_row, blk_col;
+
+ for (row = 0; row < num_4x4_h; row += mu_blocks_high) {
+ for (col = 0; col < num_4x4_w; col += mu_blocks_wide) {
+ const int unit_height = AOMMIN(mu_blocks_high + row, num_4x4_h);
+ const int unit_width = AOMMIN(mu_blocks_wide + col, num_4x4_w);
+
+ for (blk_row = row; blk_row < unit_height; blk_row += bkh) {
+ for (blk_col = col; blk_col < unit_width; blk_col += bkw) {
#if !CONFIG_PVQ
- pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx, &token_stats);
+ pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx,
+ &token_stats);
#else
- pack_pvq_tokens(w, x, xd, plane, bsize, tx);
+ pack_pvq_tokens(w, x, xd, plane, bsize, tx);
#endif
+ }
+ }
}
}
#endif // CONFIG_LV_MAP
}
#else
- TX_SIZE tx = get_tx_size(plane, xd);
+ const TX_SIZE tx = av1_get_tx_size(plane, xd);
TOKEN_STATS token_stats;
#if !CONFIG_PVQ
init_token_stats(&token_stats);
@@ -2570,7 +2718,7 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
#endif // CONFIG_COEF_INTERLEAVE
}
-#if CONFIG_MOTION_VAR && CONFIG_NCOBMC
+#if CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT)
static void write_tokens_sb(AV1_COMP *cpi, const TileInfo *const tile,
aom_writer *w, const TOKENEXTRA **tok,
const TOKENEXTRA *const tok_end, int mi_row,
@@ -2656,7 +2804,7 @@ static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile,
supertx_enabled,
#endif
mi_row, mi_col);
-#if CONFIG_MOTION_VAR && CONFIG_NCOBMC
+#if CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT)
(void)tok;
(void)tok_end;
#else
@@ -2688,12 +2836,8 @@ static void write_partition(const AV1_COMMON *const cm,
const aom_prob *const probs = cm->fc->partition_prob[ctx];
#endif
-#if CONFIG_EC_ADAPT
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
(void)cm;
-#else
- FRAME_CONTEXT *ec_ctx = cm->fc;
-#endif
if (!is_partition_point) return;
@@ -2738,6 +2882,10 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
const int hbs = mi_size_wide[bsize] / 2;
+#if CONFIG_EXT_PARTITION_TYPES
+ const int quarter_step = mi_size_wide[bsize] / 4;
+ int i;
+#endif
const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize);
const BLOCK_SIZE subsize = get_subsize(bsize, partition);
#if CONFIG_CB4X4
@@ -2843,6 +2991,24 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
mi_row + hbs, mi_col + hbs);
break;
+ case PARTITION_HORZ_4:
+ for (i = 0; i < 4; ++i) {
+ int this_mi_row = mi_row + i * quarter_step;
+ if (i > 0 && this_mi_row >= cm->mi_rows) break;
+
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ this_mi_row, mi_col);
+ }
+ break;
+ case PARTITION_VERT_4:
+ for (i = 0; i < 4; ++i) {
+ int this_mi_col = mi_col + i * quarter_step;
+ if (i > 0 && this_mi_col >= cm->mi_cols) break;
+
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, this_mi_col);
+ }
+ break;
#endif // CONFIG_EXT_PARTITION_TYPES
default: assert(0);
}
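The new PARTITION_HORZ_4 and PARTITION_VERT_4 cases above split a block into four strips a quarter of the block wide or high and stop coding strips that start at or beyond the frame edge. A small sketch of the horizontal case (the vertical case is the same loop transposed); units are mode-info rows and the function name is a local stand-in:

#include <stdio.h>

/* Sketch of the PARTITION_HORZ_4 traversal added above. */
static void sketch_write_horz4(int mi_row, int bsize_mi_high, int frame_mi_rows) {
  const int quarter_step = bsize_mi_high / 4;
  for (int i = 0; i < 4; ++i) {
    const int this_mi_row = mi_row + i * quarter_step;
    if (i > 0 && this_mi_row >= frame_mi_rows) break; /* strip off the frame */
    printf("write modes for strip at mi_row %d\n", this_mi_row);
  }
}

int main(void) {
  sketch_write_horz4(60, 16, 68); /* the last two strips fall outside the frame */
  return 0;
}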
@@ -2865,17 +3031,15 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
assert(mbmi->segment_id_supertx < MAX_SEGMENTS);
skip = write_skip(cm, xd, mbmi->segment_id_supertx, xd->mi[0], w);
+
+ FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
+
#if CONFIG_EXT_TX
if (get_ext_tx_types(supertx_size, bsize, 1, cm->reduced_tx_set_used) > 1 &&
!skip) {
const int eset =
get_ext_tx_set(supertx_size, bsize, 1, cm->reduced_tx_set_used);
if (eset > 0) {
-#if CONFIG_EC_ADAPT
- FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-#else
- FRAME_CONTEXT *ec_ctx = cm->fc;
-#endif
aom_write_symbol(w, av1_ext_tx_inter_ind[eset][mbmi->tx_type],
ec_ctx->inter_ext_tx_cdf[eset][supertx_size],
ext_tx_cnt_inter[eset]);
@@ -2883,9 +3047,8 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
}
#else
if (supertx_size < TX_32X32 && !skip) {
- av1_write_token(w, av1_ext_tx_tree,
- cm->fc->inter_ext_tx_prob[supertx_size],
- &ext_tx_encodings[mbmi->tx_type]);
+ aom_write_symbol(w, mbmi->tx_type, ec_ctx->inter_ext_tx_cdf[supertx_size],
+ TX_TYPES);
}
#endif // CONFIG_EXT_TX
@@ -2900,7 +3063,7 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
int row, col;
- TX_SIZE tx = get_tx_size(plane, xd);
+ const TX_SIZE tx = av1_get_tx_size(plane, xd);
BLOCK_SIZE txb_size = txsize_to_bsize[tx];
const int stepr = tx_size_high_unit[txb_size];
@@ -2934,11 +3097,24 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
#endif // CONFIG_EXT_PARTITION_TYPES
#if CONFIG_CDEF
- if (bsize == cm->sb_size && !sb_all_skip(cm, mi_row, mi_col) &&
- cm->cdef_bits != 0) {
- aom_write_literal(w, cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]
- ->mbmi.cdef_strength,
- cm->cdef_bits);
+ if (bsize == cm->sb_size && cm->cdef_bits != 0 && !cm->all_lossless) {
+ int width_step = mi_size_wide[BLOCK_64X64];
+ int height_step = mi_size_high[BLOCK_64X64];
+ int width, height;
+ for (height = 0; (height < mi_size_high[cm->sb_size]) &&
+ (mi_row + height < cm->mi_rows);
+ height += height_step) {
+ for (width = 0; (width < mi_size_wide[cm->sb_size]) &&
+ (mi_col + width < cm->mi_cols);
+ width += width_step) {
+ if (!sb_all_skip(cm, mi_row + height, mi_col + width))
+ aom_write_literal(
+ w, cm->mi_grid_visible[(mi_row + height) * cm->mi_stride +
+ (mi_col + width)]
+ ->mbmi.cdef_strength,
+ cm->cdef_bits);
+ }
+ }
}
#endif
}
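The CDEF hunk above replaces the single per-superblock strength with one cdef_strength literal per 64x64 unit inside the superblock, skipping units past the frame edge and units whose blocks are all skipped. A standalone sketch of that loop; sketch_unit_all_skip stands in for sb_all_skip() and everything is in mode-info (4x4) units:

#include <stdio.h>

static int sketch_unit_all_skip(int unit_row, int unit_col) {
  (void)unit_row; (void)unit_col;
  return 0; /* stand-in for sb_all_skip(); pretend no unit is fully skipped */
}

/* Sketch of the per-64x64 CDEF strength signaling added above. */
static void sketch_write_cdef(int mi_row, int mi_col, int sb_mi_size,
                              int frame_mi_rows, int frame_mi_cols,
                              int cdef_bits) {
  const int step = 16; /* 64x64 expressed in 4x4 units */
  if (cdef_bits == 0) return;
  for (int h = 0; h < sb_mi_size && mi_row + h < frame_mi_rows; h += step)
    for (int w = 0; w < sb_mi_size && mi_col + w < frame_mi_cols; w += step)
      if (!sketch_unit_all_skip(mi_row + h, mi_col + w))
        printf("cdef_strength literal (%d bits) at (%d,%d)\n", cdef_bits,
               mi_row + h, mi_col + w);
}

int main(void) {
  sketch_write_cdef(0, 0, 32, 100, 100, 3); /* 128x128 superblock: 4 units */
  return 0;
}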
@@ -2955,12 +3131,8 @@ static void write_modes(AV1_COMP *const cpi, const TileInfo *const tile,
int mi_row, mi_col;
#if CONFIG_DEPENDENT_HORZTILES
-#if CONFIG_TILE_GROUPS
if (!cm->dependent_horz_tiles || mi_row_start == 0 ||
tile->tg_horz_boundary) {
-#else
- if (!cm->dependent_horz_tiles || mi_row_start == 0) {
-#endif
av1_zero_above_context(cm, mi_col_start, mi_col_end);
}
#else
@@ -2986,7 +3158,7 @@ static void write_modes(AV1_COMP *const cpi, const TileInfo *const tile,
for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += cm->mib_size) {
write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, 0, mi_row, mi_col,
cm->sb_size);
-#if CONFIG_MOTION_VAR && CONFIG_NCOBMC
+#if CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT)
write_tokens_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, cm->sb_size);
#endif
}
@@ -3000,214 +3172,6 @@ static void write_modes(AV1_COMP *const cpi, const TileInfo *const tile,
#endif
}
-#if !CONFIG_LV_MAP
-#if !CONFIG_PVQ && !CONFIG_EC_ADAPT
-static void build_tree_distribution(AV1_COMP *cpi, TX_SIZE tx_size,
- av1_coeff_stats *coef_branch_ct,
- av1_coeff_probs_model *coef_probs) {
- av1_coeff_count *coef_counts = cpi->td.rd_counts.coef_counts[tx_size];
- unsigned int(*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] =
- cpi->common.counts.eob_branch[tx_size];
- int i, j, k, l, m;
-#if CONFIG_RECT_TX
- assert(!is_rect_tx(tx_size));
-#endif // CONFIG_RECT_TX
-
- for (i = 0; i < PLANE_TYPES; ++i) {
- for (j = 0; j < REF_TYPES; ++j) {
- for (k = 0; k < COEF_BANDS; ++k) {
- for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
- av1_tree_probs_from_distribution(av1_coef_tree,
- coef_branch_ct[i][j][k][l],
- coef_counts[i][j][k][l]);
- coef_branch_ct[i][j][k][l][0][1] =
- eob_branch_ct[i][j][k][l] - coef_branch_ct[i][j][k][l][0][0];
- for (m = 0; m < UNCONSTRAINED_NODES; ++m)
- coef_probs[i][j][k][l][m] =
- get_binary_prob(coef_branch_ct[i][j][k][l][m][0],
- coef_branch_ct[i][j][k][l][m][1]);
- }
- }
- }
- }
-}
-
-#if !CONFIG_EC_ADAPT
-static void update_coef_probs_common(aom_writer *const bc, AV1_COMP *cpi,
- TX_SIZE tx_size,
- av1_coeff_stats *frame_branch_ct,
- av1_coeff_probs_model *new_coef_probs) {
- av1_coeff_probs_model *old_coef_probs = cpi->common.fc->coef_probs[tx_size];
- const aom_prob upd = DIFF_UPDATE_PROB;
-#if CONFIG_EC_ADAPT
- const int entropy_nodes_update = UNCONSTRAINED_NODES - 1;
-#else
- const int entropy_nodes_update = UNCONSTRAINED_NODES;
-#endif
- int i, j, k, l, t;
- int stepsize = cpi->sf.coeff_prob_appx_step;
-#if CONFIG_TILE_GROUPS
- const int probwt = cpi->common.num_tg;
-#else
- const int probwt = 1;
-#endif
-#if CONFIG_RECT_TX
- assert(!is_rect_tx(tx_size));
-#endif // CONFIG_RECT_TX
-
- switch (cpi->sf.use_fast_coef_updates) {
- case TWO_LOOP: {
- /* dry run to see if there is any update at all needed */
- int savings = 0;
- int update[2] = { 0, 0 };
- for (i = 0; i < PLANE_TYPES; ++i) {
- for (j = 0; j < REF_TYPES; ++j) {
- for (k = 0; k < COEF_BANDS; ++k) {
- for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
- for (t = 0; t < entropy_nodes_update; ++t) {
- aom_prob newp = new_coef_probs[i][j][k][l][t];
- const aom_prob oldp = old_coef_probs[i][j][k][l][t];
- int s;
- int u = 0;
- if (t == PIVOT_NODE)
- s = av1_prob_diff_update_savings_search_model(
- frame_branch_ct[i][j][k][l][0], oldp, &newp, upd,
- stepsize, probwt);
- else
- s = av1_prob_diff_update_savings_search(
- frame_branch_ct[i][j][k][l][t], oldp, &newp, upd, probwt);
-
- if (s > 0 && newp != oldp) u = 1;
- if (u)
- savings += s - (int)(av1_cost_zero(upd));
- else
- savings -= (int)(av1_cost_zero(upd));
- update[u]++;
- }
- }
- }
- }
- }
-
- /* Is coef updated at all */
- if (update[1] == 0 || savings < 0) {
- aom_write_bit(bc, 0);
- return;
- }
- aom_write_bit(bc, 1);
- for (i = 0; i < PLANE_TYPES; ++i) {
- for (j = 0; j < REF_TYPES; ++j) {
- for (k = 0; k < COEF_BANDS; ++k) {
- for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
- // calc probs and branch cts for this frame only
- for (t = 0; t < entropy_nodes_update; ++t) {
- aom_prob newp = new_coef_probs[i][j][k][l][t];
- aom_prob *oldp = old_coef_probs[i][j][k][l] + t;
- int s;
- int u = 0;
- if (t == PIVOT_NODE)
- s = av1_prob_diff_update_savings_search_model(
- frame_branch_ct[i][j][k][l][0], *oldp, &newp, upd,
- stepsize, probwt);
- else
- s = av1_prob_diff_update_savings_search(
- frame_branch_ct[i][j][k][l][t], *oldp, &newp, upd,
- probwt);
- if (s > 0 && newp != *oldp) u = 1;
- aom_write(bc, u, upd);
- if (u) {
- /* send/use new probability */
- av1_write_prob_diff_update(bc, newp, *oldp);
- *oldp = newp;
- }
- }
- }
- }
- }
- }
- return;
- }
-
- case ONE_LOOP_REDUCED: {
- int updates = 0;
- int noupdates_before_first = 0;
- for (i = 0; i < PLANE_TYPES; ++i) {
- for (j = 0; j < REF_TYPES; ++j) {
- for (k = 0; k < COEF_BANDS; ++k) {
- for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
- // calc probs and branch cts for this frame only
- for (t = 0; t < entropy_nodes_update; ++t) {
- aom_prob newp = new_coef_probs[i][j][k][l][t];
- aom_prob *oldp = old_coef_probs[i][j][k][l] + t;
- int s;
- int u = 0;
- if (t == PIVOT_NODE) {
- s = av1_prob_diff_update_savings_search_model(
- frame_branch_ct[i][j][k][l][0], *oldp, &newp, upd,
- stepsize, probwt);
- } else {
- s = av1_prob_diff_update_savings_search(
- frame_branch_ct[i][j][k][l][t], *oldp, &newp, upd,
- probwt);
- }
-
- if (s > 0 && newp != *oldp) u = 1;
- updates += u;
- if (u == 0 && updates == 0) {
- noupdates_before_first++;
- continue;
- }
- if (u == 1 && updates == 1) {
- int v;
- // first update
- aom_write_bit(bc, 1);
- for (v = 0; v < noupdates_before_first; ++v)
- aom_write(bc, 0, upd);
- }
- aom_write(bc, u, upd);
- if (u) {
- /* send/use new probability */
- av1_write_prob_diff_update(bc, newp, *oldp);
- *oldp = newp;
- }
- }
- }
- }
- }
- }
- if (updates == 0) {
- aom_write_bit(bc, 0); // no updates
- }
- return;
- }
- default: assert(0);
- }
-}
-#endif
-
-#if !CONFIG_EC_ADAPT
-static void update_coef_probs(AV1_COMP *cpi, aom_writer *w) {
- const TX_MODE tx_mode = cpi->common.tx_mode;
- const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
- TX_SIZE tx_size;
-
- for (tx_size = 0; tx_size <= max_tx_size; ++tx_size) {
- av1_coeff_stats frame_branch_ct[PLANE_TYPES];
- av1_coeff_probs_model frame_coef_probs[PLANE_TYPES];
- if (cpi->td.counts->tx_size_totals[tx_size] <= 20 || CONFIG_RD_DEBUG ||
- (tx_size >= TX_16X16 && cpi->sf.tx_size_search_method == USE_TX_8X8)) {
- aom_write_bit(w, 0);
- } else {
- build_tree_distribution(cpi, tx_size, frame_branch_ct, frame_coef_probs);
- update_coef_probs_common(w, cpi, tx_size, frame_branch_ct,
- frame_coef_probs);
- }
- }
-}
-#endif // !CONFIG_EC_ADAPT
-#endif // !CONFIG_EC_ADAPT
-#endif // !CONFIG_LV_MAP
-
#if CONFIG_LOOP_RESTORATION
static void encode_restoration_mode(AV1_COMMON *cm,
struct aom_write_bit_buffer *wb) {
@@ -3257,6 +3221,23 @@ static void encode_restoration_mode(AV1_COMMON *cm,
wb, rsi->restoration_tilesize != (RESTORATION_TILESIZE_MAX >> 1));
}
}
+ int s = AOMMIN(cm->subsampling_x, cm->subsampling_y);
+ if (s && (cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
+ cm->rst_info[2].frame_restoration_type != RESTORE_NONE)) {
+ aom_wb_write_bit(wb, cm->rst_info[1].restoration_tilesize !=
+ cm->rst_info[0].restoration_tilesize);
+ assert(cm->rst_info[1].restoration_tilesize ==
+ cm->rst_info[0].restoration_tilesize ||
+ cm->rst_info[1].restoration_tilesize ==
+ (cm->rst_info[0].restoration_tilesize >> s));
+ assert(cm->rst_info[2].restoration_tilesize ==
+ cm->rst_info[1].restoration_tilesize);
+ } else if (!s) {
+ assert(cm->rst_info[1].restoration_tilesize ==
+ cm->rst_info[0].restoration_tilesize);
+ assert(cm->rst_info[2].restoration_tilesize ==
+ cm->rst_info[1].restoration_tilesize);
+ }
}
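The addition above signals, for subsampled chroma that actually uses loop restoration, a single bit saying whether the chroma restoration tile size is the luma size or the luma size scaled down by the subsampling shift; 4:4:4 content sends nothing and reuses the luma size. A small sketch of that decision with illustrative sizes; the function name is a stand-in:

#include <stdio.h>
#include <assert.h>

static void put_bit(int b) { printf("bit %d\n", b); }

/* Sketch of the chroma restoration-tile-size signaling added above. */
static void sketch_signal_uv_tilesize(int luma_size, int uv_size,
                                      int subsampling_shift, int uv_restored) {
  if (subsampling_shift && uv_restored) {
    put_bit(uv_size != luma_size);
    assert(uv_size == luma_size || uv_size == (luma_size >> subsampling_shift));
  } else if (!subsampling_shift) {
    assert(uv_size == luma_size); /* nothing is signaled for 4:4:4 */
  }
}

int main(void) {
  sketch_signal_uv_tilesize(256, 128, 1, 1); /* 4:2:0 with halved chroma tiles */
  return 0;
}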
static void write_wiener_filter(WienerInfo *wiener_info,
@@ -3311,16 +3292,23 @@ static void write_sgrproj_filter(SgrprojInfo *sgrproj_info,
static void encode_restoration(AV1_COMMON *cm, aom_writer *wb) {
int i, p;
- const int ntiles = av1_get_rest_ntiles(cm->width, cm->height,
- cm->rst_info[0].restoration_tilesize,
- NULL, NULL, NULL, NULL);
+#if CONFIG_FRAME_SUPERRES
+ const int width = cm->superres_upscaled_width;
+ const int height = cm->superres_upscaled_height;
+#else
+ const int width = cm->width;
+ const int height = cm->height;
+#endif // CONFIG_FRAME_SUPERRES
+ const int ntiles =
+ av1_get_rest_ntiles(width, height, cm->rst_info[0].restoration_tilesize,
+ NULL, NULL, NULL, NULL);
WienerInfo ref_wiener_info;
SgrprojInfo ref_sgrproj_info;
set_default_wiener(&ref_wiener_info);
set_default_sgrproj(&ref_sgrproj_info);
const int ntiles_uv = av1_get_rest_ntiles(
- ROUND_POWER_OF_TWO(cm->width, cm->subsampling_x),
- ROUND_POWER_OF_TWO(cm->height, cm->subsampling_y),
+ ROUND_POWER_OF_TWO(width, cm->subsampling_x),
+ ROUND_POWER_OF_TWO(height, cm->subsampling_y),
cm->rst_info[1].restoration_tilesize, NULL, NULL, NULL, NULL);
RestorationInfo *rsi = &cm->rst_info[0];
if (rsi->frame_restoration_type != RESTORE_NONE) {
@@ -3389,6 +3377,12 @@ static void encode_loopfilter(AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
// Encode the loop filter level and type
aom_wb_write_literal(wb, lf->filter_level, 6);
+#if CONFIG_UV_LVL
+ if (lf->filter_level > 0) {
+ aom_wb_write_literal(wb, lf->filter_level_u, 6);
+ aom_wb_write_literal(wb, lf->filter_level_v, 6);
+ }
+#endif
aom_wb_write_literal(wb, lf->sharpness_level, 3);
// Write out loop filter deltas applied at the MB level based on mode or
@@ -3509,51 +3503,17 @@ static void encode_segmentation(AV1_COMMON *cm, MACROBLOCKD *xd,
}
}
-#if !CONFIG_EC_ADAPT
-static void update_seg_probs(AV1_COMP *cpi, aom_writer *w) {
- AV1_COMMON *cm = &cpi->common;
-#if CONFIG_TILE_GROUPS
- const int probwt = cm->num_tg;
-#else
- const int probwt = 1;
-#endif
-
- if (!cm->seg.enabled || !cm->seg.update_map) return;
-
- if (cm->seg.temporal_update) {
- int i;
-
- for (i = 0; i < PREDICTION_PROBS; i++)
- av1_cond_prob_diff_update(w, &cm->fc->seg.pred_probs[i],
- cm->counts.seg.pred[i], probwt);
-
- prob_diff_update(av1_segment_tree, cm->fc->seg.tree_probs,
- cm->counts.seg.tree_mispred, MAX_SEGMENTS, probwt, w);
- } else {
- prob_diff_update(av1_segment_tree, cm->fc->seg.tree_probs,
- cm->counts.seg.tree_total, MAX_SEGMENTS, probwt, w);
- }
-}
-#endif
-
-static void write_tx_mode(AV1_COMMON *cm, MACROBLOCKD *xd, TX_MODE *mode,
+static void write_tx_mode(AV1_COMMON *cm, TX_MODE *mode,
struct aom_write_bit_buffer *wb) {
- int i, all_lossless = 1;
-
- if (cm->seg.enabled) {
- for (i = 0; i < MAX_SEGMENTS; ++i) {
- if (!xd->lossless[i]) {
- all_lossless = 0;
- break;
- }
- }
- } else {
- all_lossless = xd->lossless[0];
- }
- if (all_lossless) {
+ if (cm->all_lossless) {
*mode = ONLY_4X4;
return;
}
+#if CONFIG_VAR_TX_NO_TX_MODE
+ (void)wb;
+ *mode = TX_MODE_SELECT;
+ return;
+#else
#if CONFIG_TX64X64
aom_wb_write_bit(wb, *mode == TX_MODE_SELECT);
if (*mode != TX_MODE_SELECT) {
@@ -3564,26 +3524,9 @@ static void write_tx_mode(AV1_COMMON *cm, MACROBLOCKD *xd, TX_MODE *mode,
aom_wb_write_bit(wb, *mode == TX_MODE_SELECT);
if (*mode != TX_MODE_SELECT) aom_wb_write_literal(wb, *mode, 2);
#endif // CONFIG_TX64X64
+#endif // CONFIG_VAR_TX_NO_TX_MODE
}
-#if !CONFIG_EC_ADAPT
-static void update_txfm_probs(AV1_COMMON *cm, aom_writer *w,
- FRAME_COUNTS *counts) {
-#if CONFIG_TILE_GROUPS
- const int probwt = cm->num_tg;
-#else
- const int probwt = 1;
-#endif
- if (cm->tx_mode == TX_MODE_SELECT) {
- int i, j;
- for (i = 0; i < MAX_TX_DEPTH; ++i)
- for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
- prob_diff_update(av1_tx_size_tree[i], cm->fc->tx_size_probs[i][j],
- counts->tx_size[i][j], i + 2, probwt, w);
- }
-}
-#endif
-
static void write_frame_interp_filter(InterpFilter filter,
struct aom_write_bit_buffer *wb) {
aom_wb_write_bit(wb, filter == SWITCHABLE);
@@ -3624,52 +3567,52 @@ static void fix_interp_filter(AV1_COMMON *cm, FRAME_COUNTS *counts) {
static void write_tile_info(const AV1_COMMON *const cm,
struct aom_write_bit_buffer *wb) {
#if CONFIG_EXT_TILE
- const int tile_width =
- ALIGN_POWER_OF_TWO(cm->tile_width, cm->mib_size_log2) >>
- cm->mib_size_log2;
- const int tile_height =
- ALIGN_POWER_OF_TWO(cm->tile_height, cm->mib_size_log2) >>
- cm->mib_size_log2;
-
- assert(tile_width > 0);
- assert(tile_height > 0);
+ if (cm->large_scale_tile) {
+ const int tile_width =
+ ALIGN_POWER_OF_TWO(cm->tile_width, cm->mib_size_log2) >>
+ cm->mib_size_log2;
+ const int tile_height =
+ ALIGN_POWER_OF_TWO(cm->tile_height, cm->mib_size_log2) >>
+ cm->mib_size_log2;
- aom_wb_write_literal(wb, cm->tile_encoding_mode, 1);
+ assert(tile_width > 0);
+ assert(tile_height > 0);
// Write the tile sizes
#if CONFIG_EXT_PARTITION
- if (cm->sb_size == BLOCK_128X128) {
- assert(tile_width <= 32);
- assert(tile_height <= 32);
- aom_wb_write_literal(wb, tile_width - 1, 5);
- aom_wb_write_literal(wb, tile_height - 1, 5);
- } else
+ if (cm->sb_size == BLOCK_128X128) {
+ assert(tile_width <= 32);
+ assert(tile_height <= 32);
+ aom_wb_write_literal(wb, tile_width - 1, 5);
+ aom_wb_write_literal(wb, tile_height - 1, 5);
+ } else {
#endif // CONFIG_EXT_PARTITION
- {
- assert(tile_width <= 64);
- assert(tile_height <= 64);
- aom_wb_write_literal(wb, tile_width - 1, 6);
- aom_wb_write_literal(wb, tile_height - 1, 6);
- }
-#if CONFIG_DEPENDENT_HORZTILES
- if (tile_height > 1) aom_wb_write_bit(wb, cm->dependent_horz_tiles);
-#endif
-#else
- int min_log2_tile_cols, max_log2_tile_cols, ones;
- av1_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
+ assert(tile_width <= 64);
+ assert(tile_height <= 64);
+ aom_wb_write_literal(wb, tile_width - 1, 6);
+ aom_wb_write_literal(wb, tile_height - 1, 6);
+#if CONFIG_EXT_PARTITION
+ }
+#endif // CONFIG_EXT_PARTITION
+ } else {
+#endif // CONFIG_EXT_TILE
+ int min_log2_tile_cols, max_log2_tile_cols, ones;
+ av1_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
- // columns
- ones = cm->log2_tile_cols - min_log2_tile_cols;
- while (ones--) aom_wb_write_bit(wb, 1);
+ // columns
+ ones = cm->log2_tile_cols - min_log2_tile_cols;
+ while (ones--) aom_wb_write_bit(wb, 1);
- if (cm->log2_tile_cols < max_log2_tile_cols) aom_wb_write_bit(wb, 0);
+ if (cm->log2_tile_cols < max_log2_tile_cols) aom_wb_write_bit(wb, 0);
- // rows
- aom_wb_write_bit(wb, cm->log2_tile_rows != 0);
- if (cm->log2_tile_rows != 0) aom_wb_write_bit(wb, cm->log2_tile_rows != 1);
+ // rows
+ aom_wb_write_bit(wb, cm->log2_tile_rows != 0);
+ if (cm->log2_tile_rows != 0) aom_wb_write_bit(wb, cm->log2_tile_rows != 1);
#if CONFIG_DEPENDENT_HORZTILES
- if (cm->log2_tile_rows != 0) aom_wb_write_bit(wb, cm->dependent_horz_tiles);
+ if (cm->log2_tile_rows != 0) aom_wb_write_bit(wb, cm->dependent_horz_tiles);
#endif
+#if CONFIG_EXT_TILE
+ }
#endif // CONFIG_EXT_TILE
#if CONFIG_LOOPFILTERING_ACROSS_TILES
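
For readers of the reworked non-large-scale branch above: the tile-column count is coded as a unary offset from the minimum log2 value, terminated by a 0 bit unless it already sits at the maximum. A small sketch of the resulting bit count (hypothetical helper, not part of the patch):

/* Bits spent by write_tile_info() on the tile columns in the non-large-scale
 * path: one 1 bit per step above the minimum, plus a terminating 0 bit when
 * below the maximum. */
static int tile_cols_header_bits(int log2_tile_cols, int min_log2_tile_cols,
                                 int max_log2_tile_cols) {
  const int ones = log2_tile_cols - min_log2_tile_cols;
  return ones + (log2_tile_cols < max_log2_tile_cols ? 1 : 0);
}
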
@@ -3782,16 +3725,9 @@ static INLINE int find_identical_tile(
}
#endif // CONFIG_EXT_TILE
-#if CONFIG_TILE_GROUPS
-static uint32_t write_tiles(AV1_COMP *const cpi,
- struct aom_write_bit_buffer *wb,
- unsigned int *max_tile_size,
- unsigned int *max_tile_col_size) {
-#else
static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst,
unsigned int *max_tile_size,
unsigned int *max_tile_col_size) {
-#endif
const AV1_COMMON *const cm = &cpi->common;
#if CONFIG_ANS
struct BufAnsCoder *buf_ans = &cpi->buf_ans;
@@ -3805,19 +3741,24 @@ static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst,
const int tile_cols = cm->tile_cols;
const int tile_rows = cm->tile_rows;
unsigned int tile_size = 0;
-#if CONFIG_TILE_GROUPS
+ const int have_tiles = tile_cols * tile_rows > 1;
+ struct aom_write_bit_buffer wb = { dst, 0 };
const int n_log2_tiles = cm->log2_tile_rows + cm->log2_tile_cols;
- const int have_tiles = n_log2_tiles > 0;
uint32_t comp_hdr_size;
// Fixed size tile groups for the moment
const int num_tg_hdrs = cm->num_tg;
- const int tg_size = (tile_rows * tile_cols + num_tg_hdrs - 1) / num_tg_hdrs;
+ const int tg_size =
+#if CONFIG_EXT_TILE
+ (cm->large_scale_tile)
+ ? 1
+ :
+#endif // CONFIG_EXT_TILE
+ (tile_rows * tile_cols + num_tg_hdrs - 1) / num_tg_hdrs;
int tile_count = 0;
int tg_count = 1;
int tile_size_bytes = 4;
int tile_col_size_bytes;
uint32_t uncompressed_hdr_size = 0;
- uint8_t *dst = NULL;
struct aom_write_bit_buffer comp_hdr_len_wb;
struct aom_write_bit_buffer tg_params_wb;
struct aom_write_bit_buffer tile_size_bytes_wb;
@@ -3825,10 +3766,6 @@ static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst,
int mtu_size = cpi->oxcf.mtu;
int curr_tg_data_size = 0;
int hdr_size;
-#endif
-#if CONFIG_EXT_TILE
- const int have_tiles = tile_cols * tile_rows > 1;
-#endif // CONFIG_EXT_TILE
*max_tile_size = 0;
*max_tile_col_size = 0;
@@ -3837,282 +3774,274 @@ static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst,
// later compact the data if smaller headers are adequate.
#if CONFIG_EXT_TILE
- for (tile_col = 0; tile_col < tile_cols; tile_col++) {
- TileInfo tile_info;
- const int is_last_col = (tile_col == tile_cols - 1);
- const uint32_t col_offset = total_size;
-
- av1_tile_set_col(&tile_info, cm, tile_col);
-
- // The last column does not have a column header
- if (!is_last_col) total_size += 4;
-
- for (tile_row = 0; tile_row < tile_rows; tile_row++) {
- TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
- const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col];
- const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col];
- const int data_offset = have_tiles ? 4 : 0;
-#if CONFIG_EC_ADAPT
- const int tile_idx = tile_row * tile_cols + tile_col;
- TileDataEnc *this_tile = &cpi->tile_data[tile_idx];
-#endif
- av1_tile_set_row(&tile_info, cm, tile_row);
+ if (cm->large_scale_tile) {
+ for (tile_col = 0; tile_col < tile_cols; tile_col++) {
+ TileInfo tile_info;
+ const int is_last_col = (tile_col == tile_cols - 1);
+ const uint32_t col_offset = total_size;
- buf->data = dst + total_size;
+ av1_tile_set_col(&tile_info, cm, tile_col);
- // Is CONFIG_EXT_TILE = 1, every tile in the row has a header,
- // even for the last one, unless no tiling is used at all.
- total_size += data_offset;
-#if CONFIG_EC_ADAPT
- // Initialise tile context from the frame context
- this_tile->tctx = *cm->fc;
- cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
-#endif
+ // The last column does not have a column header
+ if (!is_last_col) total_size += 4;
+
+ for (tile_row = 0; tile_row < tile_rows; tile_row++) {
+ TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
+ const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col];
+ const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col];
+ const int data_offset = have_tiles ? 4 : 0;
+ const int tile_idx = tile_row * tile_cols + tile_col;
+ TileDataEnc *this_tile = &cpi->tile_data[tile_idx];
+ av1_tile_set_row(&tile_info, cm, tile_row);
+
+ buf->data = dst + total_size;
+
+        // When CONFIG_EXT_TILE is 1, every tile in the row has a header,
+ // even for the last one, unless no tiling is used at all.
+ total_size += data_offset;
+ // Initialise tile context from the frame context
+ this_tile->tctx = *cm->fc;
+ cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
#if CONFIG_PVQ
- cpi->td.mb.pvq_q = &this_tile->pvq_q;
- cpi->td.mb.daala_enc.state.adapt = &this_tile->tctx.pvq_context;
+ cpi->td.mb.pvq_q = &this_tile->pvq_q;
+ cpi->td.mb.daala_enc.state.adapt = &this_tile->tctx.pvq_context;
#endif // CONFIG_PVQ
#if !CONFIG_ANS
- aom_start_encode(&mode_bc, buf->data + data_offset);
- write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end);
- assert(tok == tok_end);
- aom_stop_encode(&mode_bc);
- tile_size = mode_bc.pos;
+ aom_start_encode(&mode_bc, buf->data + data_offset);
+ write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end);
+ assert(tok == tok_end);
+ aom_stop_encode(&mode_bc);
+ tile_size = mode_bc.pos;
#else
- buf_ans_write_init(buf_ans, buf->data + data_offset);
- write_modes(cpi, &tile_info, buf_ans, &tok, tok_end);
- assert(tok == tok_end);
- aom_buf_ans_flush(buf_ans);
- tile_size = buf_ans_write_end(buf_ans);
+ buf_ans_write_init(buf_ans, buf->data + data_offset);
+ write_modes(cpi, &tile_info, buf_ans, &tok, tok_end);
+ assert(tok == tok_end);
+ aom_buf_ans_flush(buf_ans);
+ tile_size = buf_ans_write_end(buf_ans);
#endif // !CONFIG_ANS
#if CONFIG_PVQ
- cpi->td.mb.pvq_q = NULL;
+ cpi->td.mb.pvq_q = NULL;
#endif
- buf->size = tile_size;
-
- // Record the maximum tile size we see, so we can compact headers later.
- *max_tile_size = AOMMAX(*max_tile_size, tile_size);
-
- if (have_tiles) {
- // tile header: size of this tile, or copy offset
- uint32_t tile_header = tile_size;
+ buf->size = tile_size;
- // If the tile_encoding_mode is 1 (i.e. TILE_VR), check if this tile is
- // a copy tile.
- // Very low chances to have copy tiles on the key frames, so don't
- // search on key frames to reduce unnecessary search.
- if (cm->frame_type != KEY_FRAME && cm->tile_encoding_mode) {
- const int idendical_tile_offset =
- find_identical_tile(tile_row, tile_col, tile_buffers);
+ // Record the maximum tile size we see, so we can compact headers later.
+ *max_tile_size = AOMMAX(*max_tile_size, tile_size);
- if (idendical_tile_offset > 0) {
- tile_size = 0;
- tile_header = idendical_tile_offset | 0x80;
- tile_header <<= 24;
+ if (have_tiles) {
+ // tile header: size of this tile, or copy offset
+ uint32_t tile_header = tile_size;
+ const int tile_copy_mode =
+ ((AOMMAX(cm->tile_width, cm->tile_height) << MI_SIZE_LOG2) <= 256)
+ ? 1
+ : 0;
+
+ // If tile_copy_mode = 1, check if this tile is a copy tile.
+          // Copy tiles are very unlikely on key frames, so skip the search
+          // on key frames to avoid unnecessary work.
+ if (cm->frame_type != KEY_FRAME && tile_copy_mode) {
+ const int idendical_tile_offset =
+ find_identical_tile(tile_row, tile_col, tile_buffers);
+
+ if (idendical_tile_offset > 0) {
+ tile_size = 0;
+ tile_header = idendical_tile_offset | 0x80;
+ tile_header <<= 24;
+ }
}
+
+ mem_put_le32(buf->data, tile_header);
}
- mem_put_le32(buf->data, tile_header);
+ total_size += tile_size;
}
- total_size += tile_size;
- }
-
- if (!is_last_col) {
- uint32_t col_size = total_size - col_offset - 4;
- mem_put_le32(dst + col_offset, col_size);
+ if (!is_last_col) {
+ uint32_t col_size = total_size - col_offset - 4;
+ mem_put_le32(dst + col_offset, col_size);
- // If it is not final packing, record the maximum tile column size we see,
- // otherwise, check if the tile size is out of the range.
- *max_tile_col_size = AOMMAX(*max_tile_col_size, col_size);
+        // If this is not the final packing, record the maximum tile column
+        // size we see; otherwise, check whether the tile size is out of range.
+ *max_tile_col_size = AOMMAX(*max_tile_col_size, col_size);
+ }
}
- }
-#else
-#if CONFIG_TILE_GROUPS
- write_uncompressed_header(cpi, wb);
+ } else {
+#endif // CONFIG_EXT_TILE
+ write_uncompressed_header(cpi, &wb);
#if CONFIG_EXT_REFS
- if (cm->show_existing_frame) {
- total_size = aom_wb_bytes_written(wb);
- return (uint32_t)total_size;
- }
+ if (cm->show_existing_frame) {
+ total_size = aom_wb_bytes_written(&wb);
+ return (uint32_t)total_size;
+ }
#endif // CONFIG_EXT_REFS
- // Write the tile length code
- tile_size_bytes_wb = *wb;
- aom_wb_write_literal(wb, 3, 2);
+ // Write the tile length code
+ tile_size_bytes_wb = wb;
+ aom_wb_write_literal(&wb, 3, 2);
- /* Write a placeholder for the number of tiles in each tile group */
- tg_params_wb = *wb;
- saved_offset = wb->bit_offset;
- if (have_tiles) {
- aom_wb_overwrite_literal(wb, 3, n_log2_tiles);
- aom_wb_overwrite_literal(wb, (1 << n_log2_tiles) - 1, n_log2_tiles);
- }
-
- /* Write a placeholder for the compressed header length */
- comp_hdr_len_wb = *wb;
- aom_wb_write_literal(wb, 0, 16);
-
- uncompressed_hdr_size = aom_wb_bytes_written(wb);
- dst = wb->bit_buffer;
- comp_hdr_size = write_compressed_header(cpi, dst + uncompressed_hdr_size);
- aom_wb_overwrite_literal(&comp_hdr_len_wb, (int)(comp_hdr_size), 16);
- hdr_size = uncompressed_hdr_size + comp_hdr_size;
- total_size += hdr_size;
-#endif
+ /* Write a placeholder for the number of tiles in each tile group */
+ tg_params_wb = wb;
+ saved_offset = wb.bit_offset;
+ if (have_tiles) {
+ aom_wb_overwrite_literal(&wb, 3, n_log2_tiles);
+ aom_wb_overwrite_literal(&wb, (1 << n_log2_tiles) - 1, n_log2_tiles);
+ }
- for (tile_row = 0; tile_row < tile_rows; tile_row++) {
- TileInfo tile_info;
- const int is_last_row = (tile_row == tile_rows - 1);
- av1_tile_set_row(&tile_info, cm, tile_row);
+ /* Write a placeholder for the compressed header length */
+ comp_hdr_len_wb = wb;
+ aom_wb_write_literal(&wb, 0, 16);
- for (tile_col = 0; tile_col < tile_cols; tile_col++) {
- const int tile_idx = tile_row * tile_cols + tile_col;
- TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
-#if CONFIG_PVQ || CONFIG_EC_ADAPT
- TileDataEnc *this_tile = &cpi->tile_data[tile_idx];
-#endif
- const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col];
- const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col];
- const int is_last_col = (tile_col == tile_cols - 1);
- const int is_last_tile = is_last_col && is_last_row;
-#if !CONFIG_TILE_GROUPS
- (void)tile_idx;
-#else
+ uncompressed_hdr_size = aom_wb_bytes_written(&wb);
+ comp_hdr_size = write_compressed_header(cpi, dst + uncompressed_hdr_size);
+ aom_wb_overwrite_literal(&comp_hdr_len_wb, (int)(comp_hdr_size), 16);
+ hdr_size = uncompressed_hdr_size + comp_hdr_size;
+ total_size += hdr_size;
- if ((!mtu_size && tile_count > tg_size) ||
- (mtu_size && tile_count && curr_tg_data_size >= mtu_size)) {
- // New tile group
- tg_count++;
- // We've exceeded the packet size
- if (tile_count > 1) {
- /* The last tile exceeded the packet size. The tile group size
- should therefore be tile_count-1.
- Move the last tile and insert headers before it
- */
- uint32_t old_total_size = total_size - tile_size - 4;
- memmove(dst + old_total_size + hdr_size, dst + old_total_size,
- (tile_size + 4) * sizeof(uint8_t));
- // Copy uncompressed header
- memmove(dst + old_total_size, dst,
- uncompressed_hdr_size * sizeof(uint8_t));
- // Write the number of tiles in the group into the last uncompressed
- // header before the one we've just inserted
- aom_wb_overwrite_literal(&tg_params_wb, tile_idx - tile_count,
- n_log2_tiles);
- aom_wb_overwrite_literal(&tg_params_wb, tile_count - 2, n_log2_tiles);
- // Update the pointer to the last TG params
- tg_params_wb.bit_offset = saved_offset + 8 * old_total_size;
- // Copy compressed header
- memmove(dst + old_total_size + uncompressed_hdr_size,
- dst + uncompressed_hdr_size, comp_hdr_size * sizeof(uint8_t));
- total_size += hdr_size;
- tile_count = 1;
- curr_tg_data_size = hdr_size + tile_size + 4;
+ for (tile_row = 0; tile_row < tile_rows; tile_row++) {
+ TileInfo tile_info;
+ const int is_last_row = (tile_row == tile_rows - 1);
+ av1_tile_set_row(&tile_info, cm, tile_row);
- } else {
- // We exceeded the packet size in just one tile
- // Copy uncompressed header
- memmove(dst + total_size, dst,
- uncompressed_hdr_size * sizeof(uint8_t));
- // Write the number of tiles in the group into the last uncompressed
- // header
- aom_wb_overwrite_literal(&tg_params_wb, tile_idx - tile_count,
- n_log2_tiles);
- aom_wb_overwrite_literal(&tg_params_wb, tile_count - 1, n_log2_tiles);
- tg_params_wb.bit_offset = saved_offset + 8 * total_size;
- // Copy compressed header
- memmove(dst + total_size + uncompressed_hdr_size,
- dst + uncompressed_hdr_size, comp_hdr_size * sizeof(uint8_t));
- total_size += hdr_size;
- tile_count = 0;
- curr_tg_data_size = hdr_size;
+ for (tile_col = 0; tile_col < tile_cols; tile_col++) {
+ const int tile_idx = tile_row * tile_cols + tile_col;
+ TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
+ TileDataEnc *this_tile = &cpi->tile_data[tile_idx];
+ const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col];
+ const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col];
+ const int is_last_col = (tile_col == tile_cols - 1);
+ const int is_last_tile = is_last_col && is_last_row;
+
+ if ((!mtu_size && tile_count > tg_size) ||
+ (mtu_size && tile_count && curr_tg_data_size >= mtu_size)) {
+ // New tile group
+ tg_count++;
+ // We've exceeded the packet size
+ if (tile_count > 1) {
+ /* The last tile exceeded the packet size. The tile group size
+ should therefore be tile_count-1.
+ Move the last tile and insert headers before it
+ */
+ uint32_t old_total_size = total_size - tile_size - 4;
+ memmove(dst + old_total_size + hdr_size, dst + old_total_size,
+ (tile_size + 4) * sizeof(uint8_t));
+ // Copy uncompressed header
+ memmove(dst + old_total_size, dst,
+ uncompressed_hdr_size * sizeof(uint8_t));
+ // Write the number of tiles in the group into the last uncompressed
+ // header before the one we've just inserted
+ aom_wb_overwrite_literal(&tg_params_wb, tile_idx - tile_count,
+ n_log2_tiles);
+ aom_wb_overwrite_literal(&tg_params_wb, tile_count - 2,
+ n_log2_tiles);
+ // Update the pointer to the last TG params
+ tg_params_wb.bit_offset = saved_offset + 8 * old_total_size;
+ // Copy compressed header
+ memmove(dst + old_total_size + uncompressed_hdr_size,
+ dst + uncompressed_hdr_size,
+ comp_hdr_size * sizeof(uint8_t));
+ total_size += hdr_size;
+ tile_count = 1;
+ curr_tg_data_size = hdr_size + tile_size + 4;
+ } else {
+ // We exceeded the packet size in just one tile
+ // Copy uncompressed header
+ memmove(dst + total_size, dst,
+ uncompressed_hdr_size * sizeof(uint8_t));
+ // Write the number of tiles in the group into the last uncompressed
+ // header
+ aom_wb_overwrite_literal(&tg_params_wb, tile_idx - tile_count,
+ n_log2_tiles);
+ aom_wb_overwrite_literal(&tg_params_wb, tile_count - 1,
+ n_log2_tiles);
+ tg_params_wb.bit_offset = saved_offset + 8 * total_size;
+ // Copy compressed header
+ memmove(dst + total_size + uncompressed_hdr_size,
+ dst + uncompressed_hdr_size,
+ comp_hdr_size * sizeof(uint8_t));
+ total_size += hdr_size;
+ tile_count = 0;
+ curr_tg_data_size = hdr_size;
+ }
}
- }
- tile_count++;
-#endif
- av1_tile_set_col(&tile_info, cm, tile_col);
+ tile_count++;
+ av1_tile_set_col(&tile_info, cm, tile_col);
-#if CONFIG_DEPENDENT_HORZTILES && CONFIG_TILE_GROUPS
- av1_tile_set_tg_boundary(&tile_info, cm, tile_row, tile_col);
+#if CONFIG_DEPENDENT_HORZTILES
+ av1_tile_set_tg_boundary(&tile_info, cm, tile_row, tile_col);
#endif
- buf->data = dst + total_size;
+ buf->data = dst + total_size;
- // The last tile does not have a header.
- if (!is_last_tile) total_size += 4;
+ // The last tile does not have a header.
+ if (!is_last_tile) total_size += 4;
-#if CONFIG_EC_ADAPT
- // Initialise tile context from the frame context
- this_tile->tctx = *cm->fc;
- cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
-#endif
+ // Initialise tile context from the frame context
+ this_tile->tctx = *cm->fc;
+ cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
#if CONFIG_PVQ
- cpi->td.mb.pvq_q = &this_tile->pvq_q;
- cpi->td.mb.daala_enc.state.adapt = &this_tile->tctx.pvq_context;
+ cpi->td.mb.pvq_q = &this_tile->pvq_q;
+ cpi->td.mb.daala_enc.state.adapt = &this_tile->tctx.pvq_context;
#endif // CONFIG_PVQ
#if CONFIG_ANS
- buf_ans_write_init(buf_ans, dst + total_size);
- write_modes(cpi, &tile_info, buf_ans, &tok, tok_end);
- assert(tok == tok_end);
- aom_buf_ans_flush(buf_ans);
- tile_size = buf_ans_write_end(buf_ans);
+ buf_ans_write_init(buf_ans, dst + total_size);
+ write_modes(cpi, &tile_info, buf_ans, &tok, tok_end);
+ assert(tok == tok_end);
+ aom_buf_ans_flush(buf_ans);
+ tile_size = buf_ans_write_end(buf_ans);
#else
aom_start_encode(&mode_bc, dst + total_size);
write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end);
#if !CONFIG_LV_MAP
+#if !CONFIG_PVQ
assert(tok == tok_end);
+#endif // !CONFIG_PVQ
#endif // !CONFIG_LV_MAP
aom_stop_encode(&mode_bc);
tile_size = mode_bc.pos;
#endif // CONFIG_ANS
#if CONFIG_PVQ
- cpi->td.mb.pvq_q = NULL;
+ cpi->td.mb.pvq_q = NULL;
#endif
- assert(tile_size > 0);
+ assert(tile_size > 0);
-#if CONFIG_TILE_GROUPS
- curr_tg_data_size += tile_size + 4;
-#endif
- buf->size = tile_size;
+ curr_tg_data_size += tile_size + 4;
+ buf->size = tile_size;
- if (!is_last_tile) {
- *max_tile_size = AOMMAX(*max_tile_size, tile_size);
- // size of this tile
- mem_put_le32(buf->data, tile_size);
- }
+ if (!is_last_tile) {
+ *max_tile_size = AOMMAX(*max_tile_size, tile_size);
+ // size of this tile
+ mem_put_le32(buf->data, tile_size);
+ }
- total_size += tile_size;
+ total_size += tile_size;
+ }
+ }
+ // Write the final tile group size
+ if (n_log2_tiles) {
+ aom_wb_overwrite_literal(&tg_params_wb, (1 << n_log2_tiles) - tile_count,
+ n_log2_tiles);
+ aom_wb_overwrite_literal(&tg_params_wb, tile_count - 1, n_log2_tiles);
+ }
+ // Remux if possible. TODO (Thomas Davies): do this for more than one tile
+ // group
+ if (have_tiles && tg_count == 1) {
+ int data_size = total_size - (uncompressed_hdr_size + comp_hdr_size);
+ data_size = remux_tiles(cm, dst + uncompressed_hdr_size + comp_hdr_size,
+ data_size, *max_tile_size, *max_tile_col_size,
+ &tile_size_bytes, &tile_col_size_bytes);
+ total_size = data_size + uncompressed_hdr_size + comp_hdr_size;
+ aom_wb_overwrite_literal(&tile_size_bytes_wb, tile_size_bytes - 1, 2);
}
- }
-#if CONFIG_TILE_GROUPS
- // Write the final tile group size
- if (n_log2_tiles) {
- aom_wb_overwrite_literal(&tg_params_wb, (1 << n_log2_tiles) - tile_count,
- n_log2_tiles);
- aom_wb_overwrite_literal(&tg_params_wb, tile_count - 1, n_log2_tiles);
- }
- // Remux if possible. TODO (Thomas Davies): do this for more than one tile
- // group
- if (have_tiles && tg_count == 1) {
- int data_size = total_size - (uncompressed_hdr_size + comp_hdr_size);
- data_size = remux_tiles(cm, dst + uncompressed_hdr_size + comp_hdr_size,
- data_size, *max_tile_size, *max_tile_col_size,
- &tile_size_bytes, &tile_col_size_bytes);
- total_size = data_size + uncompressed_hdr_size + comp_hdr_size;
- aom_wb_overwrite_literal(&tile_size_bytes_wb, tile_size_bytes - 1, 2);
- }
-#endif
+#if CONFIG_EXT_TILE
+ }
#endif // CONFIG_EXT_TILE
return (uint32_t)total_size;
}
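
write_tiles() now folds the old CONFIG_TILE_GROUPS path in unconditionally: except for large-scale tiles, the frame is split into cm->num_tg fixed-size tile groups, with the MTU check able to force an earlier split. A minimal sketch of the sizing rule (helper name made up, not part of the patch):

/* Nominal tiles per group in write_tiles(): ceil(n_tiles / num_tg); the MTU
 * limit can still close a group before it reaches this size. */
static int tiles_per_group(int tile_rows, int tile_cols, int num_tg) {
  const int n_tiles = tile_rows * tile_cols;
  return (n_tiles + num_tg - 1) / num_tg;
}
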
static void write_render_size(const AV1_COMMON *cm,
struct aom_write_bit_buffer *wb) {
- const int scaling_active =
- cm->width != cm->render_width || cm->height != cm->render_height;
+ const int scaling_active = !av1_resize_unscaled(cm);
aom_wb_write_bit(wb, scaling_active);
if (scaling_active) {
aom_wb_write_literal(wb, cm->render_width - 1, 16);
@@ -4124,11 +4053,10 @@ static void write_render_size(const AV1_COMMON *cm,
static void write_superres_scale(const AV1_COMMON *const cm,
struct aom_write_bit_buffer *wb) {
  // First bit is whether to scale or not
- if (cm->superres_scale_numerator == SUPERRES_SCALE_DENOMINATOR) {
+ if (cm->superres_scale_numerator == SCALE_DENOMINATOR) {
aom_wb_write_bit(wb, 0); // no scaling
} else {
aom_wb_write_bit(wb, 1); // scaling, write scale factor
- // TODO(afergs): write factor to the compressed header instead
aom_wb_write_literal(
wb, cm->superres_scale_numerator - SUPERRES_SCALE_NUMERATOR_MIN,
SUPERRES_SCALE_BITS);
@@ -4138,13 +4066,15 @@ static void write_superres_scale(const AV1_COMMON *const cm,
static void write_frame_size(const AV1_COMMON *cm,
struct aom_write_bit_buffer *wb) {
- aom_wb_write_literal(wb, cm->width - 1, 16);
- aom_wb_write_literal(wb, cm->height - 1, 16);
-
- write_render_size(cm, wb);
#if CONFIG_FRAME_SUPERRES
+ aom_wb_write_literal(wb, cm->superres_upscaled_width - 1, 16);
+ aom_wb_write_literal(wb, cm->superres_upscaled_height - 1, 16);
write_superres_scale(cm, wb);
+#else
+ aom_wb_write_literal(wb, cm->width - 1, 16);
+ aom_wb_write_literal(wb, cm->height - 1, 16);
#endif // CONFIG_FRAME_SUPERRES
+ write_render_size(cm, wb);
}
static void write_frame_size_with_refs(AV1_COMP *cpi,
@@ -4157,20 +4087,26 @@ static void write_frame_size_with_refs(AV1_COMP *cpi,
YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, ref_frame);
if (cfg != NULL) {
+#if CONFIG_FRAME_SUPERRES
+ found = cm->superres_upscaled_width == cfg->y_crop_width &&
+ cm->superres_upscaled_height == cfg->y_crop_height;
+#else
found =
cm->width == cfg->y_crop_width && cm->height == cfg->y_crop_height;
+#endif
found &= cm->render_width == cfg->render_width &&
cm->render_height == cfg->render_height;
}
aom_wb_write_bit(wb, found);
if (found) {
+#if CONFIG_FRAME_SUPERRES
+ write_superres_scale(cm, wb);
+#endif // CONFIG_FRAME_SUPERRES
break;
}
}
- if (!found) {
- write_frame_size(cm, wb);
- }
+ if (!found) write_frame_size(cm, wb);
}
static void write_sync_code(struct aom_write_bit_buffer *wb) {
@@ -4196,7 +4132,12 @@ static void write_bitdepth_colorspace_sampling(
assert(cm->bit_depth > AOM_BITS_8);
aom_wb_write_bit(wb, cm->bit_depth == AOM_BITS_10 ? 0 : 1);
}
+#if CONFIG_COLORSPACE_HEADERS
+ aom_wb_write_literal(wb, cm->color_space, 5);
+ aom_wb_write_literal(wb, cm->transfer_function, 5);
+#else
aom_wb_write_literal(wb, cm->color_space, 3);
+#endif
if (cm->color_space != AOM_CS_SRGB) {
// 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
aom_wb_write_bit(wb, cm->color_range);
@@ -4208,6 +4149,11 @@ static void write_bitdepth_colorspace_sampling(
} else {
assert(cm->subsampling_x == 1 && cm->subsampling_y == 1);
}
+#if CONFIG_COLORSPACE_HEADERS
+ if (cm->subsampling_x == 1 && cm->subsampling_y == 1) {
+ aom_wb_write_literal(wb, cm->chroma_sample_position, 2);
+ }
+#endif
} else {
assert(cm->profile == PROFILE_1 || cm->profile == PROFILE_3);
aom_wb_write_bit(wb, 0); // unused
@@ -4215,9 +4161,17 @@ static void write_bitdepth_colorspace_sampling(
}
#if CONFIG_REFERENCE_BUFFER
-void write_sequence_header(SequenceHeader *seq_params) {
+void write_sequence_header(
+#if CONFIG_EXT_TILE
+ AV1_COMMON *const cm,
+#endif // CONFIG_EXT_TILE
+ SequenceHeader *seq_params) {
/* Placeholder for actually writing to the bitstream */
- seq_params->frame_id_numbers_present_flag = FRAME_ID_NUMBERS_PRESENT_FLAG;
+ seq_params->frame_id_numbers_present_flag =
+#if CONFIG_EXT_TILE
+ cm->large_scale_tile ? 0 :
+#endif // CONFIG_EXT_TILE
+ FRAME_ID_NUMBERS_PRESENT_FLAG;
seq_params->frame_id_length_minus7 = FRAME_ID_LENGTH_MINUS7;
seq_params->delta_frame_id_length_minus2 = DELTA_FRAME_ID_LENGTH_MINUS2;
}
@@ -4236,7 +4190,11 @@ static void write_compound_tools(const AV1_COMMON *cm,
}
#endif // CONFIG_INTERINTRA
#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
+#if CONFIG_COMPOUND_SINGLEREF
+ if (!frame_is_intra_only(cm)) {
+#else // !CONFIG_COMPOUND_SINGLEREF
if (!frame_is_intra_only(cm) && cm->reference_mode != SINGLE_REFERENCE) {
+#endif // CONFIG_COMPOUND_SINGLEREF
aom_wb_write_bit(wb, cm->allow_masked_compound);
} else {
assert(cm->allow_masked_compound == 0);
@@ -4252,13 +4210,21 @@ static void write_uncompressed_header(AV1_COMP *cpi,
#if CONFIG_REFERENCE_BUFFER
/* TODO: Move outside frame loop or inside key-frame branch */
- write_sequence_header(&cpi->seq_params);
+ write_sequence_header(
+#if CONFIG_EXT_TILE
+ cm,
+#endif // CONFIG_EXT_TILE
+ &cpi->seq_params);
#endif
aom_wb_write_literal(wb, AOM_FRAME_MARKER, 2);
write_profile(cm->profile, wb);
+#if CONFIG_EXT_TILE
+ aom_wb_write_literal(wb, cm->large_scale_tile, 1);
+#endif // CONFIG_EXT_TILE
+
#if CONFIG_EXT_REFS
// NOTE: By default all coded frames to be used as a reference
cm->is_reference_frame = 1;
@@ -4309,11 +4275,6 @@ static void write_uncompressed_header(AV1_COMP *cpi,
}
#endif
-#if CONFIG_FRAME_SUPERRES
- // TODO(afergs): Remove - this is just to stop superres from breaking
- cm->superres_scale_numerator = SUPERRES_SCALE_DENOMINATOR;
-#endif // CONFIG_FRAME_SUPERRES
-
if (cm->frame_type == KEY_FRAME) {
write_sync_code(wb);
write_bitdepth_colorspace_sampling(cm, wb);
@@ -4447,12 +4408,6 @@ static void write_uncompressed_header(AV1_COMP *cpi,
#endif // CONFIG_EXT_PARTITION
encode_loopfilter(cm, wb);
-#if CONFIG_CDEF
- encode_cdef(cm, wb);
-#endif
-#if CONFIG_LOOP_RESTORATION
- encode_restoration_mode(cm, wb);
-#endif // CONFIG_LOOP_RESTORATION
encode_quantization(cm, wb);
encode_segmentation(cm, xd, wb);
#if CONFIG_DELTA_Q
@@ -4485,8 +4440,15 @@ static void write_uncompressed_header(AV1_COMP *cpi,
}
}
#endif
-
- write_tx_mode(cm, xd, &cm->tx_mode, wb);
+#if CONFIG_CDEF
+ if (!cm->all_lossless) {
+ encode_cdef(cm, wb);
+ }
+#endif
+#if CONFIG_LOOP_RESTORATION
+ encode_restoration_mode(cm, wb);
+#endif // CONFIG_LOOP_RESTORATION
+ write_tx_mode(cm, &cm->tx_mode, wb);
if (cpi->allow_comp_inter_inter) {
const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT;
@@ -4513,13 +4475,13 @@ static void write_uncompressed_header(AV1_COMP *cpi,
#if CONFIG_GLOBAL_MOTION
static void write_global_motion_params(WarpedMotionParams *params,
WarpedMotionParams *ref_params,
- aom_prob *probs, aom_writer *w,
- int allow_hp) {
+ aom_writer *w, int allow_hp) {
TransformationType type = params->wmtype;
int trans_bits;
int trans_prec_diff;
- av1_write_token(w, av1_global_motion_types_tree, probs,
- &global_motion_types_encodings[type]);
+ aom_write_bit(w, type != IDENTITY);
+ if (type != IDENTITY) aom_write_literal(w, type - 1, GLOBAL_TYPE_BITS);
+
switch (type) {
case HOMOGRAPHY:
case HORTRAPEZOID:
@@ -4584,10 +4546,18 @@ static void write_global_motion_params(WarpedMotionParams *params,
static void write_global_motion(AV1_COMP *cpi, aom_writer *w) {
AV1_COMMON *const cm = &cpi->common;
int frame;
+ YV12_BUFFER_CONFIG *ref_buf;
for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
- write_global_motion_params(
- &cm->global_motion[frame], &cm->prev_frame->global_motion[frame],
- cm->fc->global_motion_types_prob, w, cm->allow_high_precision_mv);
+ ref_buf = get_ref_frame_buffer(cpi, frame);
+ if (cpi->source->y_crop_width == ref_buf->y_crop_width &&
+ cpi->source->y_crop_height == ref_buf->y_crop_height) {
+ write_global_motion_params(&cm->global_motion[frame],
+ &cm->prev_frame->global_motion[frame], w,
+ cm->allow_high_precision_mv);
+ } else {
+ assert(cm->global_motion[frame].wmtype == IDENTITY &&
+ "Invalid warp type for frames of different resolutions");
+ }
/*
printf("Frame %d/%d: Enc Ref %d (used %d): %d %d %d %d\n",
cm->current_video_frame, cm->show_frame, frame,
@@ -4605,15 +4575,17 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
#endif // CONFIG_SUPERTX
FRAME_CONTEXT *const fc = cm->fc;
- FRAME_COUNTS *counts = cpi->td.counts;
aom_writer *header_bc;
- int i, j;
+ int i;
+#if !CONFIG_NEW_MULTISYMBOL
+ FRAME_COUNTS *counts = cpi->td.counts;
+ int j;
+#endif
-#if CONFIG_TILE_GROUPS
const int probwt = cm->num_tg;
-#else
- const int probwt = 1;
-#endif
+ (void)probwt;
+ (void)i;
+ (void)fc;
#if CONFIG_ANS
int header_size;
@@ -4628,96 +4600,26 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
#if CONFIG_LOOP_RESTORATION
encode_restoration(cm, header_bc);
#endif // CONFIG_LOOP_RESTORATION
-#if !CONFIG_EC_ADAPT
- update_txfm_probs(cm, header_bc, counts);
-#endif
-#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
if (cm->tx_mode == TX_MODE_SELECT)
av1_cond_prob_diff_update(header_bc, &cm->fc->quarter_tx_size_prob,
cm->counts.quarter_tx_size, probwt);
-#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#endif
#if CONFIG_LV_MAP
av1_write_txb_probs(cpi, header_bc);
-#else
-#if !CONFIG_PVQ
-#if !CONFIG_EC_ADAPT
- update_coef_probs(cpi, header_bc);
-#endif // !CONFIG_EC_ADAPT
-#endif // CONFIG_PVQ
#endif // CONFIG_LV_MAP
-#if CONFIG_VAR_TX
+#if CONFIG_VAR_TX && !CONFIG_NEW_MULTISYMBOL
update_txfm_partition_probs(cm, header_bc, counts, probwt);
#endif
+#if !CONFIG_NEW_MULTISYMBOL
update_skip_probs(cm, header_bc, counts);
-#if !CONFIG_EC_ADAPT && CONFIG_DELTA_Q
- update_delta_q_probs(cm, header_bc, counts);
-#if CONFIG_EXT_DELTA_Q
- update_delta_lf_probs(cm, header_bc, counts);
-#endif
-#endif
-#if !CONFIG_EC_ADAPT
- update_seg_probs(cpi, header_bc);
-
- for (i = 0; i < INTRA_MODES; ++i) {
- prob_diff_update(av1_intra_mode_tree, fc->uv_mode_prob[i],
- counts->uv_mode[i], INTRA_MODES, probwt, header_bc);
- }
-
-#if CONFIG_EXT_PARTITION_TYPES
- for (i = 0; i < PARTITION_PLOFFSET; ++i)
- prob_diff_update(av1_partition_tree, fc->partition_prob[i],
- counts->partition[i], PARTITION_TYPES, probwt, header_bc);
- for (; i < PARTITION_CONTEXTS_PRIMARY; ++i)
- prob_diff_update(av1_ext_partition_tree, fc->partition_prob[i],
- counts->partition[i], EXT_PARTITION_TYPES, probwt,
- header_bc);
-#else
- for (i = 0; i < PARTITION_CONTEXTS_PRIMARY; ++i)
- prob_diff_update(av1_partition_tree, fc->partition_prob[i],
- counts->partition[i], PARTITION_TYPES, probwt, header_bc);
-#endif // CONFIG_EXT_PARTITION_TYPES
-#if CONFIG_UNPOISON_PARTITION_CTX
- for (; i < PARTITION_CONTEXTS_PRIMARY + PARTITION_BLOCK_SIZES; ++i) {
- unsigned int ct[2] = { counts->partition[i][PARTITION_VERT],
- counts->partition[i][PARTITION_SPLIT] };
- assert(counts->partition[i][PARTITION_NONE] == 0);
- assert(counts->partition[i][PARTITION_HORZ] == 0);
- assert(fc->partition_prob[i][PARTITION_NONE] == 0);
- assert(fc->partition_prob[i][PARTITION_HORZ] == 0);
- av1_cond_prob_diff_update(header_bc, &fc->partition_prob[i][PARTITION_VERT],
- ct, probwt);
- }
- for (; i < PARTITION_CONTEXTS_PRIMARY + 2 * PARTITION_BLOCK_SIZES; ++i) {
- unsigned int ct[2] = { counts->partition[i][PARTITION_HORZ],
- counts->partition[i][PARTITION_SPLIT] };
- assert(counts->partition[i][PARTITION_NONE] == 0);
- assert(counts->partition[i][PARTITION_VERT] == 0);
- assert(fc->partition_prob[i][PARTITION_NONE] == 0);
- assert(fc->partition_prob[i][PARTITION_VERT] == 0);
- av1_cond_prob_diff_update(header_bc, &fc->partition_prob[i][PARTITION_HORZ],
- ct, probwt);
- }
#endif
-#if CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
- for (i = 0; i < INTRA_FILTERS + 1; ++i)
- prob_diff_update(av1_intra_filter_tree, fc->intra_filter_probs[i],
- counts->intra_filter[i], INTRA_FILTERS, probwt, header_bc);
-#endif // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
-#endif // !CONFIG_EC_ADAPT
if (frame_is_intra_only(cm)) {
- av1_copy(cm->kf_y_prob, av1_kf_y_mode_prob);
av1_copy(cm->fc->kf_y_cdf, av1_kf_y_mode_cdf);
-#if !CONFIG_EC_ADAPT
- for (i = 0; i < INTRA_MODES; ++i)
- for (j = 0; j < INTRA_MODES; ++j)
- prob_diff_update(av1_intra_mode_tree, cm->kf_y_prob[i][j],
- counts->kf_y_mode[i][j], INTRA_MODES, probwt,
- header_bc);
-#endif // CONFIG_EC_ADAPT
#if CONFIG_INTRABC
if (cm->allow_screen_content_tools) {
av1_cond_prob_diff_update(header_bc, &fc->intrabc_prob,
@@ -4725,56 +4627,54 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
}
#endif
} else {
+#if !CONFIG_NEW_MULTISYMBOL
update_inter_mode_probs(cm, header_bc, counts);
+#endif
#if CONFIG_EXT_INTER
- update_inter_compound_mode_probs(cm, probwt, header_bc);
#if CONFIG_INTERINTRA
if (cm->reference_mode != COMPOUND_REFERENCE &&
cm->allow_interintra_compound) {
+#if !CONFIG_NEW_MULTISYMBOL
for (i = 0; i < BLOCK_SIZE_GROUPS; i++) {
if (is_interintra_allowed_bsize_group(i)) {
av1_cond_prob_diff_update(header_bc, &fc->interintra_prob[i],
cm->counts.interintra[i], probwt);
}
}
- for (i = 0; i < BLOCK_SIZE_GROUPS; i++) {
- prob_diff_update(
- av1_interintra_mode_tree, cm->fc->interintra_mode_prob[i],
- counts->interintra_mode[i], INTERINTRA_MODES, probwt, header_bc);
- }
-#if CONFIG_WEDGE
- for (i = 0; i < BLOCK_SIZES; i++) {
+#endif
+#if CONFIG_WEDGE && !CONFIG_NEW_MULTISYMBOL
+#if CONFIG_EXT_PARTITION_TYPES
+ int block_sizes_to_update = BLOCK_SIZES_ALL;
+#else
+ int block_sizes_to_update = BLOCK_SIZES;
+#endif
+ for (i = 0; i < block_sizes_to_update; i++) {
if (is_interintra_allowed_bsize(i) && is_interintra_wedge_used(i))
av1_cond_prob_diff_update(header_bc, &fc->wedge_interintra_prob[i],
cm->counts.wedge_interintra[i], probwt);
}
-#endif // CONFIG_WEDGE
+#endif  // CONFIG_WEDGE && !CONFIG_NEW_MULTISYMBOL
}
#endif // CONFIG_INTERINTRA
-#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
- if (cm->reference_mode != SINGLE_REFERENCE && cm->allow_masked_compound) {
- for (i = 0; i < BLOCK_SIZES; i++)
- prob_diff_update(av1_compound_type_tree, fc->compound_type_prob[i],
- cm->counts.compound_interinter[i], COMPOUND_TYPES,
- probwt, header_bc);
- }
-#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- for (i = BLOCK_8X8; i < BLOCK_SIZES; ++i)
- prob_diff_update(av1_motion_mode_tree, fc->motion_mode_prob[i],
- counts->motion_mode[i], MOTION_MODES, probwt, header_bc);
-#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-#if !CONFIG_EC_ADAPT
- if (cm->interp_filter == SWITCHABLE)
- update_switchable_interp_probs(cm, header_bc, counts);
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ for (i = ADAPT_OVERLAP_BLOCK_8X8; i < ADAPT_OVERLAP_BLOCKS; ++i) {
+ prob_diff_update(av1_ncobmc_mode_tree, fc->ncobmc_mode_prob[i],
+ counts->ncobmc_mode[i], MAX_NCOBMC_MODES, probwt,
+ header_bc);
+ }
#endif
+#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+#if !CONFIG_NEW_MULTISYMBOL
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
av1_cond_prob_diff_update(header_bc, &fc->intra_inter_prob[i],
counts->intra_inter[i], probwt);
+#endif
+#if !CONFIG_NEW_MULTISYMBOL
if (cpi->allow_comp_inter_inter) {
const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT;
if (use_hybrid_pred)
@@ -4791,7 +4691,19 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
}
}
}
+
if (cm->reference_mode != SINGLE_REFERENCE) {
+#if CONFIG_EXT_COMP_REFS
+ for (i = 0; i < COMP_REF_TYPE_CONTEXTS; i++)
+ av1_cond_prob_diff_update(header_bc, &fc->comp_ref_type_prob[i],
+ counts->comp_ref_type[i], probwt);
+
+ for (i = 0; i < UNI_COMP_REF_CONTEXTS; i++)
+ for (j = 0; j < (UNIDIR_COMP_REFS - 1); j++)
+ av1_cond_prob_diff_update(header_bc, &fc->uni_comp_ref_prob[i][j],
+ counts->uni_comp_ref[i][j], probwt);
+#endif // CONFIG_EXT_COMP_REFS
+
for (i = 0; i < REF_CONTEXTS; i++) {
#if CONFIG_EXT_REFS
for (j = 0; j < (FWD_REFS - 1); j++) {
@@ -4810,17 +4722,16 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
#endif // CONFIG_EXT_REFS
}
}
+#endif // CONFIG_NEW_MULTISYMBOL
-#if !CONFIG_EC_ADAPT
- for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
- prob_diff_update(av1_intra_mode_tree, cm->fc->y_mode_prob[i],
- counts->y_mode[i], INTRA_MODES, probwt, header_bc);
- }
-#endif
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ for (i = 0; i < COMP_INTER_MODE_CONTEXTS; i++)
+ av1_cond_prob_diff_update(header_bc, &fc->comp_inter_mode_prob[i],
+ counts->comp_inter_mode[i], probwt);
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if !CONFIG_NEW_MULTISYMBOL
av1_write_nmv_probs(cm, cm->allow_high_precision_mv, header_bc, counts->mv);
-#if !CONFIG_EC_ADAPT
- update_ext_tx_probs(cm, header_bc);
#endif
#if CONFIG_SUPERTX
if (!xd->lossless[0]) update_supertx_probs(cm, probwt, header_bc);
@@ -4829,12 +4740,6 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
write_global_motion(cpi, header_bc);
#endif // CONFIG_GLOBAL_MOTION
}
-#if !CONFIG_EC_ADAPT
- av1_coef_head_cdfs(fc);
- av1_coef_pareto_cdfs(fc);
- for (i = 0; i < NMV_CONTEXTS; ++i) av1_set_mv_cdfs(&fc->nmvc[i]);
- av1_set_mode_cdfs(cm);
-#endif // !CONFIG_EC_ADAPT
#if CONFIG_ANS
aom_buf_ans_flush(header_bc);
header_size = buf_ans_write_end(header_bc);
@@ -4881,16 +4786,23 @@ static int remux_tiles(const AV1_COMMON *const cm, uint8_t *dst,
const uint32_t max_tile_col_size,
int *const tile_size_bytes,
int *const tile_col_size_bytes) {
-// Choose the tile size bytes (tsb) and tile column size bytes (tcsb)
+ // Choose the tile size bytes (tsb) and tile column size bytes (tcsb)
+ int tsb;
+ int tcsb;
+
#if CONFIG_EXT_TILE
- // The top bit in the tile size field indicates tile copy mode, so we
- // have 1 less bit to code the tile size
- const int tsb = choose_size_bytes(max_tile_size, 1);
- const int tcsb = choose_size_bytes(max_tile_col_size, 0);
-#else
- const int tsb = choose_size_bytes(max_tile_size, 0);
- const int tcsb = 4; // This is ignored
- (void)max_tile_col_size;
+ if (cm->large_scale_tile) {
+ // The top bit in the tile size field indicates tile copy mode, so we
+ // have 1 less bit to code the tile size
+ tsb = choose_size_bytes(max_tile_size, 1);
+ tcsb = choose_size_bytes(max_tile_col_size, 0);
+ } else {
+#endif // CONFIG_EXT_TILE
+ tsb = choose_size_bytes(max_tile_size, 0);
+ tcsb = 4; // This is ignored
+ (void)max_tile_col_size;
+#if CONFIG_EXT_TILE
+ }
#endif // CONFIG_EXT_TILE
assert(tsb > 0);
@@ -4906,64 +4818,68 @@ static int remux_tiles(const AV1_COMMON *const cm, uint8_t *dst,
uint32_t rpos = 0;
#if CONFIG_EXT_TILE
- int tile_row;
- int tile_col;
-
- for (tile_col = 0; tile_col < cm->tile_cols; tile_col++) {
- // All but the last column has a column header
- if (tile_col < cm->tile_cols - 1) {
- uint32_t tile_col_size = mem_get_le32(dst + rpos);
- rpos += 4;
+ if (cm->large_scale_tile) {
+ int tile_row;
+ int tile_col;
+
+ for (tile_col = 0; tile_col < cm->tile_cols; tile_col++) {
+ // All but the last column has a column header
+ if (tile_col < cm->tile_cols - 1) {
+ uint32_t tile_col_size = mem_get_le32(dst + rpos);
+ rpos += 4;
+
+ // Adjust the tile column size by the number of bytes removed
+ // from the tile size fields.
+ tile_col_size -= (4 - tsb) * cm->tile_rows;
+
+ mem_put_varsize(dst + wpos, tcsb, tile_col_size);
+ wpos += tcsb;
+ }
- // Adjust the tile column size by the number of bytes removed
- // from the tile size fields.
- tile_col_size -= (4 - tsb) * cm->tile_rows;
+ for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) {
+        // All rows, including the last, have a header
+ uint32_t tile_header = mem_get_le32(dst + rpos);
+ rpos += 4;
+
+        // If this is a copy tile, we need to shift the MSB to the top bit
+        // of the new, narrower size field, and there is no data to copy.
+ if (tile_header >> 31 != 0) {
+ if (tsb < 4) tile_header >>= 32 - 8 * tsb;
+ mem_put_varsize(dst + wpos, tsb, tile_header);
+ wpos += tsb;
+ } else {
+ mem_put_varsize(dst + wpos, tsb, tile_header);
+ wpos += tsb;
- mem_put_varsize(dst + wpos, tcsb, tile_col_size);
- wpos += tcsb;
+ memmove(dst + wpos, dst + rpos, tile_header);
+ rpos += tile_header;
+ wpos += tile_header;
+ }
+ }
}
+ } else {
+#endif // CONFIG_EXT_TILE
+ const int n_tiles = cm->tile_cols * cm->tile_rows;
+ int n;
- for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) {
- // All, including the last row has a header
- uint32_t tile_header = mem_get_le32(dst + rpos);
- rpos += 4;
+ for (n = 0; n < n_tiles; n++) {
+ int tile_size;
- // If this is a copy tile, we need to shift the MSB to the
- // top bit of the new width, and there is no data to copy.
- if (tile_header >> 31 != 0) {
- if (tsb < 4) tile_header >>= 32 - 8 * tsb;
- mem_put_varsize(dst + wpos, tsb, tile_header);
- wpos += tsb;
+ if (n == n_tiles - 1) {
+ tile_size = data_size - rpos;
} else {
- mem_put_varsize(dst + wpos, tsb, tile_header);
+ tile_size = mem_get_le32(dst + rpos);
+ rpos += 4;
+ mem_put_varsize(dst + wpos, tsb, tile_size);
wpos += tsb;
-
- memmove(dst + wpos, dst + rpos, tile_header);
- rpos += tile_header;
- wpos += tile_header;
}
- }
- }
-#else
- const int n_tiles = cm->tile_cols * cm->tile_rows;
- int n;
- for (n = 0; n < n_tiles; n++) {
- int tile_size;
+ memmove(dst + wpos, dst + rpos, tile_size);
- if (n == n_tiles - 1) {
- tile_size = data_size - rpos;
- } else {
- tile_size = mem_get_le32(dst + rpos);
- rpos += 4;
- mem_put_varsize(dst + wpos, tsb, tile_size);
- wpos += tsb;
+ rpos += tile_size;
+ wpos += tile_size;
}
-
- memmove(dst + wpos, dst + rpos, tile_size);
-
- rpos += tile_size;
- wpos += tile_size;
+#if CONFIG_EXT_TILE
}
#endif // CONFIG_EXT_TILE
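
The remux loop above rewrites each 4-byte tile size field down to tile_size_bytes (and, for large-scale tiles, the column fields down to tile_col_size_bytes). A standalone sketch of the field-width choice it relies on, with one spare top bit reserved when the field also carries the tile-copy flag (hypothetical helper, not the aom choose_size_bytes() implementation):

#include <stdint.h>

/* Smallest little-endian field width (1..4 bytes) able to hold `size`,
 * optionally keeping the top bit free for the tile-copy flag. */
static int size_field_bytes(uint32_t size, int reserve_copy_bit) {
  int bytes;
  for (bytes = 1; bytes <= 4; ++bytes) {
    if (((uint64_t)size >> (8 * bytes - reserve_copy_bit)) == 0) return bytes;
  }
  return -1; /* does not fit in four bytes */
}
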
@@ -4976,14 +4892,17 @@ static int remux_tiles(const AV1_COMMON *const cm, uint8_t *dst,
void av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dst, size_t *size) {
uint8_t *data = dst;
-#if !CONFIG_TILE_GROUPS
- uint32_t compressed_header_size;
+ uint32_t data_size;
+#if CONFIG_EXT_TILE
+ AV1_COMMON *const cm = &cpi->common;
+ uint32_t compressed_header_size = 0;
uint32_t uncompressed_header_size;
struct aom_write_bit_buffer saved_wb;
-#endif
- uint32_t data_size;
struct aom_write_bit_buffer wb = { data, 0 };
-
+ const int have_tiles = cm->tile_cols * cm->tile_rows > 1;
+ int tile_size_bytes;
+ int tile_col_size_bytes;
+#endif // CONFIG_EXT_TILE
unsigned int max_tile_size;
unsigned int max_tile_col_size;
@@ -4991,76 +4910,77 @@ void av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dst, size_t *size) {
bitstream_queue_reset_write();
#endif
-#if !CONFIG_TILE_GROUPS
- int tile_size_bytes;
- int tile_col_size_bytes;
- AV1_COMMON *const cm = &cpi->common;
- const int have_tiles = cm->tile_cols * cm->tile_rows > 1;
-
- // Write the uncompressed header
- write_uncompressed_header(cpi, &wb);
+#if CONFIG_EXT_TILE
+ if (cm->large_scale_tile) {
+ // Write the uncompressed header
+ write_uncompressed_header(cpi, &wb);
#if CONFIG_EXT_REFS
- if (cm->show_existing_frame) {
- *size = aom_wb_bytes_written(&wb);
- return;
- }
+ if (cm->show_existing_frame) {
+ *size = aom_wb_bytes_written(&wb);
+ return;
+ }
#endif // CONFIG_EXT_REFS
- // We do not know these in advance. Output placeholder bit.
- saved_wb = wb;
- // Write tile size magnitudes
- if (have_tiles) {
-// Note that the last item in the uncompressed header is the data
-// describing tile configuration.
-#if CONFIG_EXT_TILE
- // Number of bytes in tile column size - 1
- aom_wb_write_literal(&wb, 0, 2);
-#endif // CONFIG_EXT_TILE
- // Number of bytes in tile size - 1
- aom_wb_write_literal(&wb, 0, 2);
- }
- // Size of compressed header
- aom_wb_write_literal(&wb, 0, 16);
+ // We do not know these in advance. Output placeholder bit.
+ saved_wb = wb;
+ // Write tile size magnitudes
+ if (have_tiles) {
+ // Note that the last item in the uncompressed header is the data
+ // describing tile configuration.
+ // Number of bytes in tile column size - 1
+ aom_wb_write_literal(&wb, 0, 2);
+
+ // Number of bytes in tile size - 1
+ aom_wb_write_literal(&wb, 0, 2);
+ }
+ // Size of compressed header
+ aom_wb_write_literal(&wb, 0, 16);
- uncompressed_header_size = (uint32_t)aom_wb_bytes_written(&wb);
- data += uncompressed_header_size;
+ uncompressed_header_size = (uint32_t)aom_wb_bytes_written(&wb);
+ data += uncompressed_header_size;
- aom_clear_system_state();
+ aom_clear_system_state();
- // Write the compressed header
- compressed_header_size = write_compressed_header(cpi, data);
- data += compressed_header_size;
+ // Write the compressed header
+ compressed_header_size = write_compressed_header(cpi, data);
+ data += compressed_header_size;
- // Write the encoded tile data
- data_size = write_tiles(cpi, data, &max_tile_size, &max_tile_col_size);
-#else
- data_size = write_tiles(cpi, &wb, &max_tile_size, &max_tile_col_size);
-#endif
-#if !CONFIG_TILE_GROUPS
- if (have_tiles) {
- data_size =
- remux_tiles(cm, data, data_size, max_tile_size, max_tile_col_size,
- &tile_size_bytes, &tile_col_size_bytes);
+ // Write the encoded tile data
+ data_size = write_tiles(cpi, data, &max_tile_size, &max_tile_col_size);
+ } else {
+#endif // CONFIG_EXT_TILE
+ data_size = write_tiles(cpi, data, &max_tile_size, &max_tile_col_size);
+#if CONFIG_EXT_TILE
}
+#endif // CONFIG_EXT_TILE
+#if CONFIG_EXT_TILE
+ if (cm->large_scale_tile) {
+ if (have_tiles) {
+ data_size =
+ remux_tiles(cm, data, data_size, max_tile_size, max_tile_col_size,
+ &tile_size_bytes, &tile_col_size_bytes);
+ }
- data += data_size;
+ data += data_size;
- // Now fill in the gaps in the uncompressed header.
- if (have_tiles) {
-#if CONFIG_EXT_TILE
- assert(tile_col_size_bytes >= 1 && tile_col_size_bytes <= 4);
- aom_wb_write_literal(&saved_wb, tile_col_size_bytes - 1, 2);
+ // Now fill in the gaps in the uncompressed header.
+ if (have_tiles) {
+ assert(tile_col_size_bytes >= 1 && tile_col_size_bytes <= 4);
+ aom_wb_write_literal(&saved_wb, tile_col_size_bytes - 1, 2);
+
+ assert(tile_size_bytes >= 1 && tile_size_bytes <= 4);
+ aom_wb_write_literal(&saved_wb, tile_size_bytes - 1, 2);
+ }
+ // TODO(jbb): Figure out what to do if compressed_header_size > 16 bits.
+ assert(compressed_header_size <= 0xffff);
+ aom_wb_write_literal(&saved_wb, compressed_header_size, 16);
+ } else {
#endif // CONFIG_EXT_TILE
- assert(tile_size_bytes >= 1 && tile_size_bytes <= 4);
- aom_wb_write_literal(&saved_wb, tile_size_bytes - 1, 2);
+ data += data_size;
+#if CONFIG_EXT_TILE
}
- // TODO(jbb): Figure out what to do if compressed_header_size > 16 bits.
- assert(compressed_header_size <= 0xffff);
- aom_wb_write_literal(&saved_wb, compressed_header_size, 16);
-#else
- data += data_size;
-#endif
+#endif // CONFIG_EXT_TILE
#if CONFIG_ANS && ANS_REVERSE
// Avoid aliasing the superframe index
*data++ = 0;
diff --git a/third_party/aom/av1/encoder/bitstream.h b/third_party/aom/av1/encoder/bitstream.h
index c75d80891..29c930356 100644
--- a/third_party/aom/av1/encoder/bitstream.h
+++ b/third_party/aom/av1/encoder/bitstream.h
@@ -19,7 +19,11 @@ extern "C" {
#include "av1/encoder/encoder.h"
#if CONFIG_REFERENCE_BUFFER
-void write_sequence_header(SequenceHeader *seq_params);
+void write_sequence_header(
+#if CONFIG_EXT_TILE
+ AV1_COMMON *const cm,
+#endif // CONFIG_EXT_TILE
+ SequenceHeader *seq_params);
#endif
void av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dest, size_t *size);
@@ -42,7 +46,8 @@ void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
const int supertx_enabled,
#endif
#if CONFIG_TXK_SEL
- int block, int plane,
+ int blk_row, int blk_col, int block, int plane,
+ TX_SIZE tx_size,
#endif
aom_writer *w);
diff --git a/third_party/aom/av1/encoder/block.h b/third_party/aom/av1/encoder/block.h
index e16479e64..7b6eb0b0e 100644
--- a/third_party/aom/av1/encoder/block.h
+++ b/third_party/aom/av1/encoder/block.h
@@ -116,7 +116,6 @@ struct macroblock {
  // The equivalent SAD error of one (whole) bit at the current quantizer
// for sub-8x8 blocks.
int sadperbit4;
- int rddiv;
int rdmult;
int mb_energy;
int *m_search_count_ptr;
@@ -206,16 +205,15 @@ struct macroblock {
int pvq_speed;
int pvq_coded; // Indicates whether pvq_info needs be stored to tokenize
#endif
-#if CONFIG_DAALA_DIST
- // Keep rate of each 4x4 block in the current macroblock during RDO
- // This is needed when using the 8x8 Daala distortion metric during RDO,
- // because it evaluates distortion in a different order than the underlying
- // 4x4 blocks are coded.
- int rate_4x4[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+#if CONFIG_DIST_8X8
#if CONFIG_CB4X4
+#if CONFIG_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint16_t, decoded_8x8[8 * 8]);
+#else
DECLARE_ALIGNED(16, uint8_t, decoded_8x8[8 * 8]);
+#endif
#endif // CONFIG_CB4X4
-#endif // CONFIG_DAALA_DIST
+#endif // CONFIG_DIST_8X8
#if CONFIG_CFL
// Whether luma needs to be stored during RDO.
int cfl_store_y;
diff --git a/third_party/aom/av1/encoder/context_tree.c b/third_party/aom/av1/encoder/context_tree.c
index 4c7d6ff00..b1c01b28e 100644
--- a/third_party/aom/av1/encoder/context_tree.c
+++ b/third_party/aom/av1/encoder/context_tree.c
@@ -65,12 +65,10 @@ static void alloc_mode_context(AV1_COMMON *cm, int num_4x4_blk,
}
#if CONFIG_PALETTE
- if (cm->allow_screen_content_tools) {
- for (i = 0; i < 2; ++i) {
- CHECK_MEM_ERROR(
- cm, ctx->color_index_map[i],
- aom_memalign(32, num_pix * sizeof(*ctx->color_index_map[i])));
- }
+ for (i = 0; i < 2; ++i) {
+ CHECK_MEM_ERROR(
+ cm, ctx->color_index_map[i],
+ aom_memalign(32, num_pix * sizeof(*ctx->color_index_map[i])));
}
#endif // CONFIG_PALETTE
}
@@ -141,7 +139,13 @@ static void alloc_tree_contexts(AV1_COMMON *cm, PC_TREE *tree,
&tree->verticalb[1]);
alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_VERT_B,
&tree->verticalb[2]);
-#ifdef CONFIG_SUPERTX
+ for (int i = 0; i < 4; ++i) {
+ alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_4,
+ &tree->horizontal4[i]);
+    alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_VERT_4,
+ &tree->vertical4[i]);
+ }
+#if CONFIG_SUPERTX
alloc_mode_context(cm, num_4x4_blk, PARTITION_HORZ,
&tree->horizontal_supertx);
alloc_mode_context(cm, num_4x4_blk, PARTITION_VERT, &tree->vertical_supertx);
@@ -159,7 +163,7 @@ static void alloc_tree_contexts(AV1_COMMON *cm, PC_TREE *tree,
alloc_mode_context(cm, num_4x4_blk, &tree->none);
alloc_mode_context(cm, num_4x4_blk / 2, &tree->horizontal[0]);
alloc_mode_context(cm, num_4x4_blk / 2, &tree->vertical[0]);
-#ifdef CONFIG_SUPERTX
+#if CONFIG_SUPERTX
alloc_mode_context(cm, num_4x4_blk, &tree->horizontal_supertx);
alloc_mode_context(cm, num_4x4_blk, &tree->vertical_supertx);
alloc_mode_context(cm, num_4x4_blk, &tree->split_supertx);
@@ -184,13 +188,17 @@ static void free_tree_contexts(PC_TREE *tree) {
free_mode_context(&tree->verticala[i]);
free_mode_context(&tree->verticalb[i]);
}
+ for (i = 0; i < 4; ++i) {
+ free_mode_context(&tree->horizontal4[i]);
+ free_mode_context(&tree->vertical4[i]);
+ }
#endif // CONFIG_EXT_PARTITION_TYPES
free_mode_context(&tree->none);
free_mode_context(&tree->horizontal[0]);
free_mode_context(&tree->horizontal[1]);
free_mode_context(&tree->vertical[0]);
free_mode_context(&tree->vertical[1]);
-#ifdef CONFIG_SUPERTX
+#if CONFIG_SUPERTX
free_mode_context(&tree->horizontal_supertx);
free_mode_context(&tree->vertical_supertx);
free_mode_context(&tree->split_supertx);
diff --git a/third_party/aom/av1/encoder/context_tree.h b/third_party/aom/av1/encoder/context_tree.h
index 4f9d5e374..bcfcc274a 100644
--- a/third_party/aom/av1/encoder/context_tree.h
+++ b/third_party/aom/av1/encoder/context_tree.h
@@ -81,12 +81,14 @@ typedef struct PC_TREE {
PICK_MODE_CONTEXT horizontalb[3];
PICK_MODE_CONTEXT verticala[3];
PICK_MODE_CONTEXT verticalb[3];
+ PICK_MODE_CONTEXT horizontal4[4];
+ PICK_MODE_CONTEXT vertical4[4];
#endif
union {
struct PC_TREE *split[4];
PICK_MODE_CONTEXT *leaf_split[4];
};
-#ifdef CONFIG_SUPERTX
+#if CONFIG_SUPERTX
PICK_MODE_CONTEXT horizontal_supertx;
PICK_MODE_CONTEXT vertical_supertx;
PICK_MODE_CONTEXT split_supertx;
diff --git a/third_party/aom/av1/encoder/cost.c b/third_party/aom/av1/encoder/cost.c
index e3151a597..e33df53e4 100644
--- a/third_party/aom/av1/encoder/cost.c
+++ b/third_party/aom/av1/encoder/cost.c
@@ -65,3 +65,21 @@ void av1_cost_tokens_skip(int *costs, const aom_prob *probs, aom_tree tree) {
costs[-tree[0]] = av1_cost_bit(probs[0], 0);
cost(costs, tree, probs, 2, 0);
}
+
+void av1_cost_tokens_from_cdf(int *costs, const aom_cdf_prob *cdf,
+ const int *inv_map) {
+ int i;
+ aom_cdf_prob prev_cdf = 0;
+ for (i = 0;; ++i) {
+ const aom_cdf_prob p15 = AOM_ICDF(cdf[i]) - prev_cdf;
+ prev_cdf = AOM_ICDF(cdf[i]);
+
+ if (inv_map)
+ costs[inv_map[i]] = av1_cost_symbol(p15);
+ else
+ costs[i] = av1_cost_symbol(p15);
+
+ // Stop once we reach the end of the CDF
+ if (cdf[i] == AOM_ICDF(CDF_PROB_TOP)) break;
+ }
+}
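
The helper added above walks a 15-bit CDF and turns each symbol's probability mass (the difference between adjacent CDF entries) into a rate cost. Below is a minimal standalone sketch of that same walk, not part of the patch: it assumes a plain, non-inverted Q15 CDF terminated by 32768, approximates the fixed-point av1_cost_symbol() with floating-point -log2(), and uses a hypothetical four-symbol CDF; the toy_* names are illustrative only.

#include <math.h>
#include <stdio.h>

#define TOY_CDF_TOP 32768 /* 2^15, same scale as CDF_PROB_TOP */

/* Cost of a symbol with probability p15 / 2^15, in 1/512-bit units
   (512 units per bit, assuming AV1_PROB_COST_SHIFT == 9). */
static int toy_cost_symbol(int p15) {
  return (int)lround(-log2((double)p15 / TOY_CDF_TOP) * 512.0);
}

int main(void) {
  /* Hypothetical 4-symbol CDF with probabilities 1/2, 1/4, 1/8, 1/8. */
  const int cdf[4] = { 16384, 24576, 28672, TOY_CDF_TOP };
  int prev = 0;
  for (int i = 0;; ++i) {
    const int p15 = cdf[i] - prev; /* per-symbol probability mass */
    prev = cdf[i];
    printf("symbol %d: p15=%5d cost=%4d (~%.2f bits)\n", i, p15,
           toy_cost_symbol(p15), toy_cost_symbol(p15) / 512.0);
    if (cdf[i] == TOY_CDF_TOP) break; /* end of the CDF, as above */
  }
  return 0;
}

Compile with -lm; the printed costs (512, 1024, 1536, 1536 units) are 1, 2, 3 and 3 bits, matching what the table-driven av1_cost_tokens_from_cdf() would store.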
diff --git a/third_party/aom/av1/encoder/cost.h b/third_party/aom/av1/encoder/cost.h
index d8fb357e6..e60632005 100644
--- a/third_party/aom/av1/encoder/cost.h
+++ b/third_party/aom/av1/encoder/cost.h
@@ -34,6 +34,14 @@ extern const uint16_t av1_prob_cost[256];
// for each bit.
#define av1_cost_literal(n) ((n) * (1 << AV1_PROB_COST_SHIFT))
+// Calculate the cost of a symbol with probability p15 / 2^15
+static INLINE int av1_cost_symbol(aom_cdf_prob p15) {
+ assert(0 < p15 && p15 < CDF_PROB_TOP);
+ const int shift = CDF_PROB_BITS - 1 - get_msb(p15);
+ return av1_cost_zero(get_prob(p15 << shift, CDF_PROB_TOP)) +
+ av1_cost_literal(shift);
+}
+
static INLINE unsigned int cost_branch256(const unsigned int ct[2],
aom_prob p) {
return ct[0] * av1_cost_zero(p) + ct[1] * av1_cost_one(p);
@@ -55,6 +63,8 @@ static INLINE int treed_cost(aom_tree tree, const aom_prob *probs, int bits,
void av1_cost_tokens(int *costs, const aom_prob *probs, aom_tree tree);
void av1_cost_tokens_skip(int *costs, const aom_prob *probs, aom_tree tree);
+void av1_cost_tokens_from_cdf(int *costs, const aom_cdf_prob *cdf,
+ const int *inv_map);
#ifdef __cplusplus
} // extern "C"
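
A worked example of the new av1_cost_symbol(), assuming CDF_PROB_BITS == 15 and the usual 1/512-bit cost units: for p15 = 4096 (probability 1/8), get_msb(4096) is 12, so shift = 15 - 1 - 12 = 2 and p15 << shift = 16384; get_prob(16384, 32768) is 128, whose zero-branch cost is one bit (512 units), and av1_cost_literal(2) adds two more bits (1024 units), for a total of 1536 units = 3 bits = -log2(1/8), as expected.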
diff --git a/third_party/aom/av1/encoder/dct.c b/third_party/aom/av1/encoder/dct.c
index f6b64f0f7..850b84ca9 100644
--- a/third_party/aom/av1/encoder/dct.c
+++ b/third_party/aom/av1/encoder/dct.c
@@ -21,6 +21,9 @@
#include "av1/common/av1_fwd_txfm1d.h"
#include "av1/common/av1_fwd_txfm1d_cfg.h"
#include "av1/common/idct.h"
+#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8
+#include "av1/common/daala_tx.h"
+#endif
static INLINE void range_check(const tran_low_t *input, const int size,
const int bit) {
@@ -39,6 +42,18 @@ static INLINE void range_check(const tran_low_t *input, const int size,
#endif
}
+#if CONFIG_DAALA_DCT4
+static void fdct4(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[4];
+ od_coeff y[4];
+ for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i];
+ od_bin_fdct4(y, x, 1);
+ for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i];
+}
+
+#else
+
static void fdct4(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[4];
@@ -74,6 +89,19 @@ static void fdct4(const tran_low_t *input, tran_low_t *output) {
range_check(output, 4, 16);
}
+#endif
+
+#if CONFIG_DAALA_DCT8
+static void fdct8(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[8];
+ od_coeff y[8];
+ for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i];
+ od_bin_fdct8(y, x, 1);
+ for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i];
+}
+
+#else
static void fdct8(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
@@ -152,6 +180,7 @@ static void fdct8(const tran_low_t *input, tran_low_t *output) {
range_check(output, 8, 16);
}
+#endif
static void fdct16(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
@@ -767,6 +796,18 @@ static void fadst4(const tran_low_t *input, tran_low_t *output) {
output[3] = (tran_low_t)fdct_round_shift(s3);
}
+#if CONFIG_DAALA_DCT8
+static void fadst8(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[8];
+ od_coeff y[8];
+ for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i];
+ od_bin_fdst8(y, x, 1);
+ for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i];
+}
+
+#else
+
static void fadst8(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
@@ -837,6 +878,7 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) {
output[6] = (tran_low_t)x5;
output[7] = (tran_low_t)-x1;
}
+#endif
static void fadst16(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
@@ -1021,6 +1063,83 @@ static void fhalfright32(const tran_low_t *input, tran_low_t *output) {
// Note overall scaling factor is 4 times orthogonal
}
+#if CONFIG_MRC_TX
+static void get_masked_residual32(const int16_t **input, int *input_stride,
+ const uint8_t *pred, int pred_stride,
+ int16_t *masked_input) {
+ int mrc_mask[32 * 32];
+ get_mrc_mask(pred, pred_stride, mrc_mask, 32, 32, 32);
+ int32_t sum = 0;
+ int16_t avg;
+ // Get the masked average of the prediction
+ for (int i = 0; i < 32; ++i) {
+ for (int j = 0; j < 32; ++j) {
+ sum += mrc_mask[i * 32 + j] * (*input)[i * (*input_stride) + j];
+ }
+ }
+ avg = ROUND_POWER_OF_TWO_SIGNED(sum, 10);
+ // Replace all of the unmasked pixels in the prediction with the average
+ // of the masked pixels
+ for (int i = 0; i < 32; ++i) {
+ for (int j = 0; j < 32; ++j)
+ masked_input[i * 32 + j] =
+ (mrc_mask[i * 32 + j]) ? (*input)[i * (*input_stride) + j] : avg;
+ }
+ *input = masked_input;
+ *input_stride = 32;
+}
+#endif // CONFIG_MRC_TX
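
get_masked_residual32() above sums the residual only where the MRC mask is set, divides by the full pixel count (the shift by 10 is 32 * 32), and then substitutes that average into every unmasked position. The following is a minimal sketch of the same substitution on a 4x4 block, not part of the patch; the mask here is a caller-supplied hypothetical input rather than the output of get_mrc_mask(), and the toy_* names are illustrative only.

#include <stdint.h>

/* Rounded signed shift, like ROUND_POWER_OF_TWO_SIGNED(). */
static int16_t toy_round_shift_signed(int32_t v, int bits) {
  return (int16_t)((v >= 0) ? ((v + (1 << (bits - 1))) >> bits)
                            : -((-v + (1 << (bits - 1))) >> bits));
}

/* Replace unmasked residual samples of a 4x4 block with the masked average.
   Note the divisor is the full 16-pixel count, mirroring the 32x32 version
   above, not the number of masked samples. */
static void toy_masked_residual4x4(const int16_t resid[16],
                                   const int mask[16], int16_t out[16]) {
  int32_t sum = 0;
  for (int i = 0; i < 16; ++i) sum += mask[i] * resid[i];
  const int16_t avg = toy_round_shift_signed(sum, 4); /* /16, rounded */
  for (int i = 0; i < 16; ++i) out[i] = mask[i] ? resid[i] : avg;
}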
+
+#if CONFIG_LGT
+static void flgt4(const tran_low_t *input, tran_low_t *output,
+ const tran_high_t *lgtmtx) {
+ if (!(input[0] | input[1] | input[2] | input[3])) {
+ output[0] = output[1] = output[2] = output[3] = 0;
+ return;
+ }
+
+ // evaluate s[j] = sum of all lgtmtx[j][i]*input[i] over i=1,...,4
+ tran_high_t s[4] = { 0 };
+ for (int i = 0; i < 4; ++i)
+ for (int j = 0; j < 4; ++j) s[j] += lgtmtx[j * 4 + i] * input[i];
+
+ for (int i = 0; i < 4; ++i) output[i] = (tran_low_t)fdct_round_shift(s[i]);
+}
+
+static void flgt8(const tran_low_t *input, tran_low_t *output,
+ const tran_high_t *lgtmtx) {
+ // evaluate s[j] = sum of all lgtmtx[j][i]*input[i] over i=1,...,8
+ tran_high_t s[8] = { 0 };
+ for (int i = 0; i < 8; ++i)
+ for (int j = 0; j < 8; ++j) s[j] += lgtmtx[j * 8 + i] * input[i];
+
+ for (int i = 0; i < 8; ++i) output[i] = (tran_low_t)fdct_round_shift(s[i]);
+}
+
+// The get_fwd_lgt functions return 1 if LGT is chosen to apply, and 0 otherwise
+int get_fwd_lgt4(transform_1d tx_orig, TxfmParam *txfm_param,
+ const tran_high_t *lgtmtx[], int ntx) {
+ // inter/intra split
+ if (tx_orig == &fadst4) {
+ for (int i = 0; i < ntx; ++i)
+ lgtmtx[i] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
+ return 1;
+ }
+ return 0;
+}
+
+int get_fwd_lgt8(transform_1d tx_orig, TxfmParam *txfm_param,
+ const tran_high_t *lgtmtx[], int ntx) {
+ // inter/intra split
+ if (tx_orig == &fadst8) {
+ for (int i = 0; i < ntx; ++i)
+ lgtmtx[i] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
+ return 1;
+ }
+ return 0;
+}
+#endif // CONFIG_LGT
+
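
flgt4()/flgt8() above implement the forward LGT as a plain matrix-vector product over fixed-point basis rows, with fdct_round_shift() restoring the output range. A minimal standalone sketch of the 4-point case follows, not part of the patch: the matrix argument is a hypothetical placeholder (the real encoder selects lgt4_140 or lgt4_170 per block via get_fwd_lgt4()), and the rounded shift by 14 assumes the basis rows share the DCT_CONST_BITS scale implied by the fdct_round_shift() call above.

#include <stdint.h>

/* Forward 4-point LGT sketch: out[j] = sum_i mtx[j][i] * in[i], followed by
   a round-to-nearest shift by 14 (mirroring fdct_round_shift). */
static void toy_flgt4(const int16_t in[4], int16_t out[4],
                      const int32_t mtx[4][4]) {
  for (int j = 0; j < 4; ++j) {
    int64_t s = 0;
    for (int i = 0; i < 4; ++i) s += (int64_t)mtx[j][i] * in[i];
    out[j] = (int16_t)((s + (1 << 13)) >> 14);
  }
}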
#if CONFIG_EXT_TX
// TODO(sarahparker) these functions will be removed once the highbitdepth
// codepath works properly for rectangular transforms. They have almost
@@ -1028,13 +1147,24 @@ static void fhalfright32(const tran_low_t *input, tran_low_t *output) {
// being used for square transforms.
static void fidtx4(const tran_low_t *input, tran_low_t *output) {
int i;
- for (i = 0; i < 4; ++i)
+ for (i = 0; i < 4; ++i) {
+#if CONFIG_DAALA_DCT4
+ output[i] = input[i];
+#else
output[i] = (tran_low_t)fdct_round_shift(input[i] * Sqrt2);
+#endif
+ }
}
static void fidtx8(const tran_low_t *input, tran_low_t *output) {
int i;
- for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
+ for (i = 0; i < 8; ++i) {
+#if CONFIG_DAALA_DCT8
+ output[i] = input[i];
+#else
+ output[i] = input[i] * 2;
+#endif
+ }
}
static void fidtx16(const tran_low_t *input, tran_low_t *output) {
@@ -1110,6 +1240,9 @@ static void copy_fliplrud(const int16_t *src, int src_stride, int l, int w,
static void maybe_flip_input(const int16_t **src, int *src_stride, int l, int w,
int16_t *buff, int tx_type) {
switch (tx_type) {
+#if CONFIG_MRC_TX
+ case MRC_DCT:
+#endif // CONFIG_MRC_TX
case DCT_DCT:
case ADST_DCT:
case DCT_ADST:
@@ -1144,10 +1277,21 @@ static void maybe_flip_input(const int16_t **src, int *src_stride, int l, int w,
#endif // CONFIG_EXT_TX
void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
+#if !CONFIG_DAALA_DCT4
if (tx_type == DCT_DCT) {
aom_fdct4x4_c(input, output, stride);
- } else {
+ return;
+ }
+#endif
+ {
static const transform_2d FHT[] = {
{ fdct4, fdct4 }, // DCT_DCT
{ fadst4, fdct4 }, // ADST_DCT
@@ -1166,7 +1310,7 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
{ fidtx4, fadst4 }, // H_ADST
{ fadst4, fidtx4 }, // V_FLIPADST
{ fidtx4, fadst4 }, // H_FLIPADST
-#endif // CONFIG_EXT_TX
+#endif
};
const transform_2d ht = FHT[tx_type];
tran_low_t out[4 * 4];
@@ -1178,25 +1322,60 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
maybe_flip_input(&input, &stride, 4, 4, flipped_input, tx_type);
#endif
+#if CONFIG_LGT
+ // Choose LGT adaptive to the prediction. We may apply different LGTs for
+ // different rows/columns, indicated by the pointers to 2D arrays
+ const tran_high_t *lgtmtx_col[4];
+ const tran_high_t *lgtmtx_row[4];
+ int use_lgt_col = get_fwd_lgt4(ht.cols, txfm_param, lgtmtx_col, 4);
+ int use_lgt_row = get_fwd_lgt4(ht.rows, txfm_param, lgtmtx_row, 4);
+#endif
+
// Columns
for (i = 0; i < 4; ++i) {
+ /* A C99-safe upshift by 4 for both Daala and VPx TX. */
for (j = 0; j < 4; ++j) temp_in[j] = input[j * stride + i] * 16;
+#if !CONFIG_DAALA_DCT4
if (i == 0 && temp_in[0]) temp_in[0] += 1;
- ht.cols(temp_in, temp_out);
+#endif
+#if CONFIG_LGT
+ if (use_lgt_col)
+ flgt4(temp_in, temp_out, lgtmtx_col[i]);
+ else
+#endif
+ ht.cols(temp_in, temp_out);
for (j = 0; j < 4; ++j) out[j * 4 + i] = temp_out[j];
}
// Rows
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j) temp_in[j] = out[j + i * 4];
- ht.rows(temp_in, temp_out);
+#if CONFIG_LGT
+ if (use_lgt_row)
+ flgt4(temp_in, temp_out, lgtmtx_row[i]);
+ else
+#endif
+ ht.rows(temp_in, temp_out);
+#if CONFIG_DAALA_DCT4
+ /* Daala TX has orthonormal scaling; shift down by only 1 to achieve
+ the usual VPx coefficient left-shift of 3. */
+ for (j = 0; j < 4; ++j) output[j + i * 4] = temp_out[j] >> 1;
+#else
for (j = 0; j < 4; ++j) output[j + i * 4] = (temp_out[j] + 1) >> 2;
+#endif
}
}
}
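
Putting the two scaling comments in av1_fht4x4_c() together: the input is pre-multiplied by 16, the Daala transform pair is orthonormal (overall gain 1), and the final shift drops one bit, so the Daala path lands at 16 * 1 / 2 = 8 times the orthonormal scale -- the same "left-shift of 3" coefficient scale the non-Daala path reaches through its own transform gain plus the (temp_out[j] + 1) >> 2 rounding at the end.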
void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
static const transform_2d FHT[] = {
{ fdct8, fdct4 }, // DCT_DCT
{ fadst8, fdct4 }, // ADST_DCT
@@ -1228,19 +1407,36 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);
#endif
+#if CONFIG_LGT
+ const tran_high_t *lgtmtx_col[4];
+ const tran_high_t *lgtmtx_row[8];
+ int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 4);
+ int use_lgt_row = get_fwd_lgt4(ht.rows, txfm_param, lgtmtx_row, 8);
+#endif
+
// Rows
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j)
temp_in[j] =
(tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
- ht.rows(temp_in, temp_out);
+#if CONFIG_LGT
+ if (use_lgt_row)
+ flgt4(temp_in, temp_out, lgtmtx_row[i]);
+ else
+#endif
+ ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j) out[j * n2 + i] = temp_out[j];
}
// Columns
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
- ht.cols(temp_in, temp_out);
+#if CONFIG_LGT
+ if (use_lgt_col)
+ flgt8(temp_in, temp_out, lgtmtx_col[i]);
+ else
+#endif
+ ht.cols(temp_in, temp_out);
for (j = 0; j < n2; ++j)
output[i + j * n] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
}
@@ -1248,7 +1444,14 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
}
void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
static const transform_2d FHT[] = {
{ fdct4, fdct8 }, // DCT_DCT
{ fadst4, fdct8 }, // ADST_DCT
@@ -1280,19 +1483,36 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);
#endif
+#if CONFIG_LGT
+ const tran_high_t *lgtmtx_col[8];
+ const tran_high_t *lgtmtx_row[4];
+ int use_lgt_col = get_fwd_lgt4(ht.cols, txfm_param, lgtmtx_col, 8);
+ int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 4);
+#endif
+
// Columns
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j)
temp_in[j] =
(tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
- ht.cols(temp_in, temp_out);
+#if CONFIG_LGT
+ if (use_lgt_col)
+ flgt4(temp_in, temp_out, lgtmtx_col[i]);
+ else
+#endif
+ ht.cols(temp_in, temp_out);
for (j = 0; j < n; ++j) out[j * n2 + i] = temp_out[j];
}
// Rows
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
- ht.rows(temp_in, temp_out);
+#if CONFIG_LGT
+ if (use_lgt_row)
+ flgt8(temp_in, temp_out, lgtmtx_row[i]);
+ else
+#endif
+ ht.rows(temp_in, temp_out);
for (j = 0; j < n2; ++j)
output[j + i * n2] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
}
@@ -1300,7 +1520,14 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
}
void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
static const transform_2d FHT[] = {
{ fdct16, fdct4 }, // DCT_DCT
{ fadst16, fdct4 }, // ADST_DCT
@@ -1332,10 +1559,20 @@ void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride,
maybe_flip_input(&input, &stride, n4, n, flipped_input, tx_type);
#endif
+#if CONFIG_LGT
+ const tran_high_t *lgtmtx_row[16];
+ int use_lgt_row = get_fwd_lgt4(ht.rows, txfm_param, lgtmtx_row, 16);
+#endif
+
// Rows
for (i = 0; i < n4; ++i) {
for (j = 0; j < n; ++j) temp_in[j] = input[i * stride + j] * 4;
- ht.rows(temp_in, temp_out);
+#if CONFIG_LGT
+ if (use_lgt_row)
+ flgt4(temp_in, temp_out, lgtmtx_row[i]);
+ else
+#endif
+ ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j];
}
@@ -1350,7 +1587,14 @@ void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride,
}
void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
static const transform_2d FHT[] = {
{ fdct4, fdct16 }, // DCT_DCT
{ fadst4, fdct16 }, // ADST_DCT
@@ -1382,10 +1626,20 @@ void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride,
maybe_flip_input(&input, &stride, n, n4, flipped_input, tx_type);
#endif
+#if CONFIG_LGT
+ const tran_high_t *lgtmtx_col[16];
+ int use_lgt_col = get_fwd_lgt4(ht.cols, txfm_param, lgtmtx_col, 16);
+#endif
+
// Columns
for (i = 0; i < n4; ++i) {
for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 4;
- ht.cols(temp_in, temp_out);
+#if CONFIG_LGT
+ if (use_lgt_col)
+ flgt4(temp_in, temp_out, lgtmtx_col[i]);
+ else
+#endif
+ ht.cols(temp_in, temp_out);
for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j];
}
@@ -1400,7 +1654,14 @@ void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride,
}
void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
static const transform_2d FHT[] = {
{ fdct16, fdct8 }, // DCT_DCT
{ fadst16, fdct8 }, // ADST_DCT
@@ -1432,12 +1693,22 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);
#endif
+#if CONFIG_LGT
+ const tran_high_t *lgtmtx_row[16];
+ int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 16);
+#endif
+
// Rows
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j)
temp_in[j] =
(tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
- ht.rows(temp_in, temp_out);
+#if CONFIG_LGT
+ if (use_lgt_row)
+ flgt8(temp_in, temp_out, lgtmtx_row[i]);
+ else
+#endif
+ ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j)
out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
}
@@ -1452,7 +1723,14 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
}
void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
static const transform_2d FHT[] = {
{ fdct8, fdct16 }, // DCT_DCT
{ fadst8, fdct16 }, // ADST_DCT
@@ -1484,12 +1762,22 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);
#endif
+#if CONFIG_LGT
+ const tran_high_t *lgtmtx_col[16];
+ int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 16);
+#endif
+
// Columns
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j)
temp_in[j] =
(tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
- ht.cols(temp_in, temp_out);
+#if CONFIG_LGT
+ if (use_lgt_col)
+ flgt8(temp_in, temp_out, lgtmtx_col[i]);
+ else
+#endif
+ ht.cols(temp_in, temp_out);
for (j = 0; j < n; ++j)
out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
}
@@ -1504,7 +1792,14 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
}
void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
static const transform_2d FHT[] = {
{ fdct32, fdct8 }, // DCT_DCT
{ fhalfright32, fdct8 }, // ADST_DCT
@@ -1536,10 +1831,20 @@ void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride,
maybe_flip_input(&input, &stride, n4, n, flipped_input, tx_type);
#endif
+#if CONFIG_LGT
+ const tran_high_t *lgtmtx_row[32];
+ int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 32);
+#endif
+
// Rows
for (i = 0; i < n4; ++i) {
for (j = 0; j < n; ++j) temp_in[j] = input[i * stride + j] * 4;
- ht.rows(temp_in, temp_out);
+#if CONFIG_LGT
+ if (use_lgt_row)
+ flgt8(temp_in, temp_out, lgtmtx_row[i]);
+ else
+#endif
+ ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j];
}
@@ -1554,7 +1859,14 @@ void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride,
}
void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
static const transform_2d FHT[] = {
{ fdct8, fdct32 }, // DCT_DCT
{ fadst8, fdct32 }, // ADST_DCT
@@ -1586,10 +1898,20 @@ void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride,
maybe_flip_input(&input, &stride, n, n4, flipped_input, tx_type);
#endif
+#if CONFIG_LGT
+ const tran_high_t *lgtmtx_col[32];
+ int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 32);
+#endif
+
// Columns
for (i = 0; i < n4; ++i) {
for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 4;
- ht.cols(temp_in, temp_out);
+#if CONFIG_LGT
+ if (use_lgt_col)
+ flgt8(temp_in, temp_out, lgtmtx_col[i]);
+ else
+#endif
+ ht.cols(temp_in, temp_out);
for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j];
}
@@ -1604,7 +1926,14 @@ void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride,
}
void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
static const transform_2d FHT[] = {
{ fdct32, fdct16 }, // DCT_DCT
{ fhalfright32, fdct16 }, // ADST_DCT
@@ -1656,7 +1985,14 @@ void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride,
}
void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
static const transform_2d FHT[] = {
{ fdct16, fdct32 }, // DCT_DCT
{ fadst16, fdct32 }, // ADST_DCT
@@ -1833,10 +2169,21 @@ void av1_fdct8x8_quant_c(const int16_t *input, int stride,
}
void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
+#if !CONFIG_DAALA_DCT8
if (tx_type == DCT_DCT) {
aom_fdct8x8_c(input, output, stride);
- } else {
+ return;
+ }
+#endif
+ {
static const transform_2d FHT[] = {
{ fdct8, fdct8 }, // DCT_DCT
{ fadst8, fdct8 }, // ADST_DCT
@@ -1855,7 +2202,7 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
{ fidtx8, fadst8 }, // H_ADST
{ fadst8, fidtx8 }, // V_FLIPADST
{ fidtx8, fadst8 }, // H_FLIPADST
-#endif // CONFIG_EXT_TX
+#endif
};
const transform_2d ht = FHT[tx_type];
tran_low_t out[64];
@@ -1867,19 +2214,45 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
maybe_flip_input(&input, &stride, 8, 8, flipped_input, tx_type);
#endif
+#if CONFIG_LGT
+ const tran_high_t *lgtmtx_col[8];
+ const tran_high_t *lgtmtx_row[8];
+ int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 8);
+ int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 8);
+#endif
+
// Columns
for (i = 0; i < 8; ++i) {
+#if CONFIG_DAALA_DCT8
+ for (j = 0; j < 8; ++j) temp_in[j] = input[j * stride + i] * 16;
+#else
for (j = 0; j < 8; ++j) temp_in[j] = input[j * stride + i] * 4;
- ht.cols(temp_in, temp_out);
+#endif
+#if CONFIG_LGT
+ if (use_lgt_col)
+ flgt8(temp_in, temp_out, lgtmtx_col[i]);
+ else
+#endif
+ ht.cols(temp_in, temp_out);
for (j = 0; j < 8; ++j) out[j * 8 + i] = temp_out[j];
}
// Rows
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j) temp_in[j] = out[j + i * 8];
- ht.rows(temp_in, temp_out);
+#if CONFIG_LGT
+ if (use_lgt_row)
+ flgt8(temp_in, temp_out, lgtmtx_row[i]);
+ else
+#endif
+ ht.rows(temp_in, temp_out);
+#if CONFIG_DAALA_DCT8
+ for (j = 0; j < 8; ++j)
+ output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
+#else
for (j = 0; j < 8; ++j)
output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
+#endif
}
}
}
@@ -1941,7 +2314,14 @@ void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
}
void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
static const transform_2d FHT[] = {
{ fdct16, fdct16 }, // DCT_DCT
{ fadst16, fdct16 }, // ADST_DCT
@@ -1960,9 +2340,8 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
{ fidtx16, fadst16 }, // H_ADST
{ fadst16, fidtx16 }, // V_FLIPADST
{ fidtx16, fadst16 }, // H_FLIPADST
-#endif // CONFIG_EXT_TX
+#endif
};
-
const transform_2d ht = FHT[tx_type];
tran_low_t out[256];
int i, j;
@@ -1989,80 +2368,17 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
}
}
-#if CONFIG_HIGHBITDEPTH
-void av1_highbd_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
- av1_fht4x4_c(input, output, stride, tx_type);
-}
-
-void av1_highbd_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
- av1_fht4x8_c(input, output, stride, tx_type);
-}
-
-void av1_highbd_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
- av1_fht8x4_c(input, output, stride, tx_type);
-}
-
-void av1_highbd_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
- av1_fht8x16_c(input, output, stride, tx_type);
-}
-
-void av1_highbd_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
- av1_fht16x8_c(input, output, stride, tx_type);
-}
-
-void av1_highbd_fht16x32_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
- av1_fht16x32_c(input, output, stride, tx_type);
-}
-
-void av1_highbd_fht32x16_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
- av1_fht32x16_c(input, output, stride, tx_type);
-}
-
-void av1_highbd_fht4x16_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
- av1_fht4x16_c(input, output, stride, tx_type);
-}
-
-void av1_highbd_fht16x4_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
- av1_fht16x4_c(input, output, stride, tx_type);
-}
-
-void av1_highbd_fht8x32_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
- av1_fht8x32_c(input, output, stride, tx_type);
-}
-
-void av1_highbd_fht32x8_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
- av1_fht32x8_c(input, output, stride, tx_type);
-}
-
-void av1_highbd_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
- av1_fht8x8_c(input, output, stride, tx_type);
-}
-
void av1_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output,
int stride) {
av1_fwht4x4_c(input, output, stride);
}
-void av1_highbd_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
- av1_fht16x16_c(input, output, stride, tx_type);
-}
-#endif // CONFIG_HIGHBITDEPTH
-
void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
static const transform_2d FHT[] = {
{ fdct32, fdct32 }, // DCT_DCT
#if CONFIG_EXT_TX
@@ -2082,6 +2398,9 @@ void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
{ fhalfright32, fidtx32 }, // V_FLIPADST
{ fidtx32, fhalfright32 }, // H_FLIPADST
#endif
+#if CONFIG_MRC_TX
+ { fdct32, fdct32 }, // MRC_TX
+#endif // CONFIG_MRC_TX
};
const transform_2d ht = FHT[tx_type];
tran_low_t out[1024];
@@ -2093,6 +2412,14 @@ void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
maybe_flip_input(&input, &stride, 32, 32, flipped_input, tx_type);
#endif
+#if CONFIG_MRC_TX
+ if (tx_type == MRC_DCT) {
+ int16_t masked_input[32 * 32];
+ get_masked_residual32(&input, &stride, txfm_param->dst, txfm_param->stride,
+ masked_input);
+ }
+#endif // CONFIG_MRC_TX
+
// Columns
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4;
@@ -2150,7 +2477,14 @@ static void fdct64_row(const tran_low_t *input, tran_low_t *output) {
}
void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
static const transform_2d FHT[] = {
{ fdct64_col, fdct64_row }, // DCT_DCT
#if CONFIG_EXT_TX
@@ -2179,6 +2513,7 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
int16_t flipped_input[64 * 64];
maybe_flip_input(&input, &stride, 64, 64, flipped_input, tx_type);
#endif
+
// Columns
for (i = 0; i < 64; ++i) {
for (j = 0; j < 64; ++j) temp_in[j] = input[j * stride + i];
@@ -2214,20 +2549,6 @@ void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride,
}
#endif // CONFIG_EXT_TX
-#if CONFIG_HIGHBITDEPTH
-void av1_highbd_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
- av1_fht32x32_c(input, output, stride, tx_type);
-}
-
-#if CONFIG_TX64X64
-void av1_highbd_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
- av1_fht64x64_c(input, output, stride, tx_type);
-}
-#endif // CONFIG_TX64X64
-#endif // CONFIG_HIGHBITDEPTH
-
#if CONFIG_DPCM_INTRA
void av1_dpcm_ft4_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
tran_low_t *output) {
@@ -2271,5 +2592,54 @@ void av1_dpcm_ft32_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
for (int i = 0; i < 32; ++i) temp_in[i] = input[i * stride];
ft(temp_in, output);
}
+
+#if CONFIG_HIGHBITDEPTH
+void av1_hbd_dpcm_ft4_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
+ tran_low_t *output, int dir) {
+ (void)dir;
+ assert(tx_type < TX_TYPES_1D);
+ static const transform_1d FHT[] = { fdct4, fadst4, fadst4, fidtx4 };
+ const transform_1d ft = FHT[tx_type];
+ tran_low_t temp_in[4];
+ for (int i = 0; i < 4; ++i)
+ temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 4 * Sqrt2);
+ ft(temp_in, output);
+}
+
+void av1_hbd_dpcm_ft8_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
+ tran_low_t *output, int dir) {
+ (void)dir;
+ assert(tx_type < TX_TYPES_1D);
+ static const transform_1d FHT[] = { fdct8, fadst8, fadst8, fidtx8 };
+ const transform_1d ft = FHT[tx_type];
+ tran_low_t temp_in[8];
+ for (int i = 0; i < 8; ++i) temp_in[i] = input[i * stride] * 4;
+ ft(temp_in, output);
+}
+
+void av1_hbd_dpcm_ft16_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
+ tran_low_t *output, int dir) {
+ (void)dir;
+ assert(tx_type < TX_TYPES_1D);
+ static const transform_1d FHT[] = { fdct16, fadst16, fadst16, fidtx16 };
+ const transform_1d ft = FHT[tx_type];
+ tran_low_t temp_in[16];
+ for (int i = 0; i < 16; ++i)
+ temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 2 * Sqrt2);
+ ft(temp_in, output);
+}
+
+void av1_hbd_dpcm_ft32_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
+ tran_low_t *output, int dir) {
+ (void)dir;
+ assert(tx_type < TX_TYPES_1D);
+ static const transform_1d FHT[] = { fdct32, fhalfright32, fhalfright32,
+ fidtx32 };
+ const transform_1d ft = FHT[tx_type];
+ tran_low_t temp_in[32];
+ for (int i = 0; i < 32; ++i) temp_in[i] = input[i * stride];
+ ft(temp_in, output);
+}
+#endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_DPCM_INTRA
#endif // !AV1_DCT_GTEST
diff --git a/third_party/aom/av1/encoder/encodeframe.c b/third_party/aom/av1/encoder/encodeframe.c
index 36d09c02a..d13eb42fb 100644
--- a/third_party/aom/av1/encoder/encodeframe.c
+++ b/third_party/aom/av1/encoder/encodeframe.c
@@ -84,7 +84,7 @@ static void predict_superblock(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_EXT_INTER
int mi_row_ori, int mi_col_ori,
#endif // CONFIG_EXT_INTER
- int mi_row_pred, int mi_col_pred,
+ int mi_row_pred, int mi_col_pred, int plane,
BLOCK_SIZE bsize_pred, int b_sub8x8, int block);
static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size,
PC_TREE *pc_tree);
@@ -308,7 +308,6 @@ static void set_offsets_without_segment_id(const AV1_COMP *const cpi,
av1_setup_src_planes(x, cpi->source, mi_row, mi_col);
// R/D setup.
- x->rddiv = cpi->rd.RDDIV;
x->rdmult = cpi->rd.RDMULT;
// required by av1_append_sub8x8_mvs_for_idx() and av1_find_best_ref_mvs()
@@ -326,6 +325,10 @@ static void set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile,
set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);
mbmi = &xd->mi[0]->mbmi;
+#if CONFIG_CFL
+ xd->cfl->mi_row = mi_row;
+ xd->cfl->mi_col = mi_col;
+#endif
// Setup segment ID.
if (seg->enabled) {
@@ -413,7 +416,6 @@ static void set_offsets_extend(const AV1_COMP *const cpi, ThreadData *td,
xd->left_available = (mi_col_ori > tile->mi_col_start);
// R/D setup.
- x->rddiv = cpi->rd.RDDIV;
x->rdmult = cpi->rd.RDMULT;
}
@@ -539,6 +541,21 @@ static void set_ref_and_pred_mvs(MACROBLOCK *const x, int_mv *const mi_pred_mv,
mbmi->pred_mv[1] = this_mv;
mi_pred_mv[1] = this_mv;
}
+#if CONFIG_COMPOUND_SINGLEREF
+ } else if (is_inter_singleref_comp_mode(mbmi->mode)) {
+ // Special case: SR_NEAR_NEWMV uses 1 + mbmi->ref_mv_idx
+ // (like NEARMV) instead
+ if (mbmi->mode == SR_NEAR_NEWMV) ref_mv_idx += 1;
+
+ if (compound_ref0_mode(mbmi->mode) == NEWMV ||
+ compound_ref1_mode(mbmi->mode) == NEWMV) {
+ int_mv this_mv = curr_ref_mv_stack[ref_mv_idx].this_mv;
+ clamp_mv_ref(&this_mv.as_mv, bw, bh, xd);
+ mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
+ mbmi->pred_mv[0] = this_mv;
+ mi_pred_mv[0] = this_mv;
+ }
+#endif // CONFIG_COMPOUND_SINGLEREF
} else {
#endif // CONFIG_EXT_INTER
if (mbmi->mode == NEWMV) {
@@ -635,7 +652,6 @@ static void update_state(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_PALETTE
for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
#endif // CONFIG_PALETTE
-
// Restore the coding context of the MB to that that was in place
// when the mode was picked for it
for (y = 0; y < mi_height; y++)
@@ -814,7 +830,6 @@ static void update_state_supertx(const AV1_COMP *const cpi, ThreadData *td,
}
mi_addr->mbmi.segment_id_supertx = MAX_SEGMENTS;
}
-
// Restore the coding context of the MB to that that was in place
// when the mode was picked for it
for (y = 0; y < mi_height; y++)
@@ -1147,7 +1162,7 @@ static void update_supertx_param_sb(const AV1_COMP *const cpi, ThreadData *td,
}
#endif // CONFIG_SUPERTX
-#if CONFIG_MOTION_VAR && CONFIG_NCOBMC
+#if CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT)
static void set_mode_info_b(const AV1_COMP *const cpi,
const TileInfo *const tile, ThreadData *td,
int mi_row, int mi_col, BLOCK_SIZE bsize,
@@ -1167,6 +1182,7 @@ static void set_mode_info_sb(const AV1_COMP *const cpi, ThreadData *td,
BLOCK_SIZE subsize = get_subsize(bsize, partition);
#if CONFIG_EXT_PARTITION_TYPES
const BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+ const int quarter_step = mi_size_wide[bsize] / 4;
#endif
#if CONFIG_CB4X4
const int unify_bsize = 1;
@@ -1245,6 +1261,24 @@ static void set_mode_info_sb(const AV1_COMP *const cpi, ThreadData *td,
set_mode_info_b(cpi, tile, td, mi_row + hbs, mi_col + hbs, bsize2,
&pc_tree->verticalb[2]);
break;
+ case PARTITION_HORZ_4:
+ for (int i = 0; i < 4; ++i) {
+ int this_mi_row = mi_row + i * quarter_step;
+ if (i > 0 && this_mi_row >= cm->mi_rows) break;
+
+ set_mode_info_b(cpi, tile, td, this_mi_row, mi_col, subsize,
+ &pc_tree->horizontal4[i]);
+ }
+ break;
+ case PARTITION_VERT_4:
+ for (int i = 0; i < 4; ++i) {
+ int this_mi_col = mi_col + i * quarter_step;
+ if (i > 0 && this_mi_col >= cm->mi_cols) break;
+
+ set_mode_info_b(cpi, tile, td, mi_row, this_mi_col, subsize,
+ &pc_tree->vertical4[i]);
+ }
+ break;
#endif // CONFIG_EXT_PARTITION_TYPES
default: assert(0 && "Invalid partition type."); break;
}
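
For the new PARTITION_HORZ_4 / PARTITION_VERT_4 cases above, quarter_step is a quarter of the block's width in mi units, so a 32x32 block, for example, is handled as four 32x8 (or 8x32) strips each offset by quarter_step from the previous one; the "i > 0 && this_mi_row >= cm->mi_rows" check skips strips that fall outside the frame while the first strip is always processed.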
@@ -1281,10 +1315,10 @@ static int set_segment_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
}
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
-static void daala_dist_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8,
- BLOCK_SIZE bsize, int bw, int bh,
- int mi_row, int mi_col) {
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+static void dist_8x8_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8,
+ BLOCK_SIZE bsize, int bw, int bh,
+ int mi_row, int mi_col) {
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[0];
const int dst_stride = pd->dst.stride;
@@ -1294,12 +1328,24 @@ static void daala_dist_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8,
if (bsize < BLOCK_8X8) {
int i, j;
- uint8_t *dst_sub8x8 = &dst8x8[((mi_row & 1) * 8 + (mi_col & 1)) << 2];
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint16_t *dst8x8_16 = (uint16_t *)dst8x8;
+ uint16_t *dst_sub8x8 = &dst8x8_16[((mi_row & 1) * 8 + (mi_col & 1)) << 2];
- for (j = 0; j < bh; ++j)
- for (i = 0; i < bw; ++i) {
- dst_sub8x8[j * 8 + i] = dst[j * dst_stride + i];
- }
+ for (j = 0; j < bh; ++j)
+ for (i = 0; i < bw; ++i)
+ dst_sub8x8[j * 8 + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
+ } else {
+#endif
+ uint8_t *dst_sub8x8 = &dst8x8[((mi_row & 1) * 8 + (mi_col & 1)) << 2];
+
+ for (j = 0; j < bh; ++j)
+ for (i = 0; i < bw; ++i)
+ dst_sub8x8[j * 8 + i] = dst[j * dst_stride + i];
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif
}
}
#endif
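
dist_8x8_set_sub8x8_dst() above copies each decoded sub-8x8 block into one quadrant of a stride-8 staging buffer so the 8x8 distortion metric can later be evaluated over the whole area; the offset expression yields 0, 4, 32 and 36 for (mi_row & 1, mi_col & 1) = (0,0), (0,1), (1,0), (1,1). A minimal sketch of the low-bit-depth 4x4 case, not part of the patch, with illustrative toy_* naming:

#include <stdint.h>

/* Copy one 4x4 decoded block into its quadrant of an 8x8 (stride 8) staging
   buffer.  The quadrant is selected by the parity of mi_row and mi_col,
   matching the ((mi_row & 1) * 8 + (mi_col & 1)) << 2 offset used above. */
static void toy_stage_sub8x8(uint8_t dst8x8[64], const uint8_t *dst,
                             int dst_stride, int mi_row, int mi_col) {
  uint8_t *q = &dst8x8[((mi_row & 1) * 8 + (mi_col & 1)) << 2];
  for (int j = 0; j < 4; ++j)
    for (int i = 0; i < 4; ++i) q[j * 8 + i] = dst[j * dst_stride + i];
}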
@@ -1330,10 +1376,6 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data,
x->pvq_speed = 1;
x->pvq_coded = 0;
#endif
-#if CONFIG_CFL
- // Don't store luma during RDO (we will store the best mode later).
- x->cfl_store_y = 0;
-#endif
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
mbmi = &xd->mi[0]->mbmi;
@@ -1342,6 +1384,10 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data,
mbmi->mi_row = mi_row;
mbmi->mi_col = mi_col;
#endif
+#if CONFIG_CFL
+ // Don't store luma during RDO. Only store luma when best luma is known
+ x->cfl_store_y = 0;
+#endif
#if CONFIG_SUPERTX
// We set tx_size here as skip blocks would otherwise not set it.
// tx_size needs to be set at this point as supertx_enable in
@@ -1542,6 +1588,7 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
#endif
if (!frame_is_intra_only(cm)) {
FRAME_COUNTS *const counts = td->counts;
+ RD_COUNTS *rdc = &td->rd_counts;
const int inter_block = is_inter_block(mbmi);
const int seg_ref_active =
segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
@@ -1560,6 +1607,12 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
#endif // CONFIG_EXT_REFS
if (cm->reference_mode == REFERENCE_MODE_SELECT) {
+ if (has_second_ref(mbmi))
+ // This flag is also updated for 4x4 blocks
+ rdc->compound_ref_used_flag = 1;
+ else
+ // This flag is also updated for 4x4 blocks
+ rdc->single_ref_used_flag = 1;
#if !SUB8X8_COMP_REF
if (mbmi->sb_type != BLOCK_4X4)
counts->comp_inter[av1_get_reference_mode_context(cm, xd)]
@@ -1571,24 +1624,53 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
}
if (has_second_ref(mbmi)) {
+#if CONFIG_EXT_COMP_REFS
+ const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi)
+ ? UNIDIR_COMP_REFERENCE
+ : BIDIR_COMP_REFERENCE;
+#if !USE_UNI_COMP_REFS
+ // TODO(zoeliu): Temporarily turn off uni-directional comp refs
+ assert(comp_ref_type == BIDIR_COMP_REFERENCE);
+#endif // !USE_UNI_COMP_REFS
+ counts->comp_ref_type[av1_get_comp_reference_type_context(xd)]
+ [comp_ref_type]++;
+
+ if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
+ const int bit = (ref0 == BWDREF_FRAME);
+ counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p(xd)][0]
+ [bit]++;
+ if (!bit) {
+ const int bit1 = (ref1 == LAST3_FRAME || ref1 == GOLDEN_FRAME);
+ counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p1(xd)][1]
+ [bit1]++;
+ if (bit1) {
+ counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p2(xd)]
+ [2][ref1 == GOLDEN_FRAME]++;
+ }
+ }
+ } else {
+#endif // CONFIG_EXT_COMP_REFS
#if CONFIG_EXT_REFS
- const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME);
+ const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME);
- counts->comp_ref[av1_get_pred_context_comp_ref_p(cm, xd)][0][bit]++;
- if (!bit) {
- counts->comp_ref[av1_get_pred_context_comp_ref_p1(cm, xd)][1]
- [ref0 == LAST_FRAME]++;
- } else {
- counts->comp_ref[av1_get_pred_context_comp_ref_p2(cm, xd)][2]
- [ref0 == GOLDEN_FRAME]++;
- }
+ counts->comp_ref[av1_get_pred_context_comp_ref_p(cm, xd)][0][bit]++;
+ if (!bit) {
+ counts->comp_ref[av1_get_pred_context_comp_ref_p1(cm, xd)][1]
+ [ref0 == LAST_FRAME]++;
+ } else {
+ counts->comp_ref[av1_get_pred_context_comp_ref_p2(cm, xd)][2]
+ [ref0 == GOLDEN_FRAME]++;
+ }
- counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(cm, xd)][0]
- [ref1 == ALTREF_FRAME]++;
-#else
+ counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(cm, xd)][0]
+ [ref1 == ALTREF_FRAME]++;
+#else // !CONFIG_EXT_REFS
counts->comp_ref[av1_get_pred_context_comp_ref_p(cm, xd)][0]
[ref0 == GOLDEN_FRAME]++;
#endif // CONFIG_EXT_REFS
+#if CONFIG_EXT_COMP_REFS
+ }
+#endif // CONFIG_EXT_COMP_REFS
} else {
#if CONFIG_EXT_REFS
const int bit = (ref0 == ALTREF_FRAME || ref0 == BWDREF_FRAME);
@@ -1609,7 +1691,7 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
[ref0 != LAST3_FRAME]++;
}
}
-#else
+#else // !CONFIG_EXT_REFS
counts->single_ref[av1_get_pred_context_single_ref_p1(xd)][0]
[ref0 != LAST_FRAME]++;
if (ref0 != LAST_FRAME) {
@@ -1619,7 +1701,14 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
#endif // CONFIG_EXT_REFS
}
-#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#if CONFIG_EXT_INTER
+#if CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi))
+ counts->comp_inter_mode[av1_get_inter_mode_context(xd)]
+ [is_inter_singleref_comp_mode(mbmi->mode)]++;
+#endif // CONFIG_COMPOUND_SINGLEREF
+
+#if CONFIG_INTERINTRA
if (cm->reference_mode != COMPOUND_REFERENCE &&
#if CONFIG_SUPERTX
!supertx_enabled &&
@@ -1635,14 +1724,33 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
counts->interintra[bsize_group][0]++;
}
}
-#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#endif // CONFIG_INTERINTRA
+#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+#if CONFIG_WARPED_MOTION
+ set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+#endif
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ const MOTION_MODE motion_allowed =
+ motion_mode_allowed_wrapper(0,
+#if CONFIG_GLOBAL_MOTION
+ 0, xd->global_motion,
+#endif // CONFIG_GLOBAL_MOTION
+#if CONFIG_WARPED_MOTION
+ xd,
+#endif
+ mi);
+#else
const MOTION_MODE motion_allowed = motion_mode_allowed(
-#if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
+#if CONFIG_GLOBAL_MOTION
0, xd->global_motion,
-#endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
+#endif // CONFIG_GLOBAL_MOTION
+#if CONFIG_WARPED_MOTION
+ xd,
+#endif
mi);
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
#if CONFIG_SUPERTX
if (!supertx_enabled)
#endif // CONFIG_SUPERTX
@@ -1660,11 +1768,28 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
if (motion_allowed > SIMPLE_TRANSLATION)
counts->motion_mode[mbmi->sb_type][mbmi->motion_mode]++;
#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
+
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ if (mbmi->motion_mode == NCOBMC_ADAPT_WEIGHT) {
+ ADAPT_OVERLAP_BLOCK ao_block =
+ adapt_overlap_block_lookup[mbmi->sb_type];
+ ++counts->ncobmc_mode[ao_block][mbmi->ncobmc_mode[0]];
+ if (mi_size_wide[mbmi->sb_type] != mi_size_high[mbmi->sb_type]) {
+ ++counts->ncobmc_mode[ao_block][mbmi->ncobmc_mode[1]];
+ }
+ }
+#endif
+
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#if CONFIG_EXT_INTER
- if (cm->reference_mode != SINGLE_REFERENCE &&
+ if (
+#if CONFIG_COMPOUND_SINGLEREF
+ is_inter_anyref_comp_mode(mbmi->mode)
+#else // !CONFIG_COMPOUND_SINGLEREF
+ cm->reference_mode != SINGLE_REFERENCE &&
is_inter_compound_mode(mbmi->mode)
+#endif // CONFIG_COMPOUND_SINGLEREF
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
&& mbmi->motion_mode == SIMPLE_TRANSLATION
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
@@ -1683,6 +1808,12 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
if (has_second_ref(mbmi)) {
mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
+#if CONFIG_COMPOUND_SINGLEREF
+ } else if (is_inter_singleref_comp_mode(mode)) {
+ mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
+ ++counts->inter_singleref_comp_mode[mode_ctx]
+ [INTER_SINGLEREF_COMP_OFFSET(mode)];
+#endif // CONFIG_COMPOUND_SINGLEREF
} else {
#endif // CONFIG_EXT_INTER
mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
@@ -1693,10 +1824,15 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
#endif // CONFIG_EXT_INTER
#if CONFIG_EXT_INTER
+#if CONFIG_COMPOUND_SINGLEREF
+ if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV ||
+ mbmi->mode == SR_NEW_NEWMV) {
+#else // !CONFIG_COMPOUND_SINGLEREF
if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
-#else
+#endif // CONFIG_COMPOUND_SINGLEREF
+#else // !CONFIG_EXT_INTER
if (mbmi->mode == NEWMV) {
-#endif
+#endif // CONFIG_EXT_INTER
uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
int idx;
@@ -1871,10 +2007,16 @@ static void encode_b(const AV1_COMP *const cpi, const TileInfo *const tile,
update_state(cpi, td, ctx, mi_row, mi_col, bsize, dry_run);
#if CONFIG_MOTION_VAR && CONFIG_NCOBMC
mbmi = &xd->mi[0]->mbmi;
+#if CONFIG_WARPED_MOTION
+ set_ref_ptrs(&cpi->common, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+#endif
const MOTION_MODE motion_allowed = motion_mode_allowed(
-#if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
+#if CONFIG_GLOBAL_MOTION
0, xd->global_motion,
-#endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
+#endif // CONFIG_GLOBAL_MOTION
+#if CONFIG_WARPED_MOTION
+ xd,
+#endif
xd->mi[0]);
check_ncobmc = is_inter_block(mbmi) && motion_allowed >= OBMC_CAUSAL;
if (!dry_run && check_ncobmc) {
@@ -1922,6 +2064,8 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
const BLOCK_SIZE subsize = get_subsize(bsize, partition);
#if CONFIG_EXT_PARTITION_TYPES
const BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+ int quarter_step = mi_size_wide[bsize] / 4;
+ int i;
#endif
#if CONFIG_CB4X4
@@ -1933,6 +2077,11 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
+#if CONFIG_SPEED_REFS
+ // First scanning pass of an SB is dry run only.
+ if (cpi->sb_scanning_pass_idx == 0) assert(dry_run == DRY_RUN_NORMAL);
+#endif // CONFIG_SPEED_REFS
+
if (!dry_run && ctx >= 0) td->counts->partition[ctx][partition]++;
#if CONFIG_SUPERTX
@@ -2115,6 +2264,24 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col + hbs, dry_run, bsize2,
partition, &pc_tree->verticalb[2], rate);
break;
+ case PARTITION_HORZ_4:
+ for (i = 0; i < 4; ++i) {
+ int this_mi_row = mi_row + i * quarter_step;
+ if (i > 0 && this_mi_row >= cm->mi_rows) break;
+
+ encode_b(cpi, tile, td, tp, this_mi_row, mi_col, dry_run, subsize,
+ partition, &pc_tree->horizontal4[i], rate);
+ }
+ break;
+ case PARTITION_VERT_4:
+ for (i = 0; i < 4; ++i) {
+ int this_mi_col = mi_col + i * quarter_step;
+ if (i > 0 && this_mi_col >= cm->mi_cols) break;
+
+ encode_b(cpi, tile, td, tp, mi_row, this_mi_col, dry_run, subsize,
+ partition, &pc_tree->vertical4[i], rate);
+ }
+ break;
#endif // CONFIG_EXT_PARTITION_TYPES
default: assert(0 && "Invalid partition type."); break;
}
@@ -2302,8 +2469,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
if (none_rdc.rate < INT_MAX) {
none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
- none_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist);
+ none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
#if CONFIG_SUPERTX
none_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE];
#endif
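
The RDCOST() call sites throughout this file change from RDCOST(x->rdmult, x->rddiv, rate, dist) to RDCOST(x->rdmult, rate, dist): the separate rddiv divisor is dropped and a single multiplier now weighs the two terms, so the quantity being minimized is still of the usual Lagrangian form J = D + lambda * R, with lambda folded entirely into rdmult and the rate term expressed in the 1/512-bit units produced by the cost helpers earlier in this patch.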
@@ -2473,7 +2639,9 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
case PARTITION_VERT_A:
case PARTITION_VERT_B:
case PARTITION_HORZ_A:
- case PARTITION_HORZ_B: assert(0 && "Cannot handle extended partiton types");
+ case PARTITION_HORZ_B:
+ case PARTITION_HORZ_4:
+ case PARTITION_VERT_4: assert(0 && "Cannot handle extended partiton types");
#endif // CONFIG_EXT_PARTITION_TYPES
default: assert(0); break;
}
@@ -2481,7 +2649,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
if (last_part_rdc.rate < INT_MAX) {
last_part_rdc.rate += cpi->partition_cost[pl][partition];
last_part_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, last_part_rdc.rate, last_part_rdc.dist);
+ RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist);
#if CONFIG_SUPERTX
last_part_rate_nocoef += cpi->partition_cost[pl][partition];
#endif
@@ -2565,8 +2733,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
}
if (chosen_rdc.rate < INT_MAX) {
chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
- chosen_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, chosen_rdc.rate, chosen_rdc.dist);
+ chosen_rdc.rdcost = RDCOST(x->rdmult, chosen_rdc.rate, chosen_rdc.dist);
#if CONFIG_SUPERTX
chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE];
#endif
@@ -2624,8 +2791,8 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
}
/* clang-format off */
-static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
-#if CONFIG_CB4X4
+static const BLOCK_SIZE min_partition_size[BLOCK_SIZES_ALL] = {
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
BLOCK_2X2, BLOCK_2X2, BLOCK_2X2, // 2x2, 2x4, 4x2
#endif
BLOCK_4X4, // 4x4
@@ -2634,12 +2801,14 @@ static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, // 16x32, 32x16, 32x32
BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 32x64, 64x32, 64x64
#if CONFIG_EXT_PARTITION
- BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 // 64x128, 128x64, 128x128
+ BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 64x128, 128x64, 128x128
#endif // CONFIG_EXT_PARTITION
+ BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x16, 16x4, 8x32
+ BLOCK_8X8 // 32x8
};
-static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
-#if CONFIG_CB4X4
+static const BLOCK_SIZE max_partition_size[BLOCK_SIZES_ALL] = {
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, // 2x2, 2x4, 4x2
#endif
BLOCK_8X8, // 4x4
@@ -2648,13 +2817,15 @@ static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, // 16x32, 32x16, 32x32
BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST, // 32x64, 64x32, 64x64
#if CONFIG_EXT_PARTITION
- BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST // 64x128, 128x64, 128x128
+ BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST, // 64x128, 128x64, 128x128
#endif // CONFIG_EXT_PARTITION
+ BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, // 4x16, 16x4, 8x32
+ BLOCK_32X32 // 32x8
};
// Next square block size less or equal than current block size.
-static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {
-#if CONFIG_CB4X4
+static const BLOCK_SIZE next_square_size[BLOCK_SIZES_ALL] = {
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
BLOCK_2X2, BLOCK_2X2, BLOCK_2X2, // 2x2, 2x4, 4x2
#endif
BLOCK_4X4, // 4x4
@@ -2663,8 +2834,10 @@ static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {
BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, // 16x32, 32x16, 32x32
BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, // 32x64, 64x32, 64x64
#if CONFIG_EXT_PARTITION
- BLOCK_64X64, BLOCK_64X64, BLOCK_128X128 // 64x128, 128x64, 128x128
+ BLOCK_64X64, BLOCK_64X64, BLOCK_128X128, // 64x128, 128x64, 128x128
#endif // CONFIG_EXT_PARTITION
+ BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x16, 16x4, 8x32
+ BLOCK_8X8 // 32x8
};
/* clang-format on */
@@ -3055,8 +3228,7 @@ static void rd_test_partition3(
cm->fc->supertx_prob[partition_supertx_context_lookup[partition]]
[supertx_size],
0);
- sum_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) {
TX_TYPE best_tx = DCT_DCT;
@@ -3071,8 +3243,7 @@ static void rd_test_partition3(
cm->fc->supertx_prob[partition_supertx_context_lookup[partition]]
[supertx_size],
1);
- tmp_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist);
+ tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist);
if (tmp_rdc.rdcost < sum_rdc.rdcost) {
sum_rdc = tmp_rdc;
update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx,
@@ -3091,8 +3262,7 @@ static void rd_test_partition3(
#endif
bsize);
sum_rdc.rate += cpi->partition_cost[pl][partition];
- sum_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
#if CONFIG_SUPERTX
sum_rate_nocoef += cpi->partition_cost[pl][partition];
#endif
@@ -3161,7 +3331,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_SUPERTX
int this_rate_nocoef, sum_rate_nocoef = 0, best_rate_nocoef = INT_MAX;
int abort_flag;
- const int supertx_allowed = !frame_is_intra_only(cm) &&
+ const int supertx_allowed = !frame_is_intra_only(cm) && bsize >= BLOCK_8X8 &&
bsize <= MAX_SUPERTX_BLOCK_SIZE &&
!xd->lossless[0];
#endif // CONFIG_SUPERTX
@@ -3341,6 +3511,17 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
}
#endif
+#if CONFIG_SPEED_REFS
+ if (cpi->sb_scanning_pass_idx == 0) {
+ // NOTE: For the 1st pass of scanning, check all the subblocks of equal size
+ // only.
+ partition_none_allowed = (bsize == MIN_SPEED_REFS_BLKSIZE);
+ partition_horz_allowed = 0;
+ partition_vert_allowed = 0;
+ do_square_split = (bsize > MIN_SPEED_REFS_BLKSIZE);
+ }
+#endif // CONFIG_SPEED_REFS
+
// PARTITION_NONE
if (partition_none_allowed) {
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
@@ -3354,8 +3535,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
if (this_rdc.rate != INT_MAX) {
if (bsize_at_least_8x8) {
this_rdc.rate += partition_cost[PARTITION_NONE];
- this_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
+ this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
#if CONFIG_SUPERTX
this_rate_nocoef += partition_cost[PARTITION_NONE];
#endif
@@ -3494,8 +3674,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
cm->fc->supertx_prob[partition_supertx_context_lookup
[PARTITION_SPLIT]][supertx_size],
0);
- sum_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
if (is_inter_mode(pc_tree->leaf_split[0]->mic.mbmi.mode)) {
TX_TYPE best_tx = DCT_DCT;
@@ -3512,8 +3691,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
cm->fc->supertx_prob[partition_supertx_context_lookup
[PARTITION_SPLIT]][supertx_size],
1);
- tmp_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist);
+ tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist);
if (tmp_rdc.rdcost < sum_rdc.rdcost) {
sum_rdc = tmp_rdc;
update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx,
@@ -3551,6 +3729,11 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
&this_rdc, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[idx]);
#endif // CONFIG_SUPERTX
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ if (bsize == BLOCK_8X8 && this_rdc.rate != INT_MAX) {
+ assert(this_rdc.dist_y < INT64_MAX);
+ }
+#endif
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
#if CONFIG_SUPERTX
@@ -3564,28 +3747,40 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
- sum_rdc.dist_y += this_rdc.dist_y;
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ if (bsize == BLOCK_8X8) {
+ assert(this_rdc.dist_y < INT64_MAX);
+ sum_rdc.dist_y += this_rdc.dist_y;
+ }
#endif
}
}
reached_last_index = (idx == 4);
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (reached_last_index && sum_rdc.rdcost != INT64_MAX &&
bsize == BLOCK_8X8) {
- int use_activity_masking = 0;
- int64_t daala_dist;
+ int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
- daala_dist = av1_daala_dist(x->plane[0].src.buf - 4 * src_stride - 4,
- src_stride, x->decoded_8x8, 8, 8, 8, 1,
- use_activity_masking, x->qindex)
- << 4;
- sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist;
- sum_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+ uint8_t *decoded_8x8;
+
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8);
+ else
+#endif
+ decoded_8x8 = (uint8_t *)x->decoded_8x8;
+
+ dist_8x8 =
+ av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4 * src_stride - 4,
+ src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8,
+ x->qindex)
+ << 4;
+ assert(sum_rdc.dist_y < INT64_MAX);
+ sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
#if CONFIG_SUPERTX
if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && reached_last_index) {
@@ -3598,8 +3793,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
cm->fc->supertx_prob[partition_supertx_context_lookup
[PARTITION_SPLIT]][supertx_size],
0);
- sum_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) {
TX_TYPE best_tx = DCT_DCT;
@@ -3616,8 +3810,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
cm->fc->supertx_prob[partition_supertx_context_lookup
[PARTITION_SPLIT]][supertx_size],
1);
- tmp_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist);
+ tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist);
if (tmp_rdc.rdcost < sum_rdc.rdcost) {
sum_rdc = tmp_rdc;
update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx,
@@ -3632,7 +3825,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
sum_rdc.rate += partition_cost[PARTITION_SPLIT];
- sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
#if CONFIG_SUPERTX
sum_rate_nocoef += partition_cost[PARTITION_SPLIT];
#endif // CONFIG_SUPERTX
@@ -3725,14 +3918,14 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
best_rdc.rdcost - sum_rdc.rdcost);
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
update_state(cpi, td, &pc_tree->horizontal[1], mi_row + mi_step, mi_col,
subsize, DRY_RUN_NORMAL);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row + mi_step, mi_col,
subsize, NULL);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
@@ -3746,24 +3939,31 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
sum_rdc.dist_y += this_rdc.dist_y;
#endif
}
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
- int use_activity_masking = 0;
- int64_t daala_dist;
+ int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
- daala_dist = av1_daala_dist(x->plane[0].src.buf - 4 * src_stride,
- src_stride, x->decoded_8x8, 8, 8, 8, 1,
- use_activity_masking, x->qindex)
- << 4;
- sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist;
- sum_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+ uint8_t *decoded_8x8;
+
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8);
+ else
+#endif
+ decoded_8x8 = (uint8_t *)x->decoded_8x8;
+
+ dist_8x8 = av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4 * src_stride,
+ src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8,
+ 8, x->qindex)
+ << 4;
+ sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
}
#if CONFIG_SUPERTX
@@ -3777,7 +3977,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
cm->fc->supertx_prob[partition_supertx_context_lookup[PARTITION_HORZ]]
[supertx_size],
0);
- sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) {
TX_TYPE best_tx = DCT_DCT;
@@ -3795,8 +3995,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
->supertx_prob[partition_supertx_context_lookup[PARTITION_HORZ]]
[supertx_size],
1);
- tmp_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist);
+ tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist);
if (tmp_rdc.rdcost < sum_rdc.rdcost) {
sum_rdc = tmp_rdc;
update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx,
@@ -3810,7 +4009,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
if (sum_rdc.rdcost < best_rdc.rdcost) {
sum_rdc.rate += partition_cost[PARTITION_HORZ];
- sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
#if CONFIG_SUPERTX
sum_rate_nocoef += partition_cost[PARTITION_HORZ];
#endif // CONFIG_SUPERTX
@@ -3899,14 +4098,14 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
best_rdc.rdcost - sum_rdc.rdcost);
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
update_state(cpi, td, &pc_tree->vertical[1], mi_row, mi_col + mi_step,
subsize, DRY_RUN_NORMAL);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col + mi_step,
subsize, NULL);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
@@ -3920,24 +4119,31 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
sum_rdc.dist_y += this_rdc.dist_y;
#endif
}
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
- int use_activity_masking = 0;
- int64_t daala_dist;
+ int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
- daala_dist =
- av1_daala_dist(x->plane[0].src.buf - 4, src_stride, x->decoded_8x8,
- 8, 8, 8, 1, use_activity_masking, x->qindex)
+ uint8_t *decoded_8x8;
+
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8);
+ else
+#endif
+ decoded_8x8 = (uint8_t *)x->decoded_8x8;
+
+ dist_8x8 =
+ av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4, src_stride,
+ decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8, x->qindex)
<< 4;
- sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist;
- sum_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+ sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
}
#if CONFIG_SUPERTX
if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && !abort_flag) {
@@ -3950,7 +4156,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
cm->fc->supertx_prob[partition_supertx_context_lookup[PARTITION_VERT]]
[supertx_size],
0);
- sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) {
TX_TYPE best_tx = DCT_DCT;
@@ -3968,8 +4174,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
->supertx_prob[partition_supertx_context_lookup[PARTITION_VERT]]
[supertx_size],
1);
- tmp_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist);
+ tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist);
if (tmp_rdc.rdcost < sum_rdc.rdcost) {
sum_rdc = tmp_rdc;
update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx,
@@ -3983,7 +4188,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
if (sum_rdc.rdcost < best_rdc.rdcost) {
sum_rdc.rate += partition_cost[PARTITION_VERT];
- sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
#if CONFIG_SUPERTX
sum_rate_nocoef += partition_cost[PARTITION_VERT];
#endif // CONFIG_SUPERTX
@@ -4060,14 +4265,139 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
bsize2, mi_row + mi_step, mi_col + mi_step, bsize2);
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
}
+
+ // PARTITION_HORZ_4
+ // TODO(david.barker): For this and PARTITION_VERT_4,
+ // * Add support for BLOCK_16X16 once we support 2x8 and 8x2 blocks for the
+ // chroma plane
+ // * Add support for supertx
+ if (bsize == BLOCK_32X32 && partition_horz_allowed && !force_horz_split &&
+ (do_rectangular_split || av1_active_h_edge(cpi, mi_row, mi_step))) {
+ int i;
+ const int quarter_step = mi_size_high[bsize] / 4;
+ PICK_MODE_CONTEXT *ctx_prev = ctx_none;
+
+ subsize = get_subsize(bsize, PARTITION_HORZ_4);
+ av1_zero(sum_rdc);
+
+ for (i = 0; i < 4; ++i) {
+ int this_mi_row = mi_row + i * quarter_step;
+
+ if (i > 0 && this_mi_row >= cm->mi_rows) break;
+
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_prev);
+
+ ctx_prev = &pc_tree->horizontal4[i];
+
+ rd_pick_sb_modes(cpi, tile_data, x, this_mi_row, mi_col, &this_rdc,
+ PARTITION_HORZ_4, subsize, ctx_prev,
+ best_rdc.rdcost - sum_rdc.rdcost);
+
+ if (this_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;
+ break;
+ } else {
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
+ }
+
+ if (sum_rdc.rdcost >= best_rdc.rdcost) break;
+
+ if (i < 3) {
+ update_state(cpi, td, ctx_prev, this_mi_row, mi_col, subsize, 1);
+ encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, this_mi_row, mi_col,
+ subsize, NULL);
+ }
+ }
+
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ sum_rdc.rate += partition_cost[PARTITION_HORZ_4];
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ best_rdc = sum_rdc;
+ pc_tree->partitioning = PARTITION_HORZ_4;
+ }
+ }
+#if !CONFIG_PVQ
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+ restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
+ }
+ // PARTITION_VERT_4
+ if (bsize == BLOCK_32X32 && partition_vert_allowed && !force_vert_split &&
+ (do_rectangular_split || av1_active_v_edge(cpi, mi_row, mi_step))) {
+ int i;
+ const int quarter_step = mi_size_wide[bsize] / 4;
+ PICK_MODE_CONTEXT *ctx_prev = ctx_none;
+
+ subsize = get_subsize(bsize, PARTITION_VERT_4);
+ av1_zero(sum_rdc);
+
+ for (i = 0; i < 4; ++i) {
+ int this_mi_col = mi_col + i * quarter_step;
+
+ if (i > 0 && this_mi_col >= cm->mi_cols) break;
+
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_prev);
+
+ ctx_prev = &pc_tree->vertical4[i];
+
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, this_mi_col, &this_rdc,
+ PARTITION_VERT_4, subsize, ctx_prev,
+ best_rdc.rdcost - sum_rdc.rdcost);
+
+ if (this_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;
+ } else {
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
+ }
+
+ if (sum_rdc.rdcost >= best_rdc.rdcost) break;
+
+ if (i < 3) {
+ update_state(cpi, td, ctx_prev, mi_row, this_mi_col, subsize, 1);
+ encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, this_mi_col,
+ subsize, NULL);
+ }
+ }
+
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ sum_rdc.rate += partition_cost[PARTITION_VERT_4];
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ best_rdc = sum_rdc;
+ pc_tree->partitioning = PARTITION_VERT_4;
+ }
+ }
+#if !CONFIG_PVQ
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+ restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif
+ }
#endif // CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_SPEED_REFS
+ // The first scanning pass is done.
+ if (cpi->sb_scanning_pass_idx == 0 && bsize == cm->sb_size) return;
+#endif // CONFIG_SPEED_REFS
+
// TODO(jbb): This code added so that we avoid static analysis
// warning related to the fact that best_rd isn't used after this
// point. This code should be refactored so that the duplicate
// checks occur in some sub function and thus are used...
(void)best_rd;
*rd_cost = best_rdc;
+
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ if (bsize <= BLOCK_8X8 && rd_cost->rate != INT_MAX) {
+ assert(rd_cost->dist_y < INT64_MAX);
+ }
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
#if CONFIG_SUPERTX
*rate_nocoef = best_rate_nocoef;
#endif // CONFIG_SUPERTX
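
The PARTITION_HORZ_4 / PARTITION_VERT_4 search added earlier in this hunk follows an accumulate-and-prune pattern: each quarter block is priced against the remaining budget, the loop bails out once the running cost can no longer beat the best split found so far, and only then is the cost of signalling the 4-way partition added and the total recompared. A generic sketch of that pattern, with pick_quarter() as a hypothetical stand-in for rd_pick_sb_modes(), not aom API:

#include <limits.h>
#include <stdint.h>

typedef struct { int rate; int64_t dist; int64_t rdcost; } RdStats;

/* Accumulate-and-prune loop over four quarter blocks; returns 1 and fills
 * *out only when the 4-way split is still competitive. */
static int try_four_way(RdStats (*pick_quarter)(int idx, int64_t budget),
                        int64_t best_rdcost, int partition_cost,
                        RdStats *out) {
  RdStats sum = { 0, 0, 0 };
  for (int i = 0; i < 4; ++i) {
    RdStats q = pick_quarter(i, best_rdcost - sum.rdcost);
    if (q.rate == INT_MAX) return 0;          /* quarter block failed */
    sum.rate += q.rate;
    sum.dist += q.dist;
    sum.rdcost += q.rdcost;
    if (sum.rdcost >= best_rdcost) return 0;  /* pruned early */
  }
  /* Only now pay for signalling the 4-way partition itself; the caller
   * would recompute rdcost with RDCOST(rdmult, rate, dist) and recompare. */
  sum.rate += partition_cost;
  *out = sum;
  return 1;
}
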
@@ -4093,13 +4423,13 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
x->cfl_store_y = 0;
#endif
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
bsize == BLOCK_4X4 && pc_tree->index == 3) {
encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
pc_tree, NULL);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize == cm->sb_size) {
#if !CONFIG_PVQ && !CONFIG_LV_MAP
@@ -4112,6 +4442,22 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
}
}
+#if CONFIG_SPEED_REFS
+static void restore_mi(const AV1_COMP *const cpi, MACROBLOCK *const x,
+ int mi_row, int mi_col) {
+ const AV1_COMMON *cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
+ int x_idx, y;
+ for (y = 0; y < mi_size_high[cm->sb_size]; y++)
+ for (x_idx = 0; x_idx < mi_size_wide[cm->sb_size]; x_idx++)
+ if (mi_col + x_idx < cm->mi_cols && mi_row + y < cm->mi_rows) {
+ memset(xd->mi + y * cm->mi_stride + x_idx, 0, sizeof(*xd->mi));
+ memset(x->mbmi_ext + y * cm->mi_cols + x_idx, 0, sizeof(*x->mbmi_ext));
+ }
+}
+#endif // CONFIG_SPEED_REFS
+
static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
TileDataEnc *tile_data, int mi_row,
TOKENEXTRA **tp) {
@@ -4157,8 +4503,6 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
MODE_INFO **mi = cm->mi_grid_visible + idx_str;
PC_TREE *const pc_root = td->pc_root[cm->mib_size_log2 - MIN_MIB_SIZE_LOG2];
- av1_update_boundary_info(cm, tile_info, mi_row, mi_col);
-
if (sf->adaptive_pred_interp_filter) {
for (i = 0; i < leaf_nodes; ++i)
td->leaf_tree[i].pred_interp_filter = SWITCHABLE;
@@ -4258,12 +4602,35 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
&x->min_partition_size, &x->max_partition_size);
}
+#if CONFIG_SPEED_REFS
+ // NOTE: The current superblock is scanned in two passes; the first pass
+ // only collects stats.
+ int m_search_count_backup = *(x->m_search_count_ptr);
+ for (int sb_pass_idx = 0; sb_pass_idx < 2; ++sb_pass_idx) {
+ cpi->sb_scanning_pass_idx = sb_pass_idx;
+ if (frame_is_intra_only(cm) && sb_pass_idx == 0) continue;
+
+ rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, cm->sb_size,
+ &dummy_rdc,
+#if CONFIG_SUPERTX
+ &dummy_rate_nocoef,
+#endif // CONFIG_SUPERTX
+ INT64_MAX, pc_root);
+ if (sb_pass_idx == 0) {
+ av1_zero(x->pred_mv);
+ pc_root->index = 0;
+ restore_mi(cpi, x, mi_row, mi_col);
+ *(x->m_search_count_ptr) = m_search_count_backup;
+ }
+ }
+#else // !CONFIG_SPEED_REFS
rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, cm->sb_size,
&dummy_rdc,
#if CONFIG_SUPERTX
&dummy_rate_nocoef,
#endif // CONFIG_SUPERTX
INT64_MAX, pc_root);
+#endif // CONFIG_SPEED_REFS
}
}
}
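
The CONFIG_SPEED_REFS path above runs the partition search twice per superblock: pass 0 is restricted (see the earlier hunk) to equal-sized sub-blocks and only gathers stats, after which mode info, predicted MVs and the motion-search counter are rolled back before the real pass. A condensed sketch of that control flow; scan_superblock() and rollback_sb_state() are hypothetical stand-ins for rd_pick_partition() and the restore logic, not aom API.

/* Hedged sketch of the two-pass superblock scan added under
 * CONFIG_SPEED_REFS. */
typedef struct {
  int scanning_pass_idx;
  int is_intra_frame;
} SbScanCtx;

static void encode_sb_two_pass(SbScanCtx *ctx,
                               void (*scan_superblock)(SbScanCtx *),
                               void (*rollback_sb_state)(SbScanCtx *)) {
  for (int pass = 0; pass < 2; ++pass) {
    ctx->scanning_pass_idx = pass;
    /* Intra-only frames skip the stats-gathering pass entirely. */
    if (ctx->is_intra_frame && pass == 0) continue;
    scan_superblock(ctx);
    /* After pass 0, discard per-superblock state so the real pass
     * starts from a clean slate. */
    if (pass == 0) rollback_sb_state(ctx);
  }
}
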
@@ -4329,20 +4696,11 @@ static MV_REFERENCE_FRAME get_frame_type(const AV1_COMP *cpi) {
return LAST_FRAME;
}
-static TX_MODE select_tx_mode(const AV1_COMP *cpi, MACROBLOCKD *const xd) {
- int i, all_lossless = 1;
-
- if (cpi->common.seg.enabled) {
- for (i = 0; i < MAX_SEGMENTS; ++i) {
- if (!xd->lossless[i]) {
- all_lossless = 0;
- break;
- }
- }
- } else {
- all_lossless = xd->lossless[0];
- }
- if (all_lossless) return ONLY_4X4;
+static TX_MODE select_tx_mode(const AV1_COMP *cpi) {
+ if (cpi->common.all_lossless) return ONLY_4X4;
+#if CONFIG_VAR_TX_NO_TX_MODE
+ return TX_MODE_SELECT;
+#else
if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
return ALLOW_32X32 + CONFIG_TX64X64;
else if (cpi->sf.tx_size_search_method == USE_FULL_RD ||
@@ -4350,6 +4708,7 @@ static TX_MODE select_tx_mode(const AV1_COMP *cpi, MACROBLOCKD *const xd) {
return TX_MODE_SELECT;
else
return cpi->common.tx_mode;
+#endif // CONFIG_VAR_TX_NO_TX_MODE
}
void av1_init_tile_data(AV1_COMP *cpi) {
@@ -4372,7 +4731,7 @@ void av1_init_tile_data(AV1_COMP *cpi) {
TileDataEnc *const tile_data =
&cpi->tile_data[tile_row * tile_cols + tile_col];
int i, j;
- for (i = 0; i < BLOCK_SIZES; ++i) {
+ for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
for (j = 0; j < MAX_MODES; ++j) {
tile_data->thresh_freq_fact[i][j] = 32;
tile_data->mode_map[i][j] = j;
@@ -4415,12 +4774,8 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
int mi_row;
#if CONFIG_DEPENDENT_HORZTILES
-#if CONFIG_TILE_GROUPS
if ((!cm->dependent_horz_tiles) || (tile_row == 0) ||
tile_info->tg_horz_boundary) {
-#else
- if ((!cm->dependent_horz_tiles) || (tile_row == 0)) {
-#endif
av1_zero_above_context(cm, tile_info->mi_col_start, tile_info->mi_col_end);
}
#else
@@ -4504,22 +4859,21 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
#endif
#endif // #if CONFIG_PVQ
-#if CONFIG_EC_ADAPT
this_tile->tctx = *cm->fc;
td->mb.e_mbd.tile_ctx = &this_tile->tctx;
-#endif // #if CONFIG_EC_ADAPT
#if CONFIG_CFL
MACROBLOCKD *const xd = &td->mb.e_mbd;
xd->cfl = &this_tile->cfl;
- cfl_init(xd->cfl, cm, xd->plane[AOM_PLANE_U].subsampling_x,
- xd->plane[AOM_PLANE_U].subsampling_y);
+ cfl_init(xd->cfl, cm);
#endif
#if CONFIG_PVQ
td->mb.daala_enc.state.adapt = &this_tile->tctx.pvq_context;
#endif // CONFIG_PVQ
+ av1_setup_across_tile_boundary_info(cm, tile_info);
+
for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
mi_row += cm->mib_size) {
encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
@@ -4656,6 +5010,36 @@ static int do_gm_search_logic(SPEED_FEATURES *const sf, int num_refs_using_gm,
}
#endif // CONFIG_GLOBAL_MOTION
+#if CONFIG_PALETTE
+// Estimate if the source frame is screen content, based on the portion of
+// blocks that have no more than 4 (experimentally selected) luma colors.
+static int is_screen_content(const uint8_t *src,
+#if CONFIG_HIGHBITDEPTH
+ int use_hbd, int bd,
+#endif // CONFIG_HIGHBITDEPTH
+ int stride, int width, int height) {
+ assert(src != NULL);
+ int counts = 0;
+ const int blk_w = 16;
+ const int blk_h = 16;
+ const int limit = 4;
+ for (int r = 0; r + blk_h <= height; r += blk_h) {
+ for (int c = 0; c + blk_w <= width; c += blk_w) {
+ const int n_colors =
+#if CONFIG_HIGHBITDEPTH
+ use_hbd ? av1_count_colors_highbd(src + r * stride + c, stride, blk_w,
+ blk_h, bd)
+ :
+#endif // CONFIG_HIGHBITDEPTH
+ av1_count_colors(src + r * stride + c, stride, blk_w, blk_h);
+ if (n_colors > 1 && n_colors <= limit) counts++;
+ }
+ }
+ // The threshold is 10%.
+ return counts * blk_h * blk_w * 10 > width * height;
+}
+#endif // CONFIG_PALETTE
+
static void encode_frame_internal(AV1_COMP *cpi) {
ThreadData *const td = &cpi->td;
MACROBLOCK *const x = &td->mb;
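
The new is_screen_content() heuristic counts 16x16 blocks containing between 2 and 4 distinct luma colors and flags the frame as screen content when those blocks cover more than 10% of the picture area; counts * blk_w * blk_h * 10 > width * height is exactly that coverage test in integer form. A small worked check with illustrative numbers (not taken from the patch):

#include <stdio.h>

/* Worked example of the 10% coverage test: counts * blk_w * blk_h is the
 * pixel area covered by "few-color" blocks, compared against 10% of
 * width * height. */
int main(void) {
  const int blk_w = 16, blk_h = 16;
  const int width = 1280, height = 720;               /* 921600 pixels  */
  const int counts = 400;                             /* few-color blocks */
  const long covered = (long)counts * blk_w * blk_h;  /* 102400 pixels  */
  const int is_screen = covered * 10 > (long)width * height;
  printf("covered=%ld (%.1f%%), screen content: %d\n", covered,
         100.0 * covered / (width * height), is_screen);
  return 0;  /* ~11.1% coverage, so the flag would be set */
}
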
@@ -4682,6 +5066,23 @@ static void encode_frame_internal(AV1_COMP *cpi) {
av1_zero(rdc->coef_counts);
av1_zero(rdc->comp_pred_diff);
+#if CONFIG_PALETTE || CONFIG_INTRABC
+ if (frame_is_intra_only(cm)) {
+#if CONFIG_PALETTE
+ cm->allow_screen_content_tools =
+ cpi->oxcf.content == AOM_CONTENT_SCREEN ||
+ is_screen_content(cpi->source->y_buffer,
+#if CONFIG_HIGHBITDEPTH
+ cpi->source->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
+#endif // CONFIG_HIGHBITDEPTH
+ cpi->source->y_stride, cpi->source->y_width,
+ cpi->source->y_height);
+#else
+ cm->allow_screen_content_tools = cpi->oxcf.content == AOM_CONTENT_SCREEN;
+#endif // CONFIG_PALETTE
+ }
+#endif // CONFIG_PALETTE || CONFIG_INTRABC
+
#if CONFIG_GLOBAL_MOTION
av1_zero(rdc->global_motion_used);
av1_zero(cpi->gmparams_cost);
@@ -4709,6 +5110,8 @@ static void encode_frame_internal(AV1_COMP *cpi) {
memcpy(&cm->global_motion[frame], &cm->global_motion[pframe],
sizeof(WarpedMotionParams));
} else if (ref_buf[frame] &&
+ ref_buf[frame]->y_crop_width == cpi->source->y_crop_width &&
+ ref_buf[frame]->y_crop_height == cpi->source->y_crop_height &&
do_gm_search_logic(&cpi->sf, num_refs_using_gm, frame)) {
TransformationType model;
const int64_t ref_frame_error = av1_frame_error(
@@ -4716,8 +5119,8 @@ static void encode_frame_internal(AV1_COMP *cpi) {
xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
#endif // CONFIG_HIGHBITDEPTH
ref_buf[frame]->y_buffer, ref_buf[frame]->y_stride,
- cpi->source->y_buffer, 0, 0, cpi->source->y_width,
- cpi->source->y_height, cpi->source->y_stride);
+ cpi->source->y_buffer, cpi->source->y_width, cpi->source->y_height,
+ cpi->source->y_stride);
if (ref_frame_error == 0) continue;
@@ -4752,7 +5155,8 @@ static void encode_frame_internal(AV1_COMP *cpi) {
ref_buf[frame]->y_buffer, ref_buf[frame]->y_width,
ref_buf[frame]->y_height, ref_buf[frame]->y_stride,
cpi->source->y_buffer, cpi->source->y_width,
- cpi->source->y_height, cpi->source->y_stride, 3);
+ cpi->source->y_height, cpi->source->y_stride, 5,
+ best_warp_error);
if (warp_error < best_warp_error) {
best_warp_error = warp_error;
// Save the wm_params modified by refine_integerized_param()
@@ -4812,10 +5216,10 @@ static void encode_frame_internal(AV1_COMP *cpi) {
cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
xd->qindex[i] = qindex;
}
-
+ cm->all_lossless = all_lossless(cm, xd);
if (!cm->seg.enabled && xd->lossless[0]) x->optimize = 0;
- cm->tx_mode = select_tx_mode(cpi, xd);
+ cm->tx_mode = select_tx_mode(cpi);
#if CONFIG_DELTA_Q
// Fix delta q resolution for the moment
@@ -4859,18 +5263,32 @@ static void encode_frame_internal(AV1_COMP *cpi) {
#if CONFIG_TEMPMV_SIGNALING
if (cm->prev_frame) {
- cm->use_prev_frame_mvs &= !cm->error_resilient_mode &&
- cm->width == cm->prev_frame->buf.y_width &&
- cm->height == cm->prev_frame->buf.y_height &&
- !cm->intra_only && !cm->prev_frame->intra_only;
+ cm->use_prev_frame_mvs &=
+ !cm->error_resilient_mode &&
+#if CONFIG_FRAME_SUPERRES
+ cm->width == cm->last_width && cm->height == cm->last_height &&
+#else
+ cm->width == cm->prev_frame->buf.y_crop_width &&
+ cm->height == cm->prev_frame->buf.y_crop_height &&
+#endif // CONFIG_FRAME_SUPERRES
+ !cm->intra_only && !cm->prev_frame->intra_only && cm->last_show_frame;
} else {
cm->use_prev_frame_mvs = 0;
}
#else
- cm->use_prev_frame_mvs = !cm->error_resilient_mode && cm->prev_frame &&
- cm->width == cm->prev_frame->buf.y_crop_width &&
- cm->height == cm->prev_frame->buf.y_crop_height &&
- !cm->intra_only && cm->last_show_frame;
+ if (cm->prev_frame) {
+ cm->use_prev_frame_mvs = !cm->error_resilient_mode &&
+#if CONFIG_FRAME_SUPERRES
+ cm->width == cm->last_width &&
+ cm->height == cm->last_height &&
+#else
+ cm->width == cm->prev_frame->buf.y_crop_width &&
+ cm->height == cm->prev_frame->buf.y_crop_height &&
+#endif // CONFIG_FRAME_SUPERRES
+ !cm->intra_only && cm->last_show_frame;
+ } else {
+ cm->use_prev_frame_mvs = 0;
+ }
#endif // CONFIG_TEMPMV_SIGNALING
// Special case: set prev_mi to NULL when the previous mode info
@@ -4894,6 +5312,8 @@ static void encode_frame_internal(AV1_COMP *cpi) {
}
#endif
+ av1_setup_frame_boundary_info(cm);
+
// If allowed, encoding tiles in parallel with one thread handling one tile.
// TODO(geza.lore): The multi-threaded encoder is not safe with more than
// 1 tile rows, as it uses the single above_context et al arrays from
@@ -4921,7 +5341,11 @@ static void make_consistent_compound_tools(AV1_COMMON *cm) {
cm->allow_interintra_compound = 0;
#endif // CONFIG_INTERINTRA
#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
+#if CONFIG_COMPOUND_SINGLEREF
+ if (frame_is_intra_only(cm))
+#else // !CONFIG_COMPOUND_SINGLEREF
if (frame_is_intra_only(cm) || cm->reference_mode == SINGLE_REFERENCE)
+#endif // CONFIG_COMPOUND_SINGLEREF
cm->allow_masked_compound = 0;
#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
}
@@ -4942,14 +5366,14 @@ void av1_encode_frame(AV1_COMP *cpi) {
// side behavior is where the ALT ref buffer has opposite sign bias to
// the other two.
if (!frame_is_intra_only(cm)) {
-#if !CONFIG_ONE_SIDED_COMPOUND
+#if !(CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS)
if ((cm->ref_frame_sign_bias[ALTREF_FRAME] ==
cm->ref_frame_sign_bias[GOLDEN_FRAME]) ||
(cm->ref_frame_sign_bias[ALTREF_FRAME] ==
cm->ref_frame_sign_bias[LAST_FRAME])) {
cpi->allow_comp_inter_inter = 0;
} else {
-#endif
+#endif // !(CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS)
cpi->allow_comp_inter_inter = 1;
#if CONFIG_EXT_REFS
cm->comp_fwd_ref[0] = LAST_FRAME;
@@ -4962,10 +5386,11 @@ void av1_encode_frame(AV1_COMP *cpi) {
cm->comp_fixed_ref = ALTREF_FRAME;
cm->comp_var_ref[0] = LAST_FRAME;
cm->comp_var_ref[1] = GOLDEN_FRAME;
-#endif // CONFIG_EXT_REFS
-#if !CONFIG_ONE_SIDED_COMPOUND // Normative in encoder
+#endif // CONFIG_EXT_REFS
+#if !(CONFIG_ONE_SIDED_COMPOUND || \
+ CONFIG_EXT_COMP_REFS) // Normative in encoder
}
-#endif
+#endif // !(CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS)
} else {
cpi->allow_comp_inter_inter = 0;
}
@@ -4998,7 +5423,12 @@ void av1_encode_frame(AV1_COMP *cpi) {
else
cm->reference_mode = REFERENCE_MODE_SELECT;
#else
+#if CONFIG_BGSPRITE
+ (void)is_alt_ref;
+ if (!cpi->allow_comp_inter_inter)
+#else
if (is_alt_ref || !cpi->allow_comp_inter_inter)
+#endif // CONFIG_BGSPRITE
cm->reference_mode = SINGLE_REFERENCE;
else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] &&
mode_thrs[COMPOUND_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT] &&
@@ -5017,25 +5447,23 @@ void av1_encode_frame(AV1_COMP *cpi) {
#if CONFIG_EXT_INTER
make_consistent_compound_tools(cm);
#endif // CONFIG_EXT_INTER
+
+ rdc->single_ref_used_flag = 0;
+ rdc->compound_ref_used_flag = 0;
+
encode_frame_internal(cpi);
for (i = 0; i < REFERENCE_MODES; ++i)
mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2;
if (cm->reference_mode == REFERENCE_MODE_SELECT) {
- int single_count_zero = 0;
- int comp_count_zero = 0;
-
- for (i = 0; i < COMP_INTER_CONTEXTS; i++) {
- single_count_zero += counts->comp_inter[i][0];
- comp_count_zero += counts->comp_inter[i][1];
- }
-
- if (comp_count_zero == 0) {
+ // Use a flag that includes 4x4 blocks
+ if (rdc->compound_ref_used_flag == 0) {
cm->reference_mode = SINGLE_REFERENCE;
av1_zero(counts->comp_inter);
#if !CONFIG_REF_ADAPT
- } else if (single_count_zero == 0) {
+ // Use a flag that includes 4x4 blocks
+ } else if (rdc->single_ref_used_flag == 0) {
cm->reference_mode = COMPOUND_REFERENCE;
av1_zero(counts->comp_inter);
#endif // !CONFIG_REF_ADAPT
@@ -5046,10 +5474,15 @@ void av1_encode_frame(AV1_COMP *cpi) {
#endif // CONFIG_EXT_INTER
#if CONFIG_VAR_TX
+#if CONFIG_RECT_TX_EXT
+ if (cm->tx_mode == TX_MODE_SELECT && cpi->td.mb.txb_split_count == 0 &&
+ counts->quarter_tx_size[1] == 0)
+#else
if (cm->tx_mode == TX_MODE_SELECT && cpi->td.mb.txb_split_count == 0)
+#endif
cm->tx_mode = ALLOW_32X32 + CONFIG_TX64X64;
#else
-#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#if CONFIG_RECT_TX_EXT && CONFIG_EXT_TX
if (cm->tx_mode == TX_MODE_SELECT && counts->quarter_tx_size[1] == 0) {
#else
if (cm->tx_mode == TX_MODE_SELECT) {
@@ -5232,12 +5665,20 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd,
const MODE_INFO *left_mi, const int intraonly,
const int mi_row, const int mi_col) {
const MB_MODE_INFO *const mbmi = &mi->mbmi;
+#if CONFIG_ENTROPY_STATS
const PREDICTION_MODE y_mode = mbmi->mode;
- const PREDICTION_MODE uv_mode = mbmi->uv_mode;
+ const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
+#else // CONFIG_ENTROPY_STATS
+ (void)counts;
+ (void)above_mi;
+ (void)left_mi;
+ (void)intraonly;
+#endif // CONFIG_ENTROPY_STATS
const BLOCK_SIZE bsize = mbmi->sb_type;
const int unify_bsize = CONFIG_CB4X4;
if (bsize < BLOCK_8X8 && !unify_bsize) {
+#if CONFIG_ENTROPY_STATS
int idx, idy;
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
@@ -5253,7 +5694,9 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd,
++counts->y_mode[0][bmode];
}
}
+#endif // CONFIG_ENTROPY_STATS
} else {
+#if CONFIG_ENTROPY_STATS
if (intraonly) {
const PREDICTION_MODE above = av1_above_block_mode(mi, above_mi, 0);
const PREDICTION_MODE left = av1_left_block_mode(mi, left_mi, 0);
@@ -5261,6 +5704,7 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd,
} else {
++counts->y_mode[size_group_lookup[bsize]][y_mode];
}
+#endif // CONFIG_ENTROPY_STATS
#if CONFIG_FILTER_INTRA
if (mbmi->mode == DC_PRED
#if CONFIG_PALETTE
@@ -5271,7 +5715,7 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd,
mbmi->filter_intra_mode_info.use_filter_intra_mode[0];
++counts->filter_intra[0][use_filter_intra_mode];
}
- if (mbmi->uv_mode == DC_PRED
+ if (mbmi->uv_mode == UV_DC_PRED
#if CONFIG_CB4X4
&&
is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
@@ -5306,7 +5750,9 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd,
(void)mi_col;
(void)xd;
#endif
+#if CONFIG_ENTROPY_STATS
++counts->uv_mode[y_mode][uv_mode];
+#endif // CONFIG_ENTROPY_STATS
}
#if CONFIG_VAR_TX
@@ -5325,9 +5771,17 @@ static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
+#if CONFIG_RECT_TX_EXT
+ if (tx_size == plane_tx_size ||
+ mbmi->tx_size == quarter_txsize_lookup[mbmi->sb_type]) {
+#else
if (tx_size == plane_tx_size) {
+#endif
++counts->txfm_partition[ctx][0];
- mbmi->tx_size = tx_size;
+#if CONFIG_RECT_TX_EXT
+ if (tx_size == plane_tx_size)
+#endif
+ mbmi->tx_size = tx_size;
txfm_partition_update(xd->above_txfm_context + blk_col,
xd->left_txfm_context + blk_row, tx_size, tx_size);
} else {
@@ -5438,18 +5892,22 @@ static void tx_partition_set_contexts(const AV1_COMMON *const cm,
void av1_update_tx_type_count(const AV1_COMMON *cm, MACROBLOCKD *xd,
#if CONFIG_TXK_SEL
- int block, int plane,
+ int blk_row, int blk_col, int block, int plane,
#endif
BLOCK_SIZE bsize, TX_SIZE tx_size,
FRAME_COUNTS *counts) {
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
int is_inter = is_inter_block(mbmi);
+
#if !CONFIG_TXK_SEL
TX_TYPE tx_type = mbmi->tx_type;
#else
+ (void)blk_row;
+ (void)blk_col;
// Only y plane's tx_type is updated
if (plane > 0) return;
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size);
+ TX_TYPE tx_type =
+ av1_get_tx_type(PLANE_TYPE_Y, xd, blk_row, blk_col, block, tx_size);
#endif
#if CONFIG_EXT_TX
if (get_ext_tx_types(tx_size, bsize, is_inter, cm->reduced_tx_set_used) > 1 &&
@@ -5509,7 +5967,7 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
x->pvq_coded = (dry_run == OUTPUT_ENABLED) ? 1 : 0;
#endif
#if CONFIG_CFL
- x->cfl_store_y = (dry_run == OUTPUT_ENABLED) ? 1 : 0;
+ x->cfl_store_y = 1;
#endif
if (!is_inter) {
@@ -5526,13 +5984,8 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_PALETTE
if (bsize >= BLOCK_8X8 && !dry_run) {
for (plane = 0; plane <= 1; ++plane) {
- if (mbmi->palette_mode_info.palette_size[plane] > 0) {
- mbmi->palette_mode_info.palette_first_color_idx[plane] =
- xd->plane[plane].color_index_map[0];
- // TODO(huisu): this increases the use of token buffer. Needs stretch
- // test to verify.
+ if (mbmi->palette_mode_info.palette_size[plane] > 0)
av1_tokenize_palette_sb(cpi, td, plane, t, dry_run, bsize, rate);
- }
}
}
#endif // CONFIG_PALETTE
@@ -5559,9 +6012,21 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
&xd->block_refs[ref]->sf);
}
- av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, block_size);
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ // Single ref compound mode
+ if (!is_compound && is_inter_singleref_comp_mode(mbmi->mode)) {
+ xd->block_refs[1] = xd->block_refs[0];
+ YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[0]);
+#if CONFIG_INTRABC
+ assert(IMPLIES(!is_intrabc_block(mbmi), cfg));
+#else
+ assert(cfg != NULL);
+#endif // !CONFIG_INTRABC
+ av1_setup_pre_planes(xd, 1, cfg, mi_row, mi_col, &xd->block_refs[1]->sf);
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, NULL, block_size);
+ av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, block_size);
#if CONFIG_MOTION_VAR
if (mbmi->motion_mode == OBMC_CAUSAL) {
#if CONFIG_NCOBMC
@@ -5587,10 +6052,11 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
#endif
}
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize < BLOCK_8X8) {
- daala_dist_set_sub8x8_dst(x, x->decoded_8x8, bsize, block_size_wide[bsize],
- block_size_high[bsize], mi_row, mi_col);
+ dist_8x8_set_sub8x8_dst(x, (uint8_t *)x->decoded_8x8, bsize,
+ block_size_wide[bsize], block_size_high[bsize],
+ mi_row, mi_col);
}
#endif
@@ -5629,13 +6095,16 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
++td->counts->tx_size[tx_size_cat][tx_size_ctx][depth];
#endif
-#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+
+#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
if (is_quarter_tx_allowed(xd, mbmi, is_inter) &&
- mbmi->tx_size != txsize_sqr_up_map[mbmi->tx_size]) {
- ++td->counts->quarter_tx_size[mbmi->tx_size ==
- quarter_txsize_lookup[mbmi->sb_type]];
+ quarter_txsize_lookup[bsize] != max_txsize_rect_lookup[bsize] &&
+ (mbmi->tx_size == quarter_txsize_lookup[bsize] ||
+ mbmi->tx_size == max_txsize_rect_lookup[bsize])) {
+ ++td->counts
+ ->quarter_tx_size[mbmi->tx_size == quarter_txsize_lookup[bsize]];
}
-#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#endif
#if CONFIG_EXT_TX && CONFIG_RECT_TX
assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(xd, mbmi)));
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -5673,8 +6142,8 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
}
++td->counts->tx_size_totals[txsize_sqr_map[tx_size]];
- ++td->counts
- ->tx_size_totals[txsize_sqr_map[get_uv_tx_size(mbmi, &xd->plane[1])]];
+ ++td->counts->tx_size_totals[txsize_sqr_map[av1_get_uv_tx_size(
+ mbmi, &xd->plane[1])]];
#if !CONFIG_TXK_SEL
av1_update_tx_type_count(cm, xd, bsize, tx_size, td->counts);
#endif
@@ -5837,7 +6306,7 @@ static void predict_superblock(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_EXT_INTER
int mi_row_ori, int mi_col_ori,
#endif // CONFIG_EXT_INTER
- int mi_row_pred, int mi_col_pred,
+ int mi_row_pred, int mi_col_pred, int plane,
BLOCK_SIZE bsize_pred, int b_sub8x8, int block) {
// Used in supertx
// (mi_row_ori, mi_col_ori): location for mv
@@ -5859,28 +6328,39 @@ static void predict_superblock(const AV1_COMP *const cpi, ThreadData *td,
&xd->block_refs[ref]->sf);
}
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ // Single ref compound mode
+ if (!is_compound && is_inter_singleref_comp_mode(mbmi->mode)) {
+ xd->block_refs[1] = xd->block_refs[0];
+ YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[0]);
+ av1_setup_pre_planes(xd, 1, cfg, mi_row_pred, mi_col_pred,
+ &xd->block_refs[1]->sf);
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+
if (!b_sub8x8)
- av1_build_inter_predictors_sb_extend(cm, xd,
+ av1_build_inter_predictor_sb_extend(cm, xd,
#if CONFIG_EXT_INTER
- mi_row_ori, mi_col_ori,
+ mi_row_ori, mi_col_ori,
#endif // CONFIG_EXT_INTER
- mi_row_pred, mi_col_pred, bsize_pred);
+ mi_row_pred, mi_col_pred, plane,
+ bsize_pred);
else
- av1_build_inter_predictors_sb_sub8x8_extend(cm, xd,
+ av1_build_inter_predictor_sb_sub8x8_extend(cm, xd,
#if CONFIG_EXT_INTER
- mi_row_ori, mi_col_ori,
+ mi_row_ori, mi_col_ori,
#endif // CONFIG_EXT_INTER
- mi_row_pred, mi_col_pred,
- bsize_pred, block);
+ mi_row_pred, mi_col_pred, plane,
+ bsize_pred, block);
}
static void predict_b_extend(const AV1_COMP *const cpi, ThreadData *td,
const TileInfo *const tile, int block,
int mi_row_ori, int mi_col_ori, int mi_row_pred,
int mi_col_pred, int mi_row_top, int mi_col_top,
- uint8_t *dst_buf[3], int dst_stride[3],
+ int plane, uint8_t *dst_buf, int dst_stride,
BLOCK_SIZE bsize_top, BLOCK_SIZE bsize_pred,
- RUN_TYPE dry_run, int b_sub8x8, int bextend) {
+ RUN_TYPE dry_run, int b_sub8x8) {
// Used in supertx
// (mi_row_ori, mi_col_ori): location for mv
// (mi_row_pred, mi_col_pred, bsize_pred): region to predict
@@ -5905,34 +6385,27 @@ static void predict_b_extend(const AV1_COMP *const cpi, ThreadData *td,
set_offsets_extend(cpi, td, tile, mi_row_pred, mi_col_pred, mi_row_ori,
mi_col_ori, bsize_pred);
- xd->plane[0].dst.stride = dst_stride[0];
- xd->plane[1].dst.stride = dst_stride[1];
- xd->plane[2].dst.stride = dst_stride[2];
- xd->plane[0].dst.buf = dst_buf[0] +
- (r >> xd->plane[0].subsampling_y) * dst_stride[0] +
- (c >> xd->plane[0].subsampling_x);
- xd->plane[1].dst.buf = dst_buf[1] +
- (r >> xd->plane[1].subsampling_y) * dst_stride[1] +
- (c >> xd->plane[1].subsampling_x);
- xd->plane[2].dst.buf = dst_buf[2] +
- (r >> xd->plane[2].subsampling_y) * dst_stride[2] +
- (c >> xd->plane[2].subsampling_x);
+ xd->plane[plane].dst.stride = dst_stride;
+ xd->plane[plane].dst.buf =
+ dst_buf + (r >> xd->plane[plane].subsampling_y) * dst_stride +
+ (c >> xd->plane[plane].subsampling_x);
predict_superblock(cpi, td,
#if CONFIG_EXT_INTER
mi_row_ori, mi_col_ori,
#endif // CONFIG_EXT_INTER
- mi_row_pred, mi_col_pred, bsize_pred, b_sub8x8, block);
+ mi_row_pred, mi_col_pred, plane, bsize_pred, b_sub8x8,
+ block);
- if (!dry_run && !bextend)
+ if (!dry_run && (plane == 0) && (block == 0 || !b_sub8x8))
update_stats(&cpi->common, td, mi_row_pred, mi_col_pred, 1);
}
static void extend_dir(const AV1_COMP *const cpi, ThreadData *td,
const TileInfo *const tile, int block, BLOCK_SIZE bsize,
- BLOCK_SIZE top_bsize, int mi_row, int mi_col,
- int mi_row_top, int mi_col_top, RUN_TYPE dry_run,
- uint8_t *dst_buf[3], int dst_stride[3], int dir) {
+ BLOCK_SIZE top_bsize, int mi_row_ori, int mi_col_ori,
+ int mi_row, int mi_col, int mi_row_top, int mi_col_top,
+ int plane, uint8_t *dst_buf, int dst_stride, int dir) {
// dir: 0-lower, 1-upper, 2-left, 3-right
// 4-lowerleft, 5-upperleft, 6-lowerright, 7-upperright
MACROBLOCKD *xd = &td->mb.e_mbd;
@@ -5973,10 +6446,10 @@ static void extend_dir(const AV1_COMP *const cpi, ThreadData *td,
for (j = 0; j < mi_height + ext_offset; j += high_unit)
for (i = 0; i < mi_width + ext_offset; i += wide_unit)
- predict_b_extend(cpi, td, tile, block, mi_row, mi_col, mi_row_pred + j,
- mi_col_pred + i, mi_row_top, mi_col_top, dst_buf,
- dst_stride, top_bsize, extend_bsize, dry_run, b_sub8x8,
- 1);
+ predict_b_extend(cpi, td, tile, block, mi_row_ori, mi_col_ori,
+ mi_row_pred + j, mi_col_pred + i, mi_row_top,
+ mi_col_top, plane, dst_buf, dst_stride, top_bsize,
+ extend_bsize, 1, b_sub8x8);
} else if (dir == 2 || dir == 3) { // left and right
extend_bsize =
(mi_height == mi_size_high[BLOCK_8X8] || bsize < BLOCK_8X8 || yss < xss)
@@ -5996,10 +6469,10 @@ static void extend_dir(const AV1_COMP *const cpi, ThreadData *td,
for (j = 0; j < mi_height + ext_offset; j += high_unit)
for (i = 0; i < mi_width + ext_offset; i += wide_unit)
- predict_b_extend(cpi, td, tile, block, mi_row, mi_col, mi_row_pred + j,
- mi_col_pred + i, mi_row_top, mi_col_top, dst_buf,
- dst_stride, top_bsize, extend_bsize, dry_run, b_sub8x8,
- 1);
+ predict_b_extend(cpi, td, tile, block, mi_row_ori, mi_col_ori,
+ mi_row_pred + j, mi_col_pred + i, mi_row_top,
+ mi_col_top, plane, dst_buf, dst_stride, top_bsize,
+ extend_bsize, 1, b_sub8x8);
} else {
extend_bsize = BLOCK_8X8;
#if CONFIG_CB4X4
@@ -6018,35 +6491,24 @@ static void extend_dir(const AV1_COMP *const cpi, ThreadData *td,
for (j = 0; j < mi_height + ext_offset; j += high_unit)
for (i = 0; i < mi_width + ext_offset; i += wide_unit)
- predict_b_extend(cpi, td, tile, block, mi_row, mi_col, mi_row_pred + j,
- mi_col_pred + i, mi_row_top, mi_col_top, dst_buf,
- dst_stride, top_bsize, extend_bsize, dry_run, b_sub8x8,
- 1);
+ predict_b_extend(cpi, td, tile, block, mi_row_ori, mi_col_ori,
+ mi_row_pred + j, mi_col_pred + i, mi_row_top,
+ mi_col_top, plane, dst_buf, dst_stride, top_bsize,
+ extend_bsize, 1, b_sub8x8);
}
}
static void extend_all(const AV1_COMP *const cpi, ThreadData *td,
const TileInfo *const tile, int block, BLOCK_SIZE bsize,
- BLOCK_SIZE top_bsize, int mi_row, int mi_col,
- int mi_row_top, int mi_col_top, RUN_TYPE dry_run,
- uint8_t *dst_buf[3], int dst_stride[3]) {
+ BLOCK_SIZE top_bsize, int mi_row_ori, int mi_col_ori,
+ int mi_row, int mi_col, int mi_row_top, int mi_col_top,
+ int plane, uint8_t *dst_buf, int dst_stride) {
assert(block >= 0 && block < 4);
- extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top,
- mi_col_top, dry_run, dst_buf, dst_stride, 0);
- extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top,
- mi_col_top, dry_run, dst_buf, dst_stride, 1);
- extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top,
- mi_col_top, dry_run, dst_buf, dst_stride, 2);
- extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top,
- mi_col_top, dry_run, dst_buf, dst_stride, 3);
- extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top,
- mi_col_top, dry_run, dst_buf, dst_stride, 4);
- extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top,
- mi_col_top, dry_run, dst_buf, dst_stride, 5);
- extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top,
- mi_col_top, dry_run, dst_buf, dst_stride, 6);
- extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top,
- mi_col_top, dry_run, dst_buf, dst_stride, 7);
+ for (int i = 0; i < 8; ++i) {
+ extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row_ori, mi_col_ori,
+ mi_row, mi_col, mi_row_top, mi_col_top, plane, dst_buf,
+ dst_stride, i);
+ }
}
// This function generates prediction for multiple blocks, between which
@@ -6140,29 +6602,36 @@ static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td,
switch (partition) {
case PARTITION_NONE:
assert(bsize < top_bsize);
- predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
- mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize,
- bsize, dry_run, 0, 0);
- extend_all(cpi, td, tile, 0, bsize, top_bsize, mi_row, mi_col, mi_row_top,
- mi_col_top, dry_run, dst_buf, dst_stride);
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, i, dst_buf[i], dst_stride[i],
+ top_bsize, bsize, dry_run, 0);
+ extend_all(cpi, td, tile, 0, bsize, top_bsize, mi_row, mi_col, mi_row,
+ mi_col, mi_row_top, mi_col_top, i, dst_buf[i],
+ dst_stride[i]);
+ }
break;
case PARTITION_HORZ:
if (bsize == BLOCK_8X8 && !unify_bsize) {
- // Fisrt half
- predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
- mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize,
- BLOCK_8X8, dry_run, 1, 0);
- if (bsize < top_bsize)
- extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
- mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride);
-
- // Second half
- predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col,
- mi_row_top, mi_col_top, dst_buf1, dst_stride1,
- top_bsize, BLOCK_8X8, dry_run, 1, 1);
- if (bsize < top_bsize)
- extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col,
- mi_row_top, mi_col_top, dry_run, dst_buf1, dst_stride1);
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ // First half
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, i, dst_buf[i], dst_stride[i],
+ top_bsize, BLOCK_8X8, dry_run, 1);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i],
+ dst_stride[i]);
+
+ // Second half
+ predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, i, dst_buf1[i],
+ dst_stride1[i], top_bsize, BLOCK_8X8, dry_run, 1);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col,
+ mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf1[i],
+ dst_stride1[i]);
+ }
// Smooth
xd->plane[0].dst.buf = dst_buf[0];
@@ -6172,60 +6641,89 @@ static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td,
mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_HORZ,
0);
} else {
- // First half
- predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
- mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize,
- subsize, dry_run, 0, 0);
- if (bsize < top_bsize)
- extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
- mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride);
- else
- extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
- mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride, 0);
-
- if (mi_row + hbs < cm->mi_rows) {
- // Second half
- predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col, mi_row + hbs,
- mi_col, mi_row_top, mi_col_top, dst_buf1,
- dst_stride1, top_bsize, subsize, dry_run, 0, 0);
- if (bsize < top_bsize)
- extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs,
- mi_col, mi_row_top, mi_col_top, dry_run, dst_buf1,
- dst_stride1);
- else
- extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs,
- mi_col, mi_row_top, mi_col_top, dry_run, dst_buf1,
- dst_stride1, 1);
-
- // Smooth
- for (i = 0; i < MAX_MB_PLANE; i++) {
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+#if CONFIG_CB4X4
+ const struct macroblockd_plane *pd = &xd->plane[i];
+ int handle_chroma_sub8x8 = need_handle_chroma_sub8x8(
+ subsize, pd->subsampling_x, pd->subsampling_y);
+
+ if (handle_chroma_sub8x8) {
+ int mode_offset_row = CONFIG_CHROMA_SUB8X8 ? hbs : 0;
+
+ predict_b_extend(cpi, td, tile, 0, mi_row + mode_offset_row, mi_col,
+ mi_row, mi_col, mi_row_top, mi_col_top, i,
+ dst_buf[i], dst_stride[i], top_bsize, bsize,
+ dry_run, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, bsize, top_bsize,
+ mi_row + mode_offset_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, i, dst_buf[i], dst_stride[i]);
+ } else {
+#endif
+ // First half
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, i, dst_buf[i],
+ dst_stride[i], top_bsize, subsize, dry_run, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i],
+ dst_stride[i]);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i],
+ dst_stride[i], 0);
xd->plane[i].dst.buf = dst_buf[i];
xd->plane[i].dst.stride = dst_stride[i];
- av1_build_masked_inter_predictor_complex(
- xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i],
- mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
- PARTITION_HORZ, i);
+
+ if (mi_row + hbs < cm->mi_rows) {
+ // Second half
+ predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col,
+ mi_row + hbs, mi_col, mi_row_top, mi_col_top, i,
+ dst_buf1[i], dst_stride1[i], top_bsize, subsize,
+ dry_run, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs,
+ mi_col, mi_row + hbs, mi_col, mi_row_top, mi_col_top,
+ i, dst_buf1[i], dst_stride1[i]);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs,
+ mi_col, mi_row + hbs, mi_col, mi_row_top, mi_col_top,
+ i, dst_buf1[i], dst_stride1[i], 1);
+ // Smooth
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ av1_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+#if CONFIG_CB4X4
}
+#endif
}
}
break;
case PARTITION_VERT:
if (bsize == BLOCK_8X8 && !unify_bsize) {
- // First half
- predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
- mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize,
- BLOCK_8X8, dry_run, 1, 0);
- if (bsize < top_bsize)
- extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
- mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride);
-
- // Second half
- predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col,
- mi_row_top, mi_col_top, dst_buf1, dst_stride1,
- top_bsize, BLOCK_8X8, dry_run, 1, 1);
- if (bsize < top_bsize)
- extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col,
- mi_row_top, mi_col_top, dry_run, dst_buf1, dst_stride1);
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ // First half
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, i, dst_buf[i], dst_stride[i],
+ top_bsize, BLOCK_8X8, dry_run, 1);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i],
+ dst_stride[i]);
+
+ // Second half
+ predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, i, dst_buf1[i],
+ dst_stride1[i], top_bsize, BLOCK_8X8, dry_run, 1);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col,
+ mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf1[i],
+ dst_stride1[i]);
+ }
// Smooth
xd->plane[0].dst.buf = dst_buf[0];
@@ -6235,66 +6733,160 @@ static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td,
mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_VERT,
0);
} else {
- // bsize: not important, not useful
- predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
- mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize,
- subsize, dry_run, 0, 0);
- if (bsize < top_bsize)
- extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
- mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride);
- else
- extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
- mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride, 3);
-
- if (mi_col + hbs < cm->mi_cols) {
- predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, mi_row,
- mi_col + hbs, mi_row_top, mi_col_top, dst_buf1,
- dst_stride1, top_bsize, subsize, dry_run, 0, 0);
- if (bsize < top_bsize)
- extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row,
- mi_col + hbs, mi_row_top, mi_col_top, dry_run, dst_buf1,
- dst_stride1);
- else
- extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row,
- mi_col + hbs, mi_row_top, mi_col_top, dry_run, dst_buf1,
- dst_stride1, 2);
-
- for (i = 0; i < MAX_MB_PLANE; i++) {
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+#if CONFIG_CB4X4
+ const struct macroblockd_plane *pd = &xd->plane[i];
+ int handle_chroma_sub8x8 = need_handle_chroma_sub8x8(
+ subsize, pd->subsampling_x, pd->subsampling_y);
+
+ if (handle_chroma_sub8x8) {
+ int mode_offset_col = CONFIG_CHROMA_SUB8X8 ? hbs : 0;
+
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + mode_offset_col,
+ mi_row, mi_col, mi_row_top, mi_col_top, i,
+ dst_buf[i], dst_stride[i], top_bsize, bsize,
+ dry_run, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, bsize, top_bsize, mi_row,
+ mi_col + mode_offset_col, mi_row, mi_col, mi_row_top,
+ mi_col_top, i, dst_buf[i], dst_stride[i]);
+ } else {
+#endif
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, i, dst_buf[i],
+ dst_stride[i], top_bsize, subsize, dry_run, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i],
+ dst_stride[i]);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i],
+ dst_stride[i], 3);
xd->plane[i].dst.buf = dst_buf[i];
xd->plane[i].dst.stride = dst_stride[i];
- av1_build_masked_inter_predictor_complex(
- xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i],
- mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
- PARTITION_VERT, i);
+
+ if (mi_col + hbs < cm->mi_cols) {
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, i,
+ dst_buf1[i], dst_stride1[i], top_bsize, subsize,
+ dry_run, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row,
+ mi_col + hbs, mi_row, mi_col + hbs, mi_row_top,
+ mi_col_top, i, dst_buf1[i], dst_stride1[i]);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row,
+ mi_col + hbs, mi_row, mi_col + hbs, mi_row_top,
+ mi_col_top, i, dst_buf1[i], dst_stride1[i], 2);
+
+ // Smooth
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ av1_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_VERT, i);
+ }
+#if CONFIG_CB4X4
}
+#endif
}
}
break;
case PARTITION_SPLIT:
if (bsize == BLOCK_8X8 && !unify_bsize) {
- predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
- mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize,
- BLOCK_8X8, dry_run, 1, 0);
- predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col,
- mi_row_top, mi_col_top, dst_buf1, dst_stride1,
- top_bsize, BLOCK_8X8, dry_run, 1, 1);
- predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col,
- mi_row_top, mi_col_top, dst_buf2, dst_stride2,
- top_bsize, BLOCK_8X8, dry_run, 1, 1);
- predict_b_extend(cpi, td, tile, 3, mi_row, mi_col, mi_row, mi_col,
- mi_row_top, mi_col_top, dst_buf3, dst_stride3,
- top_bsize, BLOCK_8X8, dry_run, 1, 1);
-
- if (bsize < top_bsize) {
- extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
- mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride);
- extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col,
- mi_row_top, mi_col_top, dry_run, dst_buf1, dst_stride1);
- extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col,
- mi_row_top, mi_col_top, dry_run, dst_buf2, dst_stride2);
- extend_all(cpi, td, tile, 3, subsize, top_bsize, mi_row, mi_col,
- mi_row_top, mi_col_top, dry_run, dst_buf3, dst_stride3);
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, i, dst_buf[i], dst_stride[i],
+ top_bsize, BLOCK_8X8, dry_run, 1);
+ predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, i, dst_buf1[i],
+ dst_stride1[i], top_bsize, BLOCK_8X8, dry_run, 1);
+ predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, i, dst_buf2[i],
+ dst_stride2[i], top_bsize, BLOCK_8X8, dry_run, 1);
+ predict_b_extend(cpi, td, tile, 3, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, i, dst_buf3[i],
+ dst_stride3[i], top_bsize, BLOCK_8X8, dry_run, 1);
+
+ if (bsize < top_bsize) {
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i],
+ dst_stride[i]);
+ extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col,
+ mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf1[i],
+ dst_stride1[i]);
+ extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col,
+ mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf2[i],
+ dst_stride2[i]);
+ extend_all(cpi, td, tile, 3, subsize, top_bsize, mi_row, mi_col,
+ mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf3[i],
+ dst_stride3[i]);
+ }
+ }
+#if CONFIG_CB4X4
+ } else if (bsize == BLOCK_8X8) {
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ const struct macroblockd_plane *pd = &xd->plane[i];
+ int handle_chroma_sub8x8 = need_handle_chroma_sub8x8(
+ subsize, pd->subsampling_x, pd->subsampling_y);
+
+ if (handle_chroma_sub8x8) {
+ int mode_offset_row =
+ CONFIG_CHROMA_SUB8X8 && mi_row + hbs < cm->mi_rows ? hbs : 0;
+ int mode_offset_col =
+ CONFIG_CHROMA_SUB8X8 && mi_col + hbs < cm->mi_cols ? hbs : 0;
+
+ predict_b_extend(cpi, td, tile, 0, mi_row + mode_offset_row,
+ mi_col + mode_offset_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, i, dst_buf[i],
+ dst_stride[i], top_bsize, BLOCK_8X8, dry_run, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, BLOCK_8X8, top_bsize,
+ mi_row + mode_offset_row, mi_col + mode_offset_col,
+ mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i],
+ dst_stride[i]);
+ } else {
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, i, dst_buf[i],
+ dst_stride[i], top_bsize, subsize, dry_run, 0);
+ if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols)
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, i,
+ dst_buf1[i], dst_stride1[i], top_bsize, subsize,
+ dry_run, 0);
+ if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols)
+ predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col,
+ mi_row + hbs, mi_col, mi_row_top, mi_col_top, i,
+ dst_buf2[i], dst_stride2[i], top_bsize, subsize,
+ dry_run, 0);
+ if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols)
+ predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col + hbs,
+ mi_row + hbs, mi_col + hbs, mi_row_top,
+ mi_col_top, i, dst_buf3[i], dst_stride3[i],
+ top_bsize, subsize, dry_run, 0);
+
+ if (bsize < top_bsize) {
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i],
+ dst_stride[i]);
+ if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row,
+ mi_col + hbs, mi_row, mi_col + hbs, mi_row_top,
+ mi_col_top, i, dst_buf1[i], dst_stride1[i]);
+ if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs,
+ mi_col, mi_row + hbs, mi_col, mi_row_top, mi_col_top,
+ i, dst_buf2[i], dst_stride2[i]);
+ if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs,
+ mi_col + hbs, mi_row + hbs, mi_col + hbs, mi_row_top,
+ mi_col_top, i, dst_buf3[i], dst_stride3[i]);
+ }
+ }
}
+#endif
} else {
predict_sb_complex(cpi, td, tile, mi_row, mi_col, mi_row_top,
mi_col_top, dry_run, subsize, top_bsize, dst_buf,
@@ -6314,10 +6906,16 @@ static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td,
pc_tree->split[3]);
}
for (i = 0; i < MAX_MB_PLANE; i++) {
-#if !CONFIG_CB4X4
+#if CONFIG_CB4X4
+ const struct macroblockd_plane *pd = &xd->plane[i];
+ int handle_chroma_sub8x8 = need_handle_chroma_sub8x8(
+ subsize, pd->subsampling_x, pd->subsampling_y);
+ if (handle_chroma_sub8x8) continue; // Skip <4x4 chroma smoothing
+#else
if (bsize == BLOCK_8X8 && i != 0)
continue; // Skip <4x4 chroma smoothing
#endif
+
if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) {
av1_build_masked_inter_predictor_complex(
xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i],
@@ -6334,9 +6932,6 @@ static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td,
PARTITION_HORZ, i);
}
} else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) {
- if (bsize == BLOCK_8X8 && i != 0)
- continue; // Skip <4x4 chroma smoothing
-
av1_build_masked_inter_predictor_complex(
xd, dst_buf[i], dst_stride[i], dst_buf2[i], dst_stride2[i],
mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
@@ -6660,8 +7255,7 @@ static void rd_supertx_sb(const AV1_COMP *const cpi, ThreadData *td,
*tmp_rate = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
x->skip = 1;
} else {
- if (RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist) <
- RDCOST(x->rdmult, x->rddiv, 0, sse)) {
+ if (RDCOST(x->rdmult, *tmp_rate, *tmp_dist) < RDCOST(x->rdmult, 0, sse)) {
*tmp_rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
x->skip = 0;
} else {
@@ -6671,7 +7265,7 @@ static void rd_supertx_sb(const AV1_COMP *const cpi, ThreadData *td,
}
}
*tmp_rate += base_rate;
- rd_tx = RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist);
+ rd_tx = RDCOST(x->rdmult, *tmp_rate, *tmp_dist);
if (rd_tx < bestrd_tx * 0.99 || tx_type == DCT_DCT) {
*best_tx = tx_type;
bestrd_tx = rd_tx;
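(Illustrative note, not part of the patch.) The rd_supertx_sb hunks above are part of the switch to a two-argument RDCOST: the old rddiv divisor is folded into the macro, so callers pass only the rate multiplier, the rate, and the distortion. A minimal sketch of what the comparison computes, assuming the usual lambda*R + D weighting; RD_SHIFT below is a placeholder for the scaling constants that rd.h actually defines, not a real symbol from the tree:

    /* Sketch only -- the real macro lives in rd.h and fixes the scaling. */
    #define RDCOST_SKETCH(rdmult, rate, dist) \
      ((((int64_t)(rate) * (rdmult)) >> RD_SHIFT) + (int64_t)(dist))

In the skip decision above this is used to ask whether coding the residual (*tmp_rate, *tmp_dist) beats signalling skip and taking the full sse as distortion.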
diff --git a/third_party/aom/av1/encoder/encodeframe.h b/third_party/aom/av1/encoder/encodeframe.h
index 46a99e1cf..569ec9f72 100644
--- a/third_party/aom/av1/encoder/encodeframe.h
+++ b/third_party/aom/av1/encoder/encodeframe.h
@@ -37,7 +37,7 @@ void av1_encode_tile(struct AV1_COMP *cpi, struct ThreadData *td, int tile_row,
void av1_update_tx_type_count(const struct AV1Common *cm, MACROBLOCKD *xd,
#if CONFIG_TXK_SEL
- int block, int plane,
+ int blk_row, int blk_col, int block, int plane,
#endif
BLOCK_SIZE bsize, TX_SIZE tx_size,
FRAME_COUNTS *counts);
diff --git a/third_party/aom/av1/encoder/encodemb.c b/third_party/aom/av1/encoder/encodemb.c
index 7c9781533..e7f4d313d 100644
--- a/third_party/aom/av1/encoder/encodemb.c
+++ b/third_party/aom/av1/encoder/encodemb.c
@@ -112,19 +112,9 @@ void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
// These numbers are empirically obtained.
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
-#if CONFIG_EC_ADAPT
{ 10, 7 }, { 8, 5 },
-#else
- { 10, 6 }, { 8, 6 },
-#endif
};
-#define UPDATE_RD_COST() \
- { \
- rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); \
- rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); \
- }
-
static INLINE unsigned int get_token_bit_costs(
unsigned int token_costs[2][COEFF_CONTEXTS][ENTROPY_TOKENS], int skip_eob,
int ctx, int token) {
@@ -133,23 +123,14 @@ static INLINE unsigned int get_token_bit_costs(
}
#if !CONFIG_LV_MAP
-#define USE_GREEDY_OPTIMIZE_B 0
-
-#if USE_GREEDY_OPTIMIZE_B
-
-typedef struct av1_token_state_greedy {
- int16_t token;
- tran_low_t qc;
- tran_low_t dqc;
-} av1_token_state_greedy;
static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
- int block, TX_SIZE tx_size, int ctx) {
+ int blk_row, int blk_col, int block,
+ TX_SIZE tx_size, int ctx) {
MACROBLOCKD *const xd = &mb->e_mbd;
struct macroblock_plane *const p = &mb->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
const int ref = is_inter_block(&xd->mi[0]->mbmi);
- av1_token_state_greedy tokens[MAX_TX_SQUARE + 1][2];
uint8_t token_cache[MAX_TX_SQUARE];
const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
@@ -158,23 +139,27 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
const PLANE_TYPE plane_type = pd->plane_type;
const int16_t *const dequant_ptr = pd->dequant;
const uint8_t *const band_translate = get_band_translate(tx_size);
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ TX_TYPE tx_type =
+ av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
const SCAN_ORDER *const scan_order =
- get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+ get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
const int16_t *const scan = scan_order->scan;
const int16_t *const nb = scan_order->neighbors;
int dqv;
const int shift = av1_get_tx_scale(tx_size);
#if CONFIG_AOM_QM
int seg_id = xd->mi[0]->mbmi.segment_id;
- const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!ref][tx_size];
+ // Use a flat matrix (i.e. no weighting) for 1D and Identity transforms
+ const qm_val_t *iqmatrix =
+ IS_2D_TRANSFORM(tx_type)
+ ? pd->seg_iqmatrix[seg_id][!ref][tx_size]
+ : cm->giqmatrix[NUM_QM_LEVELS - 1][0][0][tx_size];
#endif
#if CONFIG_NEW_QUANT
int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type);
const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
#endif // CONFIG_NEW_QUANT
int sz = 0;
- const int64_t rddiv = mb->rddiv;
int64_t rd_cost0, rd_cost1;
int16_t t0, t1;
int i, final_eob;
@@ -193,19 +178,8 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
int64_t rate0, rate1;
for (i = 0; i < eob; i++) {
const int rc = scan[i];
- int x = qcoeff[rc];
- t0 = av1_get_token(x);
-
- tokens[i][0].qc = x;
- tokens[i][0].token = t0;
- tokens[i][0].dqc = dqcoeff[rc];
-
- token_cache[rc] = av1_pt_energy_class[t0];
+ token_cache[rc] = av1_pt_energy_class[av1_get_token(qcoeff[rc])];
}
- tokens[eob][0].token = EOB_TOKEN;
- tokens[eob][0].qc = 0;
- tokens[eob][0].dqc = 0;
- tokens[eob][1] = tokens[eob][0];
unsigned int(*token_costs_ptr)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
token_costs;
@@ -213,20 +187,22 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
final_eob = 0;
int64_t eob_cost0, eob_cost1;
+ tran_low_t before_best_eob_qc = 0;
+ tran_low_t before_best_eob_dqc = 0;
const int ctx0 = ctx;
/* Record the r-d cost */
int64_t accu_rate = 0;
- int64_t accu_error = 0;
+ // Initialized to the worst possible error for the largest transform size.
+ // This ensures that it never goes negative.
+ int64_t accu_error = ((int64_t)1) << 50;
rate0 = get_token_bit_costs(*(token_costs_ptr + band_translate[0]), 0, ctx0,
EOB_TOKEN);
- int64_t best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);
+ int64_t best_block_rd_cost = RDCOST(rdmult, rate0, accu_error);
// int64_t best_block_rd_cost_all0 = best_block_rd_cost;
-
int x_prev = 1;
-
for (i = 0; i < eob; i++) {
const int rc = scan[i];
int x = qcoeff[rc];
@@ -238,9 +214,9 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
if (x == 0) {
// no need to search when x == 0
- rate0 =
- get_token_bit_costs(*(token_costs_ptr + band_cur), token_tree_sel_cur,
- ctx_cur, tokens[i][0].token);
+ int token = av1_get_token(x);
+ rate0 = get_token_bit_costs(*(token_costs_ptr + band_cur),
+ token_tree_sel_cur, ctx_cur, token);
accu_rate += rate0;
x_prev = 0;
// accu_error does not change when x==0
@@ -249,7 +225,7 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
*/
// compute the distortion for the first candidate
// and the distortion for quantizing to 0.
- int dx0 = (-coeff[rc]) * (1 << shift);
+ int dx0 = abs(coeff[rc]) * (1 << shift);
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
dx0 >>= xd->bd - 8;
@@ -273,7 +249,9 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- dx >>= xd->bd - 8;
+ int dx_sign = dx < 0 ? 1 : 0;
+ dx = abs(dx) >> (xd->bd - 8);
+ if (dx_sign) dx = -dx;
}
#endif // CONFIG_HIGHBITDEPTH
d2 = (int64_t)dx * dx;
@@ -329,14 +307,16 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
if (i < default_eob - 1) {
int ctx_next, token_tree_sel_next;
int band_next = band_translate[i + 1];
+ int token_next =
+ i + 1 != eob ? av1_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
token_cache[rc] = av1_pt_energy_class[t0];
ctx_next = get_coef_context(nb, token_cache, i + 1);
token_tree_sel_next = (x == 0);
- next_bits0 = get_token_bit_costs(*(token_costs_ptr + band_next),
- token_tree_sel_next, ctx_next,
- tokens[i + 1][0].token);
+ next_bits0 =
+ get_token_bit_costs(*(token_costs_ptr + band_next),
+ token_tree_sel_next, ctx_next, token_next);
next_eob_bits0 =
get_token_bit_costs(*(token_costs_ptr + band_next),
token_tree_sel_next, ctx_next, EOB_TOKEN);
@@ -345,9 +325,9 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
ctx_next = get_coef_context(nb, token_cache, i + 1);
token_tree_sel_next = (x_a == 0);
- next_bits1 = get_token_bit_costs(*(token_costs_ptr + band_next),
- token_tree_sel_next, ctx_next,
- tokens[i + 1][0].token);
+ next_bits1 =
+ get_token_bit_costs(*(token_costs_ptr + band_next),
+ token_tree_sel_next, ctx_next, token_next);
if (x_a != 0) {
next_eob_bits1 =
@@ -356,16 +336,16 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
}
}
- rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), d2);
- rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), d2_a);
+ rd_cost0 = RDCOST(rdmult, (rate0 + next_bits0), d2);
+ rd_cost1 = RDCOST(rdmult, (rate1 + next_bits1), d2_a);
best_x = (rd_cost1 < rd_cost0);
- eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
+ eob_cost0 = RDCOST(rdmult, (accu_rate + rate0 + next_eob_bits0),
(accu_error + d2 - d0));
eob_cost1 = eob_cost0;
if (x_a != 0) {
- eob_cost1 = RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
+ eob_cost1 = RDCOST(rdmult, (accu_rate + rate1 + next_eob_bits1),
(accu_error + d2_a - d0));
best_eob_x = (eob_cost1 < eob_cost0);
} else {
@@ -410,38 +390,35 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
token_cache[rc] = av1_pt_energy_class[t0];
}
+ assert(accu_error >= 0);
x_prev = qcoeff[rc];
// determine whether to move the eob position to i+1
- int64_t best_eob_cost_i = eob_cost0;
-
- tokens[i][1].token = t0;
- tokens[i][1].qc = x;
- tokens[i][1].dqc = dqc;
-
- if ((x_a != 0) && (best_eob_x)) {
- best_eob_cost_i = eob_cost1;
-
- tokens[i][1].token = t1;
- tokens[i][1].qc = x_a;
- tokens[i][1].dqc = dqc_a;
- }
+ int use_a = (x_a != 0) && (best_eob_x);
+ int64_t best_eob_cost_i = use_a ? eob_cost1 : eob_cost0;
if (best_eob_cost_i < best_block_rd_cost) {
best_block_rd_cost = best_eob_cost_i;
final_eob = i + 1;
+ if (use_a) {
+ before_best_eob_qc = x_a;
+ before_best_eob_dqc = dqc_a;
+ } else {
+ before_best_eob_qc = x;
+ before_best_eob_dqc = dqc;
+ }
}
} // if (x==0)
} // for (i)
assert(final_eob <= eob);
if (final_eob > 0) {
- assert(tokens[final_eob - 1][1].qc != 0);
+ assert(before_best_eob_qc != 0);
i = final_eob - 1;
int rc = scan[i];
- qcoeff[rc] = tokens[i][1].qc;
- dqcoeff[rc] = tokens[i][1].dqc;
+ qcoeff[rc] = before_best_eob_qc;
+ dqcoeff[rc] = before_best_eob_dqc;
}
for (i = final_eob; i < eob; i++) {
@@ -453,366 +430,19 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
mb->plane[plane].eobs[block] = final_eob;
return final_eob;
}
-
-#else // USE_GREEDY_OPTIMIZE_B
-
-typedef struct av1_token_state_org {
- int64_t error;
- int rate;
- int16_t next;
- int16_t token;
- tran_low_t qc;
- tran_low_t dqc;
- uint8_t best_index;
-} av1_token_state_org;
-
-static int optimize_b_org(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
- int block, TX_SIZE tx_size, int ctx) {
- MACROBLOCKD *const xd = &mb->e_mbd;
- struct macroblock_plane *const p = &mb->plane[plane];
- struct macroblockd_plane *const pd = &xd->plane[plane];
- const int ref = is_inter_block(&xd->mi[0]->mbmi);
- av1_token_state_org tokens[MAX_TX_SQUARE + 1][2];
- uint8_t token_cache[MAX_TX_SQUARE];
- const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
- tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
- tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- const int eob = p->eobs[block];
- const PLANE_TYPE plane_type = pd->plane_type;
- const int default_eob = tx_size_2d[tx_size];
- const int16_t *const dequant_ptr = pd->dequant;
- const uint8_t *const band_translate = get_band_translate(tx_size);
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
- const SCAN_ORDER *const scan_order =
- get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
- const int16_t *const scan = scan_order->scan;
- const int16_t *const nb = scan_order->neighbors;
- int dqv;
- const int shift = av1_get_tx_scale(tx_size);
-#if CONFIG_AOM_QM
- int seg_id = xd->mi[0]->mbmi.segment_id;
- const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!ref][tx_size];
-#endif
-#if CONFIG_NEW_QUANT
- int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type);
- const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
-#endif // CONFIG_NEW_QUANT
- int next = eob, sz = 0;
- const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1;
- const int64_t rddiv = mb->rddiv;
- int64_t rd_cost0, rd_cost1;
- int rate0, rate1;
- int64_t error0, error1;
- int16_t t0, t1;
- int best, band = (eob < default_eob) ? band_translate[eob]
- : band_translate[eob - 1];
- int pt, i, final_eob;
- const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
- unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
- mb->token_costs[txsize_sqr_map[tx_size]][plane_type][ref];
- const uint16_t *band_counts = &band_count_table[tx_size][band];
- uint16_t band_left = eob - band_cum_count_table[tx_size][band] + 1;
- int shortcut = 0;
- int next_shortcut = 0;
-
-#if CONFIG_EXT_DELTA_Q
- const int qindex = cm->seg.enabled
- ? av1_get_qindex(&cm->seg, xd->mi[0]->mbmi.segment_id,
- cm->base_qindex)
- : cm->base_qindex;
- assert(qindex > 0);
- (void)qindex;
-#else
- assert(mb->qindex > 0);
-#endif
-
- token_costs += band;
-
- assert((!plane_type && !plane) || (plane_type && plane));
- assert(eob <= default_eob);
-
- /* Now set up a Viterbi trellis to evaluate alternative roundings. */
- /* Initialize the sentinel node of the trellis. */
- tokens[eob][0].rate = 0;
- tokens[eob][0].error = 0;
- tokens[eob][0].next = default_eob;
- tokens[eob][0].token = EOB_TOKEN;
- tokens[eob][0].qc = 0;
- tokens[eob][1] = tokens[eob][0];
-
- for (i = 0; i < eob; i++) {
- const int rc = scan[i];
- tokens[i][0].rate = av1_get_token_cost(qcoeff[rc], &t0, cat6_bits);
- tokens[i][0].token = t0;
- token_cache[rc] = av1_pt_energy_class[t0];
- }
-
- for (i = eob; i-- > 0;) {
- int base_bits, dx;
- int64_t d2;
- const int rc = scan[i];
- int x = qcoeff[rc];
-#if CONFIG_AOM_QM
- int iwt = iqmatrix[rc];
- dqv = dequant_ptr[rc != 0];
- dqv = ((iwt * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
-#else
- dqv = dequant_ptr[rc != 0];
-#endif
- next_shortcut = shortcut;
-
- /* Only add a trellis state for non-zero coefficients. */
- if (UNLIKELY(x)) {
- error0 = tokens[next][0].error;
- error1 = tokens[next][1].error;
- /* Evaluate the first possibility for this state. */
- rate0 = tokens[next][0].rate;
- rate1 = tokens[next][1].rate;
-
- if (next_shortcut) {
- /* Consider both possible successor states. */
- if (next < default_eob) {
- pt = get_coef_context(nb, token_cache, i + 1);
- rate0 +=
- get_token_bit_costs(*token_costs, 0, pt, tokens[next][0].token);
- rate1 +=
- get_token_bit_costs(*token_costs, 0, pt, tokens[next][1].token);
- }
- UPDATE_RD_COST();
- /* And pick the best. */
- best = rd_cost1 < rd_cost0;
- } else {
- if (next < default_eob) {
- pt = get_coef_context(nb, token_cache, i + 1);
- rate0 +=
- get_token_bit_costs(*token_costs, 0, pt, tokens[next][0].token);
- }
- best = 0;
- }
-
- dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- dx >>= xd->bd - 8;
- }
-#endif // CONFIG_HIGHBITDEPTH
- d2 = (int64_t)dx * dx;
- tokens[i][0].rate += (best ? rate1 : rate0);
- tokens[i][0].error = d2 + (best ? error1 : error0);
- tokens[i][0].next = next;
- tokens[i][0].qc = x;
- tokens[i][0].dqc = dqcoeff[rc];
- tokens[i][0].best_index = best;
-
- /* Evaluate the second possibility for this state. */
- rate0 = tokens[next][0].rate;
- rate1 = tokens[next][1].rate;
-
- // The threshold of 3 is empirically obtained.
- if (UNLIKELY(abs(x) > 3)) {
- shortcut = 0;
- } else {
-#if CONFIG_NEW_QUANT
- shortcut = ((av1_dequant_abscoeff_nuq(abs(x), dqv,
- dequant_val[band_translate[i]]) >
- (abs(coeff[rc]) << shift)) &&
- (av1_dequant_abscoeff_nuq(abs(x) - 1, dqv,
- dequant_val[band_translate[i]]) <
- (abs(coeff[rc]) << shift)));
-#else // CONFIG_NEW_QUANT
-#if CONFIG_AOM_QM
- if ((abs(x) * dequant_ptr[rc != 0] * iwt >
- ((abs(coeff[rc]) << shift) << AOM_QM_BITS)) &&
- (abs(x) * dequant_ptr[rc != 0] * iwt <
- (((abs(coeff[rc]) << shift) + dequant_ptr[rc != 0])
- << AOM_QM_BITS)))
-#else
- if ((abs(x) * dequant_ptr[rc != 0] > (abs(coeff[rc]) << shift)) &&
- (abs(x) * dequant_ptr[rc != 0] <
- (abs(coeff[rc]) << shift) + dequant_ptr[rc != 0]))
-#endif // CONFIG_AOM_QM
- shortcut = 1;
- else
- shortcut = 0;
-#endif // CONFIG_NEW_QUANT
- }
-
- if (shortcut) {
- sz = -(x < 0);
- x -= 2 * sz + 1;
- } else {
- tokens[i][1] = tokens[i][0];
- next = i;
-
- if (UNLIKELY(!(--band_left))) {
- --band_counts;
- band_left = *band_counts;
- --token_costs;
- }
- continue;
- }
-
- /* Consider both possible successor states. */
- if (!x) {
- /* If we reduced this coefficient to zero, check to see if
- * we need to move the EOB back here.
- */
- t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
- t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
- base_bits = 0;
- } else {
- base_bits = av1_get_token_cost(x, &t0, cat6_bits);
- t1 = t0;
- }
-
- if (next_shortcut) {
- if (LIKELY(next < default_eob)) {
- if (t0 != EOB_TOKEN) {
- token_cache[rc] = av1_pt_energy_class[t0];
- pt = get_coef_context(nb, token_cache, i + 1);
- rate0 += get_token_bit_costs(*token_costs, !x, pt,
- tokens[next][0].token);
- }
- if (t1 != EOB_TOKEN) {
- token_cache[rc] = av1_pt_energy_class[t1];
- pt = get_coef_context(nb, token_cache, i + 1);
- rate1 += get_token_bit_costs(*token_costs, !x, pt,
- tokens[next][1].token);
- }
- }
-
- UPDATE_RD_COST();
- /* And pick the best. */
- best = rd_cost1 < rd_cost0;
- } else {
- // The two states in next stage are identical.
- if (next < default_eob && t0 != EOB_TOKEN) {
- token_cache[rc] = av1_pt_energy_class[t0];
- pt = get_coef_context(nb, token_cache, i + 1);
- rate0 +=
- get_token_bit_costs(*token_costs, !x, pt, tokens[next][0].token);
- }
- best = 0;
- }
-
-#if CONFIG_NEW_QUANT
- dx = av1_dequant_coeff_nuq(x, dqv, dequant_val[band_translate[i]]) -
- (coeff[rc] << shift);
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- dx >>= xd->bd - 8;
- }
-#endif // CONFIG_HIGHBITDEPTH
-#else // CONFIG_NEW_QUANT
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- dx -= ((dqv >> (xd->bd - 8)) + sz) ^ sz;
- } else {
- dx -= (dqv + sz) ^ sz;
- }
-#else
- dx -= (dqv + sz) ^ sz;
-#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_NEW_QUANT
- d2 = (int64_t)dx * dx;
-
- tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
- tokens[i][1].error = d2 + (best ? error1 : error0);
- tokens[i][1].next = next;
- tokens[i][1].token = best ? t1 : t0;
- tokens[i][1].qc = x;
-
- if (x) {
-#if CONFIG_NEW_QUANT
- tokens[i][1].dqc = av1_dequant_abscoeff_nuq(
- abs(x), dqv, dequant_val[band_translate[i]]);
- tokens[i][1].dqc = shift ? ROUND_POWER_OF_TWO(tokens[i][1].dqc, shift)
- : tokens[i][1].dqc;
- if (sz) tokens[i][1].dqc = -tokens[i][1].dqc;
-#else
- if (x < 0)
- tokens[i][1].dqc = -((-x * dqv) >> shift);
- else
- tokens[i][1].dqc = (x * dqv) >> shift;
-#endif // CONFIG_NEW_QUANT
- } else {
- tokens[i][1].dqc = 0;
- }
-
- tokens[i][1].best_index = best;
- /* Finally, make this the new head of the trellis. */
- next = i;
- } else {
- /* There's no choice to make for a zero coefficient, so we don't
- * add a new trellis node, but we do need to update the costs.
- */
- t0 = tokens[next][0].token;
- t1 = tokens[next][1].token;
- pt = get_coef_context(nb, token_cache, i + 1);
- /* Update the cost of each path if we're past the EOB token. */
- if (t0 != EOB_TOKEN) {
- tokens[next][0].rate += get_token_bit_costs(*token_costs, 1, pt, t0);
- tokens[next][0].token = ZERO_TOKEN;
- }
- if (t1 != EOB_TOKEN) {
- tokens[next][1].rate += get_token_bit_costs(*token_costs, 1, pt, t1);
- tokens[next][1].token = ZERO_TOKEN;
- }
- tokens[i][0].best_index = tokens[i][1].best_index = 0;
- shortcut = (tokens[next][0].rate != tokens[next][1].rate);
- /* Don't update next, because we didn't add a new node. */
- }
-
- if (UNLIKELY(!(--band_left))) {
- --band_counts;
- band_left = *band_counts;
- --token_costs;
- }
- }
-
- /* Now pick the best path through the whole trellis. */
- rate0 = tokens[next][0].rate;
- rate1 = tokens[next][1].rate;
- error0 = tokens[next][0].error;
- error1 = tokens[next][1].error;
- t0 = tokens[next][0].token;
- t1 = tokens[next][1].token;
- rate0 += get_token_bit_costs(*token_costs, 0, ctx, t0);
- rate1 += get_token_bit_costs(*token_costs, 0, ctx, t1);
- UPDATE_RD_COST();
- best = rd_cost1 < rd_cost0;
-
- final_eob = -1;
-
- for (i = next; i < eob; i = next) {
- const int x = tokens[i][best].qc;
- const int rc = scan[i];
- if (x) final_eob = i;
- qcoeff[rc] = x;
- dqcoeff[rc] = tokens[i][best].dqc;
-
- next = tokens[i][best].next;
- best = tokens[i][best].best_index;
- }
- final_eob++;
-
- mb->plane[plane].eobs[block] = final_eob;
- assert(final_eob <= default_eob);
- return final_eob;
-}
-
-#endif // USE_GREEDY_OPTIMIZE_B
#endif // !CONFIG_LV_MAP
-int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
- const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l) {
+int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int blk_row,
+ int blk_col, int block, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
+ const ENTROPY_CONTEXT *l) {
MACROBLOCKD *const xd = &mb->e_mbd;
struct macroblock_plane *const p = &mb->plane[plane];
const int eob = p->eobs[block];
assert((mb->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
if (eob == 0) return eob;
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) return eob;
+
#if CONFIG_PVQ
(void)cm;
(void)tx_size;
@@ -823,26 +453,24 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
#if !CONFIG_LV_MAP
(void)plane_bsize;
+ (void)blk_row;
+ (void)blk_col;
#if CONFIG_VAR_TX
int ctx = get_entropy_context(tx_size, a, l);
#else
int ctx = combine_entropy_contexts(*a, *l);
-#endif
-
-#if USE_GREEDY_OPTIMIZE_B
- return optimize_b_greedy(cm, mb, plane, block, tx_size, ctx);
-#else // USE_GREEDY_OPTIMIZE_B
- return optimize_b_org(cm, mb, plane, block, tx_size, ctx);
-#endif // USE_GREEDY_OPTIMIZE_B
+#endif // CONFIG_VAR_TX
+ return optimize_b_greedy(cm, mb, plane, blk_row, blk_col, block, tx_size,
+ ctx);
#else // !CONFIG_LV_MAP
TXB_CTX txb_ctx;
get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
- return av1_optimize_txb(cm, mb, plane, block, tx_size, &txb_ctx);
+ return av1_optimize_txb(cm, mb, plane, blk_row, blk_col, block, tx_size,
+ &txb_ctx);
#endif // !CONFIG_LV_MAP
}
#if !CONFIG_PVQ
-#if CONFIG_HIGHBITDEPTH
typedef enum QUANT_FUNC {
QUANT_FUNC_LOWBD = 0,
QUANT_FUNC_HIGHBD = 1,
@@ -862,29 +490,12 @@ static AV1_QUANT_FACADE
#endif // !CONFIG_NEW_QUANT
{ NULL, NULL }
};
+#endif // !CONFIG_PVQ
-#else
-
-typedef enum QUANT_FUNC {
- QUANT_FUNC_LOWBD = 0,
- QUANT_FUNC_TYPES = 1
-} QUANT_FUNC;
-
-static AV1_QUANT_FACADE quant_func_list[AV1_XFORM_QUANT_TYPES]
- [QUANT_FUNC_TYPES] = {
-#if !CONFIG_NEW_QUANT
- { av1_quantize_fp_facade },
- { av1_quantize_b_facade },
- { av1_quantize_dc_facade },
-#else // !CONFIG_NEW_QUANT
- { av1_quantize_fp_nuq_facade },
- { av1_quantize_b_nuq_facade },
- { av1_quantize_dc_nuq_facade },
-#endif // !CONFIG_NEW_QUANT
- { NULL }
- };
-#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_PVQ
+typedef void (*fwdTxfmFunc)(const int16_t *diff, tran_low_t *coeff, int stride,
+ TxfmParam *txfm_param);
+static const fwdTxfmFunc fwd_txfm_func[2] = { av1_fwd_txfm,
+ av1_highbd_fwd_txfm };
void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
@@ -892,7 +503,7 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
AV1_XFORM_QUANT xform_quant_idx) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
-#if !(CONFIG_PVQ || CONFIG_DAALA_DIST)
+#if !(CONFIG_PVQ || CONFIG_DIST_8X8)
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
#else
@@ -900,9 +511,14 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
struct macroblockd_plane *const pd = &xd->plane[plane];
#endif
PLANE_TYPE plane_type = get_plane_type(plane);
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ TX_TYPE tx_type =
+ av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
+
+#if CONFIG_AOM_QM || CONFIG_NEW_QUANT
const int is_inter = is_inter_block(mbmi);
- const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, is_inter);
+#endif
+
+ const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi);
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -910,19 +526,28 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
const int diff_stride = block_size_wide[plane_bsize];
#if CONFIG_AOM_QM
int seg_id = mbmi->segment_id;
- const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][!is_inter][tx_size];
- const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!is_inter][tx_size];
+ // Use a flat matrix (i.e. no weighting) for 1D and Identity transforms
+ const qm_val_t *qmatrix =
+ IS_2D_TRANSFORM(tx_type) ? pd->seg_qmatrix[seg_id][!is_inter][tx_size]
+ : cm->gqmatrix[NUM_QM_LEVELS - 1][0][0][tx_size];
+ const qm_val_t *iqmatrix =
+ IS_2D_TRANSFORM(tx_type)
+ ? pd->seg_iqmatrix[seg_id][!is_inter][tx_size]
+ : cm->giqmatrix[NUM_QM_LEVELS - 1][0][0][tx_size];
#endif
- FWD_TXFM_PARAM fwd_txfm_param;
+ TxfmParam txfm_param;
-#if CONFIG_PVQ || CONFIG_DAALA_DIST
+#if CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT || CONFIG_MRC_TX
uint8_t *dst;
- int16_t *pred;
const int dst_stride = pd->dst.stride;
- int tx_blk_size;
+#if CONFIG_PVQ || CONFIG_DIST_8X8
+ int16_t *pred;
+ const int txw = tx_size_wide[tx_size];
+ const int txh = tx_size_high[tx_size];
int i, j;
#endif
+#endif
#if !CONFIG_PVQ
const int tx2d_size = tx_size_2d[tx_size];
@@ -960,79 +585,68 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
src_int16 =
&p->src_int16[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
- // transform block size in pixels
- tx_blk_size = tx_size_wide[tx_size];
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (j = 0; j < tx_blk_size; j++)
- for (i = 0; i < tx_blk_size; i++)
+ for (j = 0; j < txh; j++)
+ for (i = 0; i < txw; i++)
src_int16[diff_stride * j + i] =
CONVERT_TO_SHORTPTR(src)[src_stride * j + i];
} else {
#endif // CONFIG_HIGHBITDEPTH
- for (j = 0; j < tx_blk_size; j++)
- for (i = 0; i < tx_blk_size; i++)
+ for (j = 0; j < txh; j++)
+ for (i = 0; i < txw; i++)
src_int16[diff_stride * j + i] = src[src_stride * j + i];
#if CONFIG_HIGHBITDEPTH
}
#endif // CONFIG_HIGHBITDEPTH
#endif
-#if CONFIG_PVQ || CONFIG_DAALA_DIST
+#if CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT || CONFIG_MRC_TX
dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
+#if CONFIG_PVQ || CONFIG_DIST_8X8
pred = &pd->pred[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
- // transform block size in pixels
- tx_blk_size = tx_size_wide[tx_size];
-
// copy uint8 orig and predicted block to int16 buffer
// in order to use existing VP10 transform functions
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (j = 0; j < tx_blk_size; j++)
- for (i = 0; i < tx_blk_size; i++)
+ for (j = 0; j < txh; j++)
+ for (i = 0; i < txw; i++)
pred[diff_stride * j + i] =
CONVERT_TO_SHORTPTR(dst)[dst_stride * j + i];
} else {
#endif // CONFIG_HIGHBITDEPTH
- for (j = 0; j < tx_blk_size; j++)
- for (i = 0; i < tx_blk_size; i++)
+ for (j = 0; j < txh; j++)
+ for (i = 0; i < txw; i++)
pred[diff_stride * j + i] = dst[dst_stride * j + i];
#if CONFIG_HIGHBITDEPTH
}
#endif // CONFIG_HIGHBITDEPTH
-#endif
+#endif // CONFIG_PVQ || CONFIG_DIST_8X8
+#endif // CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT || CONFIG_MRC_TX
(void)ctx;
- fwd_txfm_param.tx_type = tx_type;
- fwd_txfm_param.tx_size = tx_size;
- fwd_txfm_param.lossless = xd->lossless[mbmi->segment_id];
+ txfm_param.tx_type = tx_type;
+ txfm_param.tx_size = tx_size;
+ txfm_param.lossless = xd->lossless[mbmi->segment_id];
+#if CONFIG_MRC_TX || CONFIG_LGT
+ txfm_param.dst = dst;
+ txfm_param.stride = dst_stride;
+#endif // CONFIG_MRC_TX || CONFIG_LGT
+#if CONFIG_LGT
+ txfm_param.is_inter = is_inter_block(mbmi);
+ txfm_param.mode = get_prediction_mode(xd->mi[0], plane, tx_size, block);
+#endif
#if !CONFIG_PVQ
-#if CONFIG_HIGHBITDEPTH
- fwd_txfm_param.bd = xd->bd;
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
- if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
- if (LIKELY(!x->skip_block)) {
- quant_func_list[xform_quant_idx][QUANT_FUNC_HIGHBD](
- coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam);
- } else {
- av1_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
- }
- }
-#if CONFIG_LV_MAP
- p->txb_entropy_ctx[block] =
- (uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob);
-#endif // CONFIG_LV_MAP
- return;
- }
-#endif // CONFIG_HIGHBITDEPTH
- av1_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+ txfm_param.bd = xd->bd;
+ const int is_hbd = get_bitdepth_data_path_index(xd);
+ fwd_txfm_func[is_hbd](src_diff, coeff, diff_stride, &txfm_param);
+
if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
if (LIKELY(!x->skip_block)) {
- quant_func_list[xform_quant_idx][QUANT_FUNC_LOWBD](
+ quant_func_list[xform_quant_idx][is_hbd](
coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam);
} else {
av1_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
@@ -1042,17 +656,18 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
p->txb_entropy_ctx[block] =
(uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob);
#endif // CONFIG_LV_MAP
-#else // #if !CONFIG_PVQ
+ return;
+#else // CONFIG_PVQ
(void)xform_quant_idx;
#if CONFIG_HIGHBITDEPTH
- fwd_txfm_param.bd = xd->bd;
+ txfm_param.bd = xd->bd;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- av1_highbd_fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param);
- av1_highbd_fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param);
+ av1_highbd_fwd_txfm(src_int16, coeff, diff_stride, &txfm_param);
+ av1_highbd_fwd_txfm(pred, ref_coeff, diff_stride, &txfm_param);
} else {
#endif
- av1_fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param);
- av1_fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param);
+ av1_fwd_txfm(src_int16, coeff, diff_stride, &txfm_param);
+ av1_fwd_txfm(pred, ref_coeff, diff_stride, &txfm_param);
#if CONFIG_HIGHBITDEPTH
}
#endif
@@ -1130,7 +745,8 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
#endif
#if !CONFIG_PVQ
- av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
+ av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size, a,
+ l);
av1_set_txb_context(x, plane, block, tx_size, a, l);
@@ -1143,9 +759,16 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
if (x->pvq_skip[plane]) return;
#endif
- TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
+ TX_TYPE tx_type =
+ av1_get_tx_type(pd->plane_type, xd, blk_row, blk_col, block, tx_size);
+#if CONFIG_LGT
+ PREDICTION_MODE mode = get_prediction_mode(xd->mi[0], plane, tx_size, block);
+ av1_inverse_transform_block(xd, dqcoeff, mode, tx_type, tx_size, dst,
+ pd->dst.stride, p->eobs[block]);
+#else
av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, dst,
pd->dst.stride, p->eobs[block]);
+#endif
}
#if CONFIG_VAR_TX
@@ -1174,16 +797,32 @@ static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
} else {
assert(tx_size < TX_SIZES_ALL);
+#if CONFIG_RECT_TX_EXT
+ int is_qttx = plane_tx_size == quarter_txsize_lookup[plane_bsize];
+ const TX_SIZE sub_txs = is_qttx ? plane_tx_size : sub_tx_size_map[tx_size];
+ if (is_qttx) assert(blk_row == 0 && blk_col == 0 && block == 0);
+#else
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
assert(sub_txs < tx_size);
+#endif
// This is the square transform block partition entry point.
int bsl = tx_size_wide_unit[sub_txs];
int i;
assert(bsl > 0);
for (i = 0; i < 4; ++i) {
+#if CONFIG_RECT_TX_EXT
+ int is_wide_tx = tx_size_wide_unit[sub_txs] > tx_size_high_unit[sub_txs];
+ const int offsetr =
+ is_qttx ? (is_wide_tx ? i * tx_size_high_unit[sub_txs] : 0)
+ : blk_row + ((i >> 1) * bsl);
+ const int offsetc =
+ is_qttx ? (is_wide_tx ? 0 : i * tx_size_wide_unit[sub_txs])
+ : blk_col + ((i & 0x01) * bsl);
+#else
const int offsetr = blk_row + ((i >> 1) * bsl);
const int offsetc = blk_col + ((i & 0x01) * bsl);
+#endif
int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
@@ -1211,6 +850,7 @@ static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ TxfmParam txfm_param;
uint8_t *dst;
int ctx = 0;
dst = &pd->dst
@@ -1246,22 +886,20 @@ static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
#endif // CONFIG_HIGHBITDEPTH
}
#endif // !CONFIG_PVQ
+ txfm_param.bd = xd->bd;
+ txfm_param.tx_type = DCT_DCT;
+ txfm_param.eob = p->eobs[block];
+ txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
- av1_highbd_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
- xd->bd);
- } else {
- av1_highbd_idct4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
- xd->bd);
- }
+ av1_highbd_inv_txfm_add_4x4(dqcoeff, dst, pd->dst.stride, &txfm_param);
return;
}
#endif // CONFIG_HIGHBITDEPTH
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
- av1_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
+ av1_iwht4x4_add(dqcoeff, dst, pd->dst.stride, &txfm_param);
} else {
- av1_idct4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
+ av1_idct4x4_add(dqcoeff, dst, pd->dst.stride, &txfm_param);
}
}
}
@@ -1316,7 +954,7 @@ void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
av1_get_entropy_contexts(bsize, 0, pd, ctx.ta[plane], ctx.tl[plane]);
#else
const struct macroblockd_plane *const pd = &xd->plane[plane];
- const TX_SIZE tx_size = get_tx_size(plane, xd);
+ const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
av1_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
#endif
@@ -1327,11 +965,27 @@ void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
arg.tl = ctx.tl[plane];
#if CONFIG_VAR_TX
- for (idy = 0; idy < mi_height; idy += bh) {
- for (idx = 0; idx < mi_width; idx += bw) {
- encode_block_inter(plane, block, idy, idx, plane_bsize, max_tx_size,
- &arg);
- block += step;
+ const BLOCK_SIZE max_unit_bsize = get_plane_block_size(BLOCK_64X64, pd);
+ int mu_blocks_wide =
+ block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
+ int mu_blocks_high =
+ block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
+
+ mu_blocks_wide = AOMMIN(mi_width, mu_blocks_wide);
+ mu_blocks_high = AOMMIN(mi_height, mu_blocks_high);
+
+ for (idy = 0; idy < mi_height; idy += mu_blocks_high) {
+ for (idx = 0; idx < mi_width; idx += mu_blocks_wide) {
+ int blk_row, blk_col;
+ const int unit_height = AOMMIN(mu_blocks_high + idy, mi_height);
+ const int unit_width = AOMMIN(mu_blocks_wide + idx, mi_width);
+ for (blk_row = idy; blk_row < unit_height; blk_row += bh) {
+ for (blk_col = idx; blk_col < unit_width; blk_col += bw) {
+ encode_block_inter(plane, block, blk_row, blk_col, plane_bsize,
+ max_tx_size, &arg);
+ block += step;
+ }
+ }
}
}
#else
@@ -1357,7 +1011,7 @@ void av1_encode_sb_supertx(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize) {
#if CONFIG_VAR_TX
const TX_SIZE tx_size = TX_4X4;
#else
- const TX_SIZE tx_size = get_tx_size(plane, xd);
+ const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
#endif
av1_subtract_plane(x, bsize, plane);
av1_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
@@ -1435,6 +1089,24 @@ static void quantize_scaler(int coeff, int16_t zbin, int16_t round_value,
}
}
+#if CONFIG_HIGHBITDEPTH
+typedef void (*hbd_dpcm_fwd_tx_func)(const int16_t *input, int stride,
+ TX_TYPE_1D tx_type, tran_low_t *output,
+ int dir);
+
+static hbd_dpcm_fwd_tx_func get_hbd_dpcm_fwd_tx_func(int tx_length) {
+ switch (tx_length) {
+ case 4: return av1_hbd_dpcm_ft4_c;
+ case 8: return av1_hbd_dpcm_ft8_c;
+ case 16: return av1_hbd_dpcm_ft16_c;
+ case 32:
+ return av1_hbd_dpcm_ft32_c;
+ // TODO(huisu): add support for TX_64X64.
+ default: assert(0); return NULL;
+ }
+}
+#endif // CONFIG_HIGHBITDEPTH
+
typedef void (*dpcm_fwd_tx_func)(const int16_t *input, int stride,
TX_TYPE_1D tx_type, tran_low_t *output);
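(Illustrative usage sketch, not part of the patch.) The new high-bitdepth DPCM dispatch picks the 1-D forward transform by row or column length and invokes it with the added trailing `dir` argument, exactly as the hbd_process_block_dpcm_vert/horz hunks below do:

    /* Vertical pass over one row of residuals (dir = 1). */
    hbd_dpcm_fwd_tx_func forward_tx =
        get_hbd_dpcm_fwd_tx_func(tx_size_wide[tx_size]);
    forward_tx(src_diff, 1, tx_type_1d, coeff, /*dir=*/1);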
@@ -1539,7 +1211,7 @@ static void hbd_process_block_dpcm_vert(
int16_t *src_diff, int diff_stride, tran_low_t *coeff, tran_low_t *qcoeff,
tran_low_t *dqcoeff) {
const int tx1d_width = tx_size_wide[tx_size];
- dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_width);
+ hbd_dpcm_fwd_tx_func forward_tx = get_hbd_dpcm_fwd_tx_func(tx1d_width);
hbd_dpcm_inv_txfm_add_func inverse_tx =
av1_get_hbd_dpcm_inv_txfm_add_func(tx1d_width);
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
@@ -1553,7 +1225,7 @@ static void hbd_process_block_dpcm_vert(
// Subtraction.
for (int c = 0; c < tx1d_width; ++c) src_diff[c] = src[c] - dst[c];
// Forward transform.
- forward_tx(src_diff, 1, tx_type_1d, coeff);
+ forward_tx(src_diff, 1, tx_type_1d, coeff, 1);
// Quantization.
for (int c = 0; c < tx1d_width; ++c) {
quantize_scaler(coeff[c], p->zbin[q_idx], p->round[q_idx],
@@ -1562,7 +1234,7 @@ static void hbd_process_block_dpcm_vert(
q_idx = 1;
}
// Inverse transform.
- inverse_tx(dqcoeff, 1, tx_type_1d, bd, dst);
+ inverse_tx(dqcoeff, 1, tx_type_1d, bd, dst, 1);
// Move to the next row.
coeff += tx1d_width;
qcoeff += tx1d_width;
@@ -1580,7 +1252,7 @@ static void hbd_process_block_dpcm_horz(
int16_t *src_diff, int diff_stride, tran_low_t *coeff, tran_low_t *qcoeff,
tran_low_t *dqcoeff) {
const int tx1d_height = tx_size_high[tx_size];
- dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_height);
+ hbd_dpcm_fwd_tx_func forward_tx = get_hbd_dpcm_fwd_tx_func(tx1d_height);
hbd_dpcm_inv_txfm_add_func inverse_tx =
av1_get_hbd_dpcm_inv_txfm_add_func(tx1d_height);
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
@@ -1597,7 +1269,7 @@ static void hbd_process_block_dpcm_horz(
}
// Forward transform.
tran_low_t tx_buff[64];
- forward_tx(src_diff, diff_stride, tx_type_1d, tx_buff);
+ forward_tx(src_diff, diff_stride, tx_type_1d, tx_buff, 0);
for (int r = 0; r < tx1d_height; ++r) coeff[r * tx1d_width] = tx_buff[r];
// Quantization.
for (int r = 0; r < tx1d_height; ++r) {
@@ -1609,7 +1281,7 @@ static void hbd_process_block_dpcm_horz(
}
// Inverse transform.
for (int r = 0; r < tx1d_height; ++r) tx_buff[r] = dqcoeff[r * tx1d_width];
- inverse_tx(tx_buff, dst_stride, tx_type_1d, bd, dst);
+ inverse_tx(tx_buff, dst_stride, tx_type_1d, bd, dst, 0);
// Move to the next column.
++coeff, ++qcoeff, ++dqcoeff, ++src_diff, ++dst, ++src;
}
@@ -1631,7 +1303,8 @@ void av1_encode_block_intra_dpcm(const AV1_COMMON *cm, MACROBLOCK *x,
const int dst_stride = pd->dst.stride;
const int tx1d_width = tx_size_wide[tx_size];
const int tx1d_height = tx_size_high[tx_size];
- const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, 0);
+ const SCAN_ORDER *const scan_order =
+ get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
uint8_t *dst =
@@ -1711,30 +1384,20 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
struct macroblockd_plane *const pd = &xd->plane[plane];
tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
PLANE_TYPE plane_type = get_plane_type(plane);
- const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ const TX_TYPE tx_type =
+ av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
uint16_t *eob = &p->eobs[block];
const int dst_stride = pd->dst.stride;
uint8_t *dst =
&pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
-#if CONFIG_CFL
-
-#if CONFIG_EC_ADAPT
- FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
-#else
- FRAME_CONTEXT *const ec_ctx = cm->fc;
-#endif // CONFIG_EC_ADAPT
- av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col,
- blk_row, tx_size, plane_bsize);
-#else
av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
-#endif
+#if CONFIG_DPCM_INTRA || CONFIG_LGT
+ const PREDICTION_MODE mode =
+ get_prediction_mode(xd->mi[0], plane, tx_size, block);
#if CONFIG_DPCM_INTRA
- const int block_raster_idx = av1_block_index_to_raster_order(tx_size, block);
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- const PREDICTION_MODE mode =
- (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode;
if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) {
av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col,
plane_bsize, tx_size, tx_type, args->ta,
@@ -1742,6 +1405,7 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
return;
}
#endif // CONFIG_DPCM_INTRA
+#endif // CONFIG_DPCM_INTRA || CONFIG_LGT
av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
@@ -1751,7 +1415,8 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
if (args->enable_optimize_b) {
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
ctx, AV1_XFORM_QUANT_FP);
- av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
+ av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
+ a, l);
} else {
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
ctx, AV1_XFORM_QUANT_B);
@@ -1763,220 +1428,25 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
if (x->pvq_skip[plane]) return;
#endif // CONFIG_PVQ
- av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, dst, dst_stride,
- *eob);
+ av1_inverse_transform_block(xd, dqcoeff,
+#if CONFIG_LGT
+ mode,
+#endif
+ tx_type, tx_size, dst, dst_stride, *eob);
#if !CONFIG_PVQ
if (*eob) *(args->skip) = 0;
#else
// Note : *(args->skip) == mbmi->skip
#endif
#if CONFIG_CFL
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
if (plane == AOM_PLANE_Y && x->cfl_store_y) {
- cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size);
- }
-
- if (mbmi->uv_mode == DC_PRED) {
- // TODO(ltrudeau) find a cleaner way to detect last transform block
- if (plane == AOM_PLANE_U) {
- xd->cfl->num_tx_blk[CFL_PRED_U] =
- (blk_row == 0 && blk_col == 0) ? 1
- : xd->cfl->num_tx_blk[CFL_PRED_U] + 1;
- }
-
- if (plane == AOM_PLANE_V) {
- xd->cfl->num_tx_blk[CFL_PRED_V] =
- (blk_row == 0 && blk_col == 0) ? 1
- : xd->cfl->num_tx_blk[CFL_PRED_V] + 1;
-
- if (mbmi->skip &&
- xd->cfl->num_tx_blk[CFL_PRED_U] == xd->cfl->num_tx_blk[CFL_PRED_V]) {
- assert(plane_bsize != BLOCK_INVALID);
- const int block_width = block_size_wide[plane_bsize];
- const int block_height = block_size_high[plane_bsize];
-
- // if SKIP is chosen at the block level, and ind != 0, we must change
- // the prediction
- if (mbmi->cfl_alpha_idx != 0) {
- const struct macroblockd_plane *const pd_cb = &xd->plane[AOM_PLANE_U];
- uint8_t *const dst_cb = pd_cb->dst.buf;
- const int dst_stride_cb = pd_cb->dst.stride;
- uint8_t *const dst_cr = pd->dst.buf;
- const int dst_stride_cr = pd->dst.stride;
- for (int j = 0; j < block_height; j++) {
- for (int i = 0; i < block_width; i++) {
- dst_cb[dst_stride_cb * j + i] =
- (uint8_t)(xd->cfl->dc_pred[CFL_PRED_U] + 0.5);
- dst_cr[dst_stride_cr * j + i] =
- (uint8_t)(xd->cfl->dc_pred[CFL_PRED_V] + 0.5);
- }
- }
- mbmi->cfl_alpha_idx = 0;
- mbmi->cfl_alpha_signs[CFL_PRED_U] = CFL_SIGN_POS;
- mbmi->cfl_alpha_signs[CFL_PRED_V] = CFL_SIGN_POS;
- }
- }
- }
+ // TODO (ltrudeau) Store sub-8x8 inter blocks when bottom right block is
+ // intra predicted.
+ cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size, plane_bsize);
}
#endif
}
-#if CONFIG_CFL
-static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg,
- const uint8_t *src, int src_stride, int blk_width,
- int blk_height, double dc_pred, double alpha,
- int *dist_neg_out) {
- const double dc_pred_bias = dc_pred + 0.5;
- int dist = 0;
- int diff;
-
- if (alpha == 0.0) {
- const int dc_pred_i = (int)dc_pred_bias;
- for (int j = 0; j < blk_height; j++) {
- for (int i = 0; i < blk_width; i++) {
- diff = src[i] - dc_pred_i;
- dist += diff * diff;
- }
- src += src_stride;
- }
-
- if (dist_neg_out) *dist_neg_out = dist;
-
- return dist;
- }
-
- int dist_neg = 0;
- for (int j = 0; j < blk_height; j++) {
- for (int i = 0; i < blk_width; i++) {
- const double scaled_luma = alpha * (y_pix[i] - y_avg);
- const int uv = src[i];
- diff = uv - (int)(scaled_luma + dc_pred_bias);
- dist += diff * diff;
- diff = uv + (int)(scaled_luma - dc_pred_bias);
- dist_neg += diff * diff;
- }
- y_pix += y_stride;
- src += src_stride;
- }
-
- if (dist_neg_out) *dist_neg_out = dist_neg;
-
- return dist;
-}
-
-static int cfl_compute_alpha_ind(MACROBLOCK *const x, const CFL_CTX *const cfl,
- BLOCK_SIZE bsize,
- CFL_SIGN_TYPE signs_out[CFL_SIGNS]) {
- const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U];
- const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V];
- const uint8_t *const src_u = p_u->src.buf;
- const uint8_t *const src_v = p_v->src.buf;
- const int src_stride_u = p_u->src.stride;
- const int src_stride_v = p_v->src.stride;
- const int block_width = block_size_wide[bsize];
- const int block_height = block_size_high[bsize];
- const double dc_pred_u = cfl->dc_pred[CFL_PRED_U];
- const double dc_pred_v = cfl->dc_pred[CFL_PRED_V];
-
- // Temporary pixel buffer used to store the CfL prediction when we compute the
- // alpha index.
- uint8_t tmp_pix[MAX_SB_SQUARE];
- // Load CfL Prediction over the entire block
- const double y_avg =
- cfl_load(cfl, tmp_pix, MAX_SB_SIZE, 0, 0, block_width, block_height);
-
- int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
- sse[CFL_PRED_U][0] =
- cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u,
- block_width, block_height, dc_pred_u, 0, NULL);
- sse[CFL_PRED_V][0] =
- cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v,
- block_width, block_height, dc_pred_v, 0, NULL);
- for (int m = 1; m < CFL_MAGS_SIZE; m += 2) {
- assert(cfl_alpha_mags[m + 1] == -cfl_alpha_mags[m]);
- sse[CFL_PRED_U][m] = cfl_alpha_dist(
- tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, block_width,
- block_height, dc_pred_u, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]);
- sse[CFL_PRED_V][m] = cfl_alpha_dist(
- tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, block_width,
- block_height, dc_pred_v, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]);
- }
-
- int dist;
- int64_t cost;
- int64_t best_cost;
-
- // Compute least squares parameter of the entire block
- // IMPORTANT: We assume that the first code is 0,0
- int ind = 0;
- signs_out[CFL_PRED_U] = CFL_SIGN_POS;
- signs_out[CFL_PRED_V] = CFL_SIGN_POS;
-
- dist = sse[CFL_PRED_U][0] + sse[CFL_PRED_V][0];
- dist *= 16;
- best_cost = RDCOST(x->rdmult, x->rddiv, cfl->costs[0], dist);
-
- for (int c = 1; c < CFL_ALPHABET_SIZE; c++) {
- const int idx_u = cfl_alpha_codes[c][CFL_PRED_U];
- const int idx_v = cfl_alpha_codes[c][CFL_PRED_V];
- for (CFL_SIGN_TYPE sign_u = idx_u == 0; sign_u < CFL_SIGNS; sign_u++) {
- for (CFL_SIGN_TYPE sign_v = idx_v == 0; sign_v < CFL_SIGNS; sign_v++) {
- dist = sse[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] +
- sse[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)];
- dist *= 16;
- cost = RDCOST(x->rdmult, x->rddiv, cfl->costs[c], dist);
- if (cost < best_cost) {
- best_cost = cost;
- ind = c;
- signs_out[CFL_PRED_U] = sign_u;
- signs_out[CFL_PRED_V] = sign_v;
- }
- }
- }
- }
-
- return ind;
-}
-
-static inline void cfl_update_costs(CFL_CTX *cfl, FRAME_CONTEXT *ec_ctx) {
- assert(ec_ctx->cfl_alpha_cdf[CFL_ALPHABET_SIZE - 1] ==
- AOM_ICDF(CDF_PROB_TOP));
- const int prob_den = CDF_PROB_TOP;
-
- int prob_num = AOM_ICDF(ec_ctx->cfl_alpha_cdf[0]);
- cfl->costs[0] = av1_cost_zero(get_prob(prob_num, prob_den));
-
- for (int c = 1; c < CFL_ALPHABET_SIZE; c++) {
- int sign_bit_cost = (cfl_alpha_codes[c][CFL_PRED_U] != 0) +
- (cfl_alpha_codes[c][CFL_PRED_V] != 0);
- prob_num = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]) -
- AOM_ICDF(ec_ctx->cfl_alpha_cdf[c - 1]);
- cfl->costs[c] = av1_cost_zero(get_prob(prob_num, prob_den)) +
- av1_cost_literal(sign_bit_cost);
- }
-}
-
-void av1_predict_intra_block_encoder_facade(MACROBLOCK *x,
- FRAME_CONTEXT *ec_ctx, int plane,
- int block_idx, int blk_col,
- int blk_row, TX_SIZE tx_size,
- BLOCK_SIZE plane_bsize) {
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- if (plane != AOM_PLANE_Y && mbmi->uv_mode == DC_PRED) {
- if (blk_col == 0 && blk_row == 0 && plane == AOM_PLANE_U) {
- CFL_CTX *const cfl = xd->cfl;
- cfl_update_costs(cfl, ec_ctx);
- cfl_dc_pred(xd, plane_bsize, tx_size);
- mbmi->cfl_alpha_idx =
- cfl_compute_alpha_ind(x, cfl, plane_bsize, mbmi->cfl_alpha_signs);
- }
- }
- av1_predict_intra_block_facade(xd, plane, block_idx, blk_col, blk_row,
- tx_size);
-}
-#endif
-
void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
BLOCK_SIZE bsize, int plane,
int enable_optimize_b, int mi_row,
@@ -2001,7 +1471,7 @@ void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
if (enable_optimize_b) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
- const TX_SIZE tx_size = get_tx_size(plane, xd);
+ const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
av1_get_entropy_contexts(bsize, tx_size, pd, ta, tl);
}
av1_foreach_transformed_block_in_plane(
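(Worked example, not part of the patch.) One change in the av1_encode_sb hunk above is worth spelling out: inter transform blocks are now visited in units no larger than BLOCK_64X64. Since block_size_wide[BLOCK_64X64] >> tx_size_wide_log2[0] = 64 >> 2 = 16 mi units, a 128x128 superblock (mi_width = mi_height = 32, when large superblocks are enabled) is covered by 2x2 outer iterations, each handing one 64x64-pixel unit to the inner blk_row/blk_col loops:

    /* Illustrative numbers only. */
    for (idy = 0; idy < 32; idy += 16)    /* mu_blocks_high = 16 */
      for (idx = 0; idx < 32; idx += 16)  /* mu_blocks_wide = 16 */
        ; /* encode_block_inter() then walks the blocks inside this unit */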
diff --git a/third_party/aom/av1/encoder/encodemb.h b/third_party/aom/av1/encoder/encodemb.h
index 35a2c1570..65476bcae 100644
--- a/third_party/aom/av1/encoder/encodemb.h
+++ b/third_party/aom/av1/encoder/encodemb.h
@@ -53,9 +53,10 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, int ctx, AV1_XFORM_QUANT xform_quant_idx);
-int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
- const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l);
+int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int blk_row,
+ int blk_col, int block, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
+ const ENTROPY_CONTEXT *l);
void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize,
int blk_col, int blk_row, TX_SIZE tx_size);
@@ -86,14 +87,6 @@ void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta, int *k,
int *size, int skip_rest, int skip_dir, int bs);
#endif
-#if CONFIG_CFL
-void av1_predict_intra_block_encoder_facade(MACROBLOCK *x,
- FRAME_CONTEXT *ec_ctx, int plane,
- int block_idx, int blk_col,
- int blk_row, TX_SIZE tx_size,
- BLOCK_SIZE plane_bsize);
-#endif
-
#if CONFIG_DPCM_INTRA
void av1_encode_block_intra_dpcm(const AV1_COMMON *cm, MACROBLOCK *x,
PREDICTION_MODE mode, int plane, int block,
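(Illustrative, not part of the patch.) The header change mirrors the definition above: av1_optimize_b now receives the transform block's row and column so it can derive the per-block transform type via av1_get_tx_type. A call site therefore takes the form already used by encode_block and av1_encode_block_intra in encodemb.c above:

    av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
                   a, l);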
diff --git a/third_party/aom/av1/encoder/encodemv.c b/third_party/aom/av1/encoder/encodemv.c
index eb0ff88c4..fd61fe6b2 100644
--- a/third_party/aom/av1/encoder/encodemv.c
+++ b/third_party/aom/av1/encoder/encodemv.c
@@ -31,7 +31,7 @@ void av1_entropy_mv_init(void) {
}
static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp,
- int usehp) {
+ MvSubpelPrecision precision) {
int offset;
const int sign = comp < 0;
const int mag = sign ? -comp : comp;
@@ -42,34 +42,53 @@ static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp,
assert(comp != 0);
- // Sign
+// Sign
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_bit(w, sign);
+#else
aom_write(w, sign, mvcomp->sign);
+#endif
// Class
aom_write_symbol(w, mv_class, mvcomp->class_cdf, MV_CLASSES);
// Integer bits
if (mv_class == MV_CLASS_0) {
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(w, d, mvcomp->class0_cdf, CLASS0_SIZE);
+#else
aom_write(w, d, mvcomp->class0[0]);
+#endif
} else {
int i;
const int n = mv_class + CLASS0_BITS - 1; // number of bits
for (i = 0; i < n; ++i) aom_write(w, (d >> i) & 1, mvcomp->bits[i]);
}
- // Fractional bits
- aom_write_symbol(
- w, fr, mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf,
- MV_FP_SIZE);
+// Fractional bits
+#if CONFIG_INTRABC
+ if (precision > MV_SUBPEL_NONE)
+#endif // CONFIG_INTRABC
+ {
+ aom_write_symbol(w, fr, mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d]
+ : mvcomp->fp_cdf,
+ MV_FP_SIZE);
+ }
// High precision bit
- if (usehp)
+ if (precision > MV_SUBPEL_LOW_PRECISION)
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(
+ w, hp, mv_class == MV_CLASS_0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf,
+ 2);
+#else
aom_write(w, hp, mv_class == MV_CLASS_0 ? mvcomp->class0_hp : mvcomp->hp);
+#endif
}
static void build_nmv_component_cost_table(int *mvcost,
const nmv_component *const mvcomp,
- int usehp) {
+ MvSubpelPrecision precision) {
int i, v;
int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE];
int bits_cost[MV_OFFSET_BITS][2];
@@ -89,7 +108,7 @@ static void build_nmv_component_cost_table(int *mvcost,
av1_cost_tokens(class0_fp_cost[i], mvcomp->class0_fp[i], av1_mv_fp_tree);
av1_cost_tokens(fp_cost, mvcomp->fp, av1_mv_fp_tree);
- if (usehp) {
+ if (precision > MV_SUBPEL_LOW_PRECISION) {
class0_hp_cost[0] = av1_cost_zero(mvcomp->class0_hp);
class0_hp_cost[1] = av1_cost_one(mvcomp->class0_hp);
hp_cost[0] = av1_cost_zero(mvcomp->hp);
@@ -110,16 +129,21 @@ static void build_nmv_component_cost_table(int *mvcost,
const int b = c + CLASS0_BITS - 1; /* number of bits */
for (i = 0; i < b; ++i) cost += bits_cost[i][((d >> i) & 1)];
}
- if (c == MV_CLASS_0) {
- cost += class0_fp_cost[d][f];
- } else {
- cost += fp_cost[f];
- }
- if (usehp) {
+#if CONFIG_INTRABC
+ if (precision > MV_SUBPEL_NONE)
+#endif // CONFIG_INTRABC
+ {
if (c == MV_CLASS_0) {
- cost += class0_hp_cost[e];
+ cost += class0_fp_cost[d][f];
} else {
- cost += hp_cost[e];
+ cost += fp_cost[f];
+ }
+ if (precision > MV_SUBPEL_LOW_PRECISION) {
+ if (c == MV_CLASS_0) {
+ cost += class0_hp_cost[e];
+ } else {
+ cost += hp_cost[e];
+ }
}
}
mvcost[v] = cost + sign_cost[0];
@@ -127,36 +151,16 @@ static void build_nmv_component_cost_table(int *mvcost,
}
}
+#if !CONFIG_NEW_MULTISYMBOL
static void update_mv(aom_writer *w, const unsigned int ct[2], aom_prob *cur_p,
aom_prob upd_p) {
(void)upd_p;
-#if CONFIG_TILE_GROUPS
// Just use the default maximum number of tile groups to avoid passing in the
// actual
// number
av1_cond_prob_diff_update(w, cur_p, ct, DEFAULT_MAX_NUM_TG);
-#else
- av1_cond_prob_diff_update(w, cur_p, ct, 1);
-#endif
}
-#if !CONFIG_EC_ADAPT
-static void write_mv_update(const aom_tree_index *tree,
- aom_prob probs[/*n - 1*/],
- const unsigned int counts[/*n - 1*/], int n,
- aom_writer *w) {
- int i;
- unsigned int branch_ct[32][2];
-
- // Assuming max number of probabilities <= 32
- assert(n <= 32);
-
- av1_tree_probs_from_distribution(tree, branch_ct, counts);
- for (i = 0; i < n - 1; ++i)
- update_mv(w, branch_ct[i], &probs[i], MV_UPDATE_PROB);
-}
-#endif
-
void av1_write_nmv_probs(AV1_COMMON *cm, int usehp, aom_writer *w,
nmv_context_counts *const nmv_counts) {
int i;
@@ -164,34 +168,6 @@ void av1_write_nmv_probs(AV1_COMMON *cm, int usehp, aom_writer *w,
for (nmv_ctx = 0; nmv_ctx < NMV_CONTEXTS; ++nmv_ctx) {
nmv_context *const mvc = &cm->fc->nmvc[nmv_ctx];
nmv_context_counts *const counts = &nmv_counts[nmv_ctx];
-#if !CONFIG_EC_ADAPT
- write_mv_update(av1_mv_joint_tree, mvc->joints, counts->joints, MV_JOINTS,
- w);
-
- for (i = 0; i < 2; ++i) {
- int j;
- nmv_component *comp = &mvc->comps[i];
- nmv_component_counts *comp_counts = &counts->comps[i];
-
- update_mv(w, comp_counts->sign, &comp->sign, MV_UPDATE_PROB);
- write_mv_update(av1_mv_class_tree, comp->classes, comp_counts->classes,
- MV_CLASSES, w);
- write_mv_update(av1_mv_class0_tree, comp->class0, comp_counts->class0,
- CLASS0_SIZE, w);
- for (j = 0; j < MV_OFFSET_BITS; ++j)
- update_mv(w, comp_counts->bits[j], &comp->bits[j], MV_UPDATE_PROB);
- }
-
- for (i = 0; i < 2; ++i) {
- int j;
- for (j = 0; j < CLASS0_SIZE; ++j)
- write_mv_update(av1_mv_fp_tree, mvc->comps[i].class0_fp[j],
- counts->comps[i].class0_fp[j], MV_FP_SIZE, w);
-
- write_mv_update(av1_mv_fp_tree, mvc->comps[i].fp, counts->comps[i].fp,
- MV_FP_SIZE, w);
- }
-#endif
if (usehp) {
for (i = 0; i < 2; ++i) {
@@ -202,6 +178,7 @@ void av1_write_nmv_probs(AV1_COMMON *cm, int usehp, aom_writer *w,
}
}
}
+#endif
void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, const MV *mv, const MV *ref,
nmv_context *mvctx, int usehp) {
@@ -230,18 +207,19 @@ void av1_encode_dv(aom_writer *w, const MV *mv, const MV *ref,
aom_write_symbol(w, j, mvctx->joint_cdf, MV_JOINTS);
if (mv_joint_vertical(j))
- encode_mv_component(w, diff.row, &mvctx->comps[0], 0);
+ encode_mv_component(w, diff.row, &mvctx->comps[0], MV_SUBPEL_NONE);
if (mv_joint_horizontal(j))
- encode_mv_component(w, diff.col, &mvctx->comps[1], 0);
+ encode_mv_component(w, diff.col, &mvctx->comps[1], MV_SUBPEL_NONE);
}
#endif // CONFIG_INTRABC
void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
- const nmv_context *ctx, int usehp) {
+ const nmv_context *ctx,
+ MvSubpelPrecision precision) {
av1_cost_tokens(mvjoint, ctx->joints, av1_mv_joint_tree);
- build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], usehp);
- build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp);
+ build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], precision);
+ build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], precision);
}
#if CONFIG_EXT_INTER
@@ -284,6 +262,27 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx);
nmv_context_counts *counts = &nmv_counts[nmv_ctx];
av1_inc_mv(&diff, counts, 1);
+#if CONFIG_COMPOUND_SINGLEREF
+ } else {
+ assert( // mode == SR_NEAREST_NEWMV ||
+ mode == SR_NEAR_NEWMV || mode == SR_ZERO_NEWMV || mode == SR_NEW_NEWMV);
+ const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv;
+ int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
+ int nmv_ctx =
+ av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
+ mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx);
+ nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+ (void)pred_mvs;
+ MV diff;
+ if (mode == SR_NEW_NEWMV) {
+ diff.row = mvs[0].as_mv.row - ref->row;
+ diff.col = mvs[0].as_mv.col - ref->col;
+ av1_inc_mv(&diff, counts, 1);
+ }
+ diff.row = mvs[1].as_mv.row - ref->row;
+ diff.col = mvs[1].as_mv.col - ref->col;
+ av1_inc_mv(&diff, counts, 1);
+#endif // CONFIG_COMPOUND_SINGLEREF
}
}
@@ -328,7 +327,7 @@ static void inc_mvs_sub8x8(const MODE_INFO *mi, int block, const int_mv mvs[2],
av1_inc_mv(&diff, counts, 1);
}
}
-#else
+#else // !CONFIG_EXT_INTER
static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
const int_mv mvs[2], const int_mv pred_mvs[2],
nmv_context_counts *nmv_counts) {
diff --git a/third_party/aom/av1/encoder/encodemv.h b/third_party/aom/av1/encoder/encodemv.h
index 6d442147f..8689cec27 100644
--- a/third_party/aom/av1/encoder/encodemv.h
+++ b/third_party/aom/av1/encoder/encodemv.h
@@ -20,14 +20,17 @@ extern "C" {
void av1_entropy_mv_init(void);
+#if !CONFIG_NEW_MULTISYMBOL
void av1_write_nmv_probs(AV1_COMMON *cm, int usehp, aom_writer *w,
nmv_context_counts *const counts);
+#endif
void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, const MV *mv, const MV *ref,
nmv_context *mvctx, int usehp);
void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
- const nmv_context *mvctx, int usehp);
+ const nmv_context *mvctx,
+ MvSubpelPrecision precision);
void av1_update_mv_count(ThreadData *td);
diff --git a/third_party/aom/av1/encoder/encoder.c b/third_party/aom/av1/encoder/encoder.c
index 4782ce2b7..943e2c6a0 100644
--- a/third_party/aom/av1/encoder/encoder.c
+++ b/third_party/aom/av1/encoder/encoder.c
@@ -18,7 +18,6 @@
#include "av1/common/alloccommon.h"
#if CONFIG_CDEF
#include "av1/common/cdef.h"
-#include "av1/common/clpf.h"
#endif // CONFIG_CDEF
#include "av1/common/filter.h"
#include "av1/common/idct.h"
@@ -31,6 +30,9 @@
#include "av1/encoder/aq_cyclicrefresh.h"
#include "av1/encoder/aq_variance.h"
#include "av1/encoder/bitstream.h"
+#if CONFIG_BGSPRITE
+#include "av1/encoder/bgsprite.h"
+#endif // CONFIG_BGSPRITE
#if CONFIG_ANS
#include "aom_dsp/buf_ans.h"
#endif
@@ -73,6 +75,8 @@
#if CONFIG_ENTROPY_STATS
FRAME_COUNTS aggregate_fc;
+// Aggregate frame counts per frame context type
+FRAME_COUNTS aggregate_fc_per_type[FRAME_CONTEXTS];
#endif // CONFIG_ENTROPY_STATS
#define AM_SEGMENT_ID_INACTIVE 7
@@ -421,7 +425,6 @@ void av1_initialize_enc(void) {
static void dealloc_compressor_data(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
- int i;
aom_free(cpi->mbmi_ext_base);
cpi->mbmi_ext_base = NULL;
@@ -467,10 +470,6 @@ static void dealloc_compressor_data(AV1_COMP *cpi) {
cpi->td.mb.mask_buf = NULL;
#endif
- // Free up-sampled reference buffers.
- for (i = 0; i < (REF_FRAMES + 1); i++)
- aom_free_frame_buffer(&cpi->upsampled_ref_bufs[i].buf);
-
av1_free_ref_frame_buffers(cm->buffer_pool);
#if CONFIG_LV_MAP
av1_free_txb_buf(cpi);
@@ -483,8 +482,11 @@ static void dealloc_compressor_data(AV1_COMP *cpi) {
aom_free_frame_buffer(&cpi->last_frame_db);
aom_free_frame_buffer(&cpi->trial_frame_rst);
aom_free(cpi->extra_rstbuf);
- for (i = 0; i < MAX_MB_PLANE; ++i)
- av1_free_restoration_struct(&cpi->rst_search[i]);
+ {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ av1_free_restoration_struct(&cpi->rst_search[i]);
+ }
#endif // CONFIG_LOOP_RESTORATION
aom_free_frame_buffer(&cpi->scaled_source);
aom_free_frame_buffer(&cpi->scaled_last_source);
@@ -497,8 +499,7 @@ static void dealloc_compressor_data(AV1_COMP *cpi) {
av1_free_pc_tree(&cpi->td);
#if CONFIG_PALETTE
- if (cpi->common.allow_screen_content_tools)
- aom_free(cpi->td.mb.palette_buffer);
+ aom_free(cpi->td.mb.palette_buffer);
#endif // CONFIG_PALETTE
#if CONFIG_ANS
@@ -735,13 +736,18 @@ static void alloc_util_frame_buffers(AV1_COMP *cpi) {
NULL, NULL))
aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate last frame deblocked buffer");
- if (aom_realloc_frame_buffer(&cpi->trial_frame_rst, cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
+ if (aom_realloc_frame_buffer(
+ &cpi->trial_frame_rst,
+#if CONFIG_FRAME_SUPERRES
+ cm->superres_upscaled_width, cm->superres_upscaled_height,
+#else
+ cm->width, cm->height,
+#endif // CONFIG_FRAME_SUPERRES
+ cm->subsampling_x, cm->subsampling_y,
#if CONFIG_HIGHBITDEPTH
- cm->use_highbitdepth,
+ cm->use_highbitdepth,
#endif
- AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL,
- NULL, NULL))
+ AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL))
aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate trial restored frame buffer");
int extra_rstbuf_sz = RESTORATION_EXTBUF_SIZE;
@@ -821,93 +827,107 @@ void av1_new_framerate(AV1_COMP *cpi, double framerate) {
static void set_tile_info(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
-#if CONFIG_TILE_GROUPS && CONFIG_DEPENDENT_HORZTILES
+#if CONFIG_DEPENDENT_HORZTILES
int tile_row, tile_col, num_tiles_in_tg;
int tg_row_start, tg_col_start;
#endif
#if CONFIG_EXT_TILE
+ if (cpi->oxcf.large_scale_tile) {
#if CONFIG_EXT_PARTITION
- if (cpi->oxcf.superblock_size != AOM_SUPERBLOCK_SIZE_64X64) {
- cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 32);
- cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 32);
- cm->tile_width <<= MAX_MIB_SIZE_LOG2;
- cm->tile_height <<= MAX_MIB_SIZE_LOG2;
- } else {
+ if (cpi->oxcf.superblock_size != AOM_SUPERBLOCK_SIZE_64X64) {
+ cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 32);
+ cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 32);
+ cm->tile_width <<= MAX_MIB_SIZE_LOG2;
+ cm->tile_height <<= MAX_MIB_SIZE_LOG2;
+ } else {
+ cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 64);
+ cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 64);
+ cm->tile_width <<= MAX_MIB_SIZE_LOG2 - 1;
+ cm->tile_height <<= MAX_MIB_SIZE_LOG2 - 1;
+ }
+#else
cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 64);
cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 64);
- cm->tile_width <<= MAX_MIB_SIZE_LOG2 - 1;
- cm->tile_height <<= MAX_MIB_SIZE_LOG2 - 1;
- }
-#else
- cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 64);
- cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 64);
- cm->tile_width <<= MAX_MIB_SIZE_LOG2;
- cm->tile_height <<= MAX_MIB_SIZE_LOG2;
+ cm->tile_width <<= MAX_MIB_SIZE_LOG2;
+ cm->tile_height <<= MAX_MIB_SIZE_LOG2;
#endif // CONFIG_EXT_PARTITION
- cm->tile_width = AOMMIN(cm->tile_width, cm->mi_cols);
- cm->tile_height = AOMMIN(cm->tile_height, cm->mi_rows);
+ cm->tile_width = AOMMIN(cm->tile_width, cm->mi_cols);
+ cm->tile_height = AOMMIN(cm->tile_height, cm->mi_rows);
- assert(cm->tile_width >> MAX_MIB_SIZE <= 32);
- assert(cm->tile_height >> MAX_MIB_SIZE <= 32);
+ assert(cm->tile_width >> MAX_MIB_SIZE <= 32);
+ assert(cm->tile_height >> MAX_MIB_SIZE <= 32);
- // Get the number of tiles
- cm->tile_cols = 1;
- while (cm->tile_cols * cm->tile_width < cm->mi_cols) ++cm->tile_cols;
+ // Get the number of tiles
+ cm->tile_cols = 1;
+ while (cm->tile_cols * cm->tile_width < cm->mi_cols) ++cm->tile_cols;
- cm->tile_rows = 1;
- while (cm->tile_rows * cm->tile_height < cm->mi_rows) ++cm->tile_rows;
-#else
- int min_log2_tile_cols, max_log2_tile_cols;
- av1_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
+ cm->tile_rows = 1;
+ while (cm->tile_rows * cm->tile_height < cm->mi_rows) ++cm->tile_rows;
+ } else {
+#endif // CONFIG_EXT_TILE
+ int min_log2_tile_cols, max_log2_tile_cols;
+ av1_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
- cm->log2_tile_cols =
- clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
- cm->log2_tile_rows = cpi->oxcf.tile_rows;
+ cm->log2_tile_cols =
+ clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
+ cm->log2_tile_rows = cpi->oxcf.tile_rows;
- cm->tile_cols = 1 << cm->log2_tile_cols;
- cm->tile_rows = 1 << cm->log2_tile_rows;
+ cm->tile_cols = 1 << cm->log2_tile_cols;
+ cm->tile_rows = 1 << cm->log2_tile_rows;
- cm->tile_width = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
- cm->tile_width >>= cm->log2_tile_cols;
- cm->tile_height = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
- cm->tile_height >>= cm->log2_tile_rows;
+ cm->tile_width = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
+ cm->tile_width >>= cm->log2_tile_cols;
+ cm->tile_height = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
+ cm->tile_height >>= cm->log2_tile_rows;
- // round to integer multiples of max superblock size
- cm->tile_width = ALIGN_POWER_OF_TWO(cm->tile_width, MAX_MIB_SIZE_LOG2);
- cm->tile_height = ALIGN_POWER_OF_TWO(cm->tile_height, MAX_MIB_SIZE_LOG2);
+ // round to integer multiples of max superblock size
+ cm->tile_width = ALIGN_POWER_OF_TWO(cm->tile_width, MAX_MIB_SIZE_LOG2);
+ cm->tile_height = ALIGN_POWER_OF_TWO(cm->tile_height, MAX_MIB_SIZE_LOG2);
+#if CONFIG_EXT_TILE
+ }
#endif // CONFIG_EXT_TILE
#if CONFIG_DEPENDENT_HORZTILES
cm->dependent_horz_tiles = cpi->oxcf.dependent_horz_tiles;
#if CONFIG_EXT_TILE
- if (cm->tile_rows <= 1) cm->dependent_horz_tiles = 0;
-#else
- if (cm->log2_tile_rows == 0) cm->dependent_horz_tiles = 0;
-#endif
-#if CONFIG_TILE_GROUPS
- if (cpi->oxcf.mtu == 0) {
- cm->num_tg = cpi->oxcf.num_tile_groups;
+ if (cm->large_scale_tile) {
+    // May not be needed since cpi->oxcf.dependent_horz_tiles is already adjusted.
+ cm->dependent_horz_tiles = 0;
} else {
- // Use a default value for the purposes of weighting costs in probability
- // updates
- cm->num_tg = DEFAULT_MAX_NUM_TG;
+#endif // CONFIG_EXT_TILE
+ if (cm->log2_tile_rows == 0) cm->dependent_horz_tiles = 0;
+#if CONFIG_EXT_TILE
}
- num_tiles_in_tg =
- (cm->tile_cols * cm->tile_rows + cm->num_tg - 1) / cm->num_tg;
- tg_row_start = 0;
- tg_col_start = 0;
- for (tile_row = 0; tile_row < cm->tile_rows; ++tile_row) {
- for (tile_col = 0; tile_col < cm->tile_cols; ++tile_col) {
- if ((tile_row * cm->tile_cols + tile_col) % num_tiles_in_tg == 0) {
- tg_row_start = tile_row;
- tg_col_start = tile_col;
+#endif // CONFIG_EXT_TILE
+
+#if CONFIG_EXT_TILE
+ if (!cm->large_scale_tile) {
+#endif // CONFIG_EXT_TILE
+ if (cpi->oxcf.mtu == 0) {
+ cm->num_tg = cpi->oxcf.num_tile_groups;
+ } else {
+ // Use a default value for the purposes of weighting costs in probability
+ // updates
+ cm->num_tg = DEFAULT_MAX_NUM_TG;
+ }
+ num_tiles_in_tg =
+ (cm->tile_cols * cm->tile_rows + cm->num_tg - 1) / cm->num_tg;
+ tg_row_start = 0;
+ tg_col_start = 0;
+ for (tile_row = 0; tile_row < cm->tile_rows; ++tile_row) {
+ for (tile_col = 0; tile_col < cm->tile_cols; ++tile_col) {
+ if ((tile_row * cm->tile_cols + tile_col) % num_tiles_in_tg == 0) {
+ tg_row_start = tile_row;
+ tg_col_start = tile_col;
+ }
+ cm->tile_group_start_row[tile_row][tile_col] = tg_row_start;
+ cm->tile_group_start_col[tile_row][tile_col] = tg_col_start;
}
- cm->tile_group_start_row[tile_row][tile_col] = tg_row_start;
- cm->tile_group_start_col[tile_row][tile_col] = tg_col_start;
}
+#if CONFIG_EXT_TILE
}
-#endif
+#endif // CONFIG_EXT_TILE
#endif
#if CONFIG_LOOPFILTERING_ACROSS_TILES
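On the non-large-scale branch above, the tile width in mi units is derived by aligning mi_cols up to a whole superblock, dividing by the number of tile columns, and aligning up again. A minimal worked sketch (not part of the patch; the mi_cols value and a MAX_MIB_SIZE_LOG2 of 5 are hypothetical, and ALIGN_POWER_OF_TWO is re-expanded locally):

    /* Illustrative sketch only -- the tile-width arithmetic from set_tile_info(). */
    #include <stdio.h>

    #define SKETCH_MIB_LOG2 5  /* hypothetical: 32 mi units per superblock */
    #define SKETCH_ALIGN_POW2(v, n) (((v) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))

    int main(void) {
      const int mi_cols = 212;       /* hypothetical frame width in mi units */
      const int log2_tile_cols = 2;  /* four tile columns requested */
      int tile_width = SKETCH_ALIGN_POW2(mi_cols, SKETCH_MIB_LOG2);  /* -> 224 */
      tile_width >>= log2_tile_cols;                                 /* ->  56 */
      /* Round back up to a whole number of superblocks. */
      tile_width = SKETCH_ALIGN_POW2(tile_width, SKETCH_MIB_LOG2);   /* ->  64 */
      printf("tile_width = %d mi units\n", tile_width);
      return 0;
    }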
@@ -965,6 +985,10 @@ static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) {
cm->use_highbitdepth = oxcf->use_highbitdepth;
#endif
cm->color_space = oxcf->color_space;
+#if CONFIG_COLORSPACE_HEADERS
+ cm->transfer_function = oxcf->transfer_function;
+ cm->chroma_sample_position = oxcf->chroma_sample_position;
+#endif
cm->color_range = oxcf->color_range;
cm->width = oxcf->width;
@@ -1175,6 +1199,21 @@ MAKE_BFP_SAD3_WRAPPER(aom_highbd_sad4x4x3)
MAKE_BFP_SAD8_WRAPPER(aom_highbd_sad4x4x8)
MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x4x4d)
+#if CONFIG_EXT_PARTITION_TYPES
+MAKE_BFP_SAD_WRAPPER(aom_highbd_sad4x16)
+MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad4x16_avg)
+MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x16x4d)
+MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x4)
+MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x4_avg)
+MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x4x4d)
+MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x32)
+MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x32_avg)
+MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x32x4d)
+MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x8)
+MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x8_avg)
+MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x8x4d)
+#endif
+
#if CONFIG_EXT_INTER
#define HIGHBD_MBFP(BT, MCSDF, MCSVF) \
cpi->fn_ptr[BT].msdf = MCSDF; \
@@ -1223,6 +1262,13 @@ MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x8)
MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x4)
MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x8)
MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x4)
+
+#if CONFIG_EXT_PARTITION_TYPES
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x16)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x4)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x32)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x8)
+#endif
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR
@@ -1266,6 +1312,13 @@ MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x8)
MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x4)
MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x8)
MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x4)
+
+#if CONFIG_EXT_PARTITION_TYPES
+MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x16)
+MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x4)
+MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x32)
+MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x8)
+#endif
#endif // CONFIG_MOTION_VAR
static void highbd_set_var_fns(AV1_COMP *const cpi) {
@@ -1273,6 +1326,32 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
if (cm->use_highbitdepth) {
switch (cm->bit_depth) {
case AOM_BITS_8:
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits8,
+ aom_highbd_sad32x8_avg_bits8, aom_highbd_8_variance32x8,
+ aom_highbd_8_sub_pixel_variance32x8,
+ aom_highbd_8_sub_pixel_avg_variance32x8, NULL, NULL,
+ aom_highbd_sad32x8x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_8X32, aom_highbd_sad8x32_bits8,
+ aom_highbd_sad8x32_avg_bits8, aom_highbd_8_variance8x32,
+ aom_highbd_8_sub_pixel_variance8x32,
+ aom_highbd_8_sub_pixel_avg_variance8x32, NULL, NULL,
+ aom_highbd_sad8x32x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_16X4, aom_highbd_sad16x4_bits8,
+ aom_highbd_sad16x4_avg_bits8, aom_highbd_8_variance16x4,
+ aom_highbd_8_sub_pixel_variance16x4,
+ aom_highbd_8_sub_pixel_avg_variance16x4, NULL, NULL,
+ aom_highbd_sad16x4x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_4X16, aom_highbd_sad4x16_bits8,
+ aom_highbd_sad4x16_avg_bits8, aom_highbd_8_variance4x16,
+ aom_highbd_8_sub_pixel_variance4x16,
+ aom_highbd_8_sub_pixel_avg_variance4x16, NULL, NULL,
+ aom_highbd_sad4x16x4d_bits8)
+#endif
+
HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits8,
aom_highbd_sad32x16_avg_bits8, aom_highbd_8_variance32x16,
aom_highbd_8_sub_pixel_variance32x16,
@@ -1354,7 +1433,7 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
aom_highbd_8_sub_pixel_avg_variance4x4, aom_highbd_sad4x4x3_bits8,
aom_highbd_sad4x4x8_bits8, aom_highbd_sad4x4x4d_bits8)
-#if CONFIG_CB4X4
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
HIGHBD_BFP(BLOCK_2X2, NULL, NULL, aom_highbd_8_variance2x2, NULL, NULL,
NULL, NULL, NULL)
HIGHBD_BFP(BLOCK_4X2, NULL, NULL, aom_highbd_8_variance4x2, NULL, NULL,
@@ -1420,6 +1499,19 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
aom_highbd_8_masked_sub_pixel_variance8x4)
HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits8,
aom_highbd_8_masked_sub_pixel_variance4x4)
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits8,
+ aom_highbd_8_masked_sub_pixel_variance32x8)
+
+ HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits8,
+ aom_highbd_8_masked_sub_pixel_variance8x32)
+
+ HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits8,
+ aom_highbd_8_masked_sub_pixel_variance16x4)
+
+ HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits8,
+ aom_highbd_8_masked_sub_pixel_variance4x16)
+#endif
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR
#if CONFIG_EXT_PARTITION
@@ -1472,10 +1564,53 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits8,
aom_highbd_obmc_variance4x4,
aom_highbd_obmc_sub_pixel_variance4x4)
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits8,
+ aom_highbd_obmc_variance32x8,
+ aom_highbd_obmc_sub_pixel_variance32x8)
+
+ HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits8,
+ aom_highbd_obmc_variance8x32,
+ aom_highbd_obmc_sub_pixel_variance8x32)
+
+ HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits8,
+ aom_highbd_obmc_variance16x4,
+ aom_highbd_obmc_sub_pixel_variance16x4)
+
+ HIGHBD_OBFP(BLOCK_4X16, aom_highbd_obmc_sad4x16_bits8,
+ aom_highbd_obmc_variance4x16,
+ aom_highbd_obmc_sub_pixel_variance4x16)
+#endif
#endif // CONFIG_MOTION_VAR
break;
case AOM_BITS_10:
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits10,
+ aom_highbd_sad32x8_avg_bits10, aom_highbd_10_variance32x8,
+ aom_highbd_10_sub_pixel_variance32x8,
+ aom_highbd_10_sub_pixel_avg_variance32x8, NULL, NULL,
+ aom_highbd_sad32x8x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_8X32, aom_highbd_sad8x32_bits10,
+ aom_highbd_sad8x32_avg_bits10, aom_highbd_10_variance8x32,
+ aom_highbd_10_sub_pixel_variance8x32,
+ aom_highbd_10_sub_pixel_avg_variance8x32, NULL, NULL,
+ aom_highbd_sad8x32x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_16X4, aom_highbd_sad16x4_bits10,
+ aom_highbd_sad16x4_avg_bits10, aom_highbd_10_variance16x4,
+ aom_highbd_10_sub_pixel_variance16x4,
+ aom_highbd_10_sub_pixel_avg_variance16x4, NULL, NULL,
+ aom_highbd_sad16x4x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_4X16, aom_highbd_sad4x16_bits10,
+ aom_highbd_sad4x16_avg_bits10, aom_highbd_10_variance4x16,
+ aom_highbd_10_sub_pixel_variance4x16,
+ aom_highbd_10_sub_pixel_avg_variance4x16, NULL, NULL,
+ aom_highbd_sad4x16x4d_bits10)
+#endif
+
HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits10,
aom_highbd_sad32x16_avg_bits10, aom_highbd_10_variance32x16,
aom_highbd_10_sub_pixel_variance32x16,
@@ -1559,7 +1694,7 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
aom_highbd_10_sub_pixel_avg_variance4x4, aom_highbd_sad4x4x3_bits10,
aom_highbd_sad4x4x8_bits10, aom_highbd_sad4x4x4d_bits10)
-#if CONFIG_CB4X4
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
HIGHBD_BFP(BLOCK_2X2, NULL, NULL, aom_highbd_10_variance2x2, NULL, NULL,
NULL, NULL, NULL)
HIGHBD_BFP(BLOCK_4X2, NULL, NULL, aom_highbd_10_variance4x2, NULL, NULL,
@@ -1627,6 +1762,19 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
aom_highbd_10_masked_sub_pixel_variance8x4)
HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits10,
aom_highbd_10_masked_sub_pixel_variance4x4)
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits10,
+ aom_highbd_10_masked_sub_pixel_variance32x8)
+
+ HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits10,
+ aom_highbd_10_masked_sub_pixel_variance8x32)
+
+ HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits10,
+ aom_highbd_10_masked_sub_pixel_variance16x4)
+
+ HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits10,
+ aom_highbd_10_masked_sub_pixel_variance4x16)
+#endif
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR
#if CONFIG_EXT_PARTITION
@@ -1679,10 +1827,53 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits10,
aom_highbd_10_obmc_variance4x4,
aom_highbd_10_obmc_sub_pixel_variance4x4)
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits10,
+ aom_highbd_10_obmc_variance32x8,
+ aom_highbd_10_obmc_sub_pixel_variance32x8)
+
+ HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits10,
+ aom_highbd_10_obmc_variance8x32,
+ aom_highbd_10_obmc_sub_pixel_variance8x32)
+
+ HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits10,
+ aom_highbd_10_obmc_variance16x4,
+ aom_highbd_10_obmc_sub_pixel_variance16x4)
+
+ HIGHBD_OBFP(BLOCK_4X16, aom_highbd_obmc_sad4x16_bits10,
+ aom_highbd_10_obmc_variance4x16,
+ aom_highbd_10_obmc_sub_pixel_variance4x16)
+#endif
#endif // CONFIG_MOTION_VAR
break;
case AOM_BITS_12:
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits12,
+ aom_highbd_sad32x8_avg_bits12, aom_highbd_12_variance32x8,
+ aom_highbd_12_sub_pixel_variance32x8,
+ aom_highbd_12_sub_pixel_avg_variance32x8, NULL, NULL,
+ aom_highbd_sad32x8x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_8X32, aom_highbd_sad8x32_bits12,
+ aom_highbd_sad8x32_avg_bits12, aom_highbd_12_variance8x32,
+ aom_highbd_12_sub_pixel_variance8x32,
+ aom_highbd_12_sub_pixel_avg_variance8x32, NULL, NULL,
+ aom_highbd_sad8x32x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_16X4, aom_highbd_sad16x4_bits12,
+ aom_highbd_sad16x4_avg_bits12, aom_highbd_12_variance16x4,
+ aom_highbd_12_sub_pixel_variance16x4,
+ aom_highbd_12_sub_pixel_avg_variance16x4, NULL, NULL,
+ aom_highbd_sad16x4x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_4X16, aom_highbd_sad4x16_bits12,
+ aom_highbd_sad4x16_avg_bits12, aom_highbd_12_variance4x16,
+ aom_highbd_12_sub_pixel_variance4x16,
+ aom_highbd_12_sub_pixel_avg_variance4x16, NULL, NULL,
+ aom_highbd_sad4x16x4d_bits12)
+#endif
+
HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits12,
aom_highbd_sad32x16_avg_bits12, aom_highbd_12_variance32x16,
aom_highbd_12_sub_pixel_variance32x16,
@@ -1766,7 +1957,7 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
aom_highbd_12_sub_pixel_avg_variance4x4, aom_highbd_sad4x4x3_bits12,
aom_highbd_sad4x4x8_bits12, aom_highbd_sad4x4x4d_bits12)
-#if CONFIG_CB4X4
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
HIGHBD_BFP(BLOCK_2X2, NULL, NULL, aom_highbd_12_variance2x2, NULL, NULL,
NULL, NULL, NULL)
HIGHBD_BFP(BLOCK_4X2, NULL, NULL, aom_highbd_12_variance4x2, NULL, NULL,
@@ -1834,6 +2025,19 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
aom_highbd_12_masked_sub_pixel_variance8x4)
HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits12,
aom_highbd_12_masked_sub_pixel_variance4x4)
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits12,
+ aom_highbd_12_masked_sub_pixel_variance32x8)
+
+ HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits12,
+ aom_highbd_12_masked_sub_pixel_variance8x32)
+
+ HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits12,
+ aom_highbd_12_masked_sub_pixel_variance16x4)
+
+ HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits12,
+ aom_highbd_12_masked_sub_pixel_variance4x16)
+#endif
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR
@@ -1887,6 +2091,23 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits12,
aom_highbd_12_obmc_variance4x4,
aom_highbd_12_obmc_sub_pixel_variance4x4)
+#if CONFIG_EXT_PARTITION_TYPES
+ HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits12,
+ aom_highbd_12_obmc_variance32x8,
+ aom_highbd_12_obmc_sub_pixel_variance32x8)
+
+ HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits12,
+ aom_highbd_12_obmc_variance8x32,
+ aom_highbd_12_obmc_sub_pixel_variance8x32)
+
+ HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits12,
+ aom_highbd_12_obmc_variance16x4,
+ aom_highbd_12_obmc_sub_pixel_variance16x4)
+
+ HIGHBD_OBFP(BLOCK_4X16, aom_highbd_obmc_sad4x16_bits12,
+ aom_highbd_12_obmc_variance4x16,
+ aom_highbd_12_obmc_sub_pixel_variance4x16)
+#endif
#endif // CONFIG_MOTION_VAR
break;
@@ -1933,10 +2154,15 @@ void set_compound_tools(AV1_COMMON *cm) {
void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
AV1_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
+ MACROBLOCK *const x = &cpi->td.mb;
if (cm->profile != oxcf->profile) cm->profile = oxcf->profile;
cm->bit_depth = oxcf->bit_depth;
cm->color_space = oxcf->color_space;
+#if CONFIG_COLORSPACE_HEADERS
+ cm->transfer_function = oxcf->transfer_function;
+ cm->chroma_sample_position = oxcf->chroma_sample_position;
+#endif
cm->color_range = oxcf->color_range;
if (cm->profile <= PROFILE_1)
@@ -1945,9 +2171,9 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
assert(cm->bit_depth > AOM_BITS_8);
cpi->oxcf = *oxcf;
- cpi->td.mb.e_mbd.bd = (int)cm->bit_depth;
+ x->e_mbd.bd = (int)cm->bit_depth;
#if CONFIG_GLOBAL_MOTION
- cpi->td.mb.e_mbd.global_motion = cm->global_motion;
+ x->e_mbd.global_motion = cm->global_motion;
#endif // CONFIG_GLOBAL_MOTION
if ((oxcf->pass == 0) && (oxcf->rc_mode == AOM_Q)) {
@@ -1969,17 +2195,9 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
cm->reset_frame_context = RESET_FRAME_CONTEXT_NONE;
#if CONFIG_PALETTE
- cm->allow_screen_content_tools = (cpi->oxcf.content == AOM_CONTENT_SCREEN);
- if (cm->allow_screen_content_tools) {
- MACROBLOCK *x = &cpi->td.mb;
- if (x->palette_buffer == 0) {
- CHECK_MEM_ERROR(cm, x->palette_buffer,
- aom_memalign(16, sizeof(*x->palette_buffer)));
- }
- // Reallocate the pc_tree, as it's contents depends on
- // the state of cm->allow_screen_content_tools
- av1_free_pc_tree(&cpi->td);
- av1_setup_pc_tree(&cpi->common, &cpi->td);
+ if (x->palette_buffer == NULL) {
+ CHECK_MEM_ERROR(cm, x->palette_buffer,
+ aom_memalign(16, sizeof(*x->palette_buffer)));
}
#endif // CONFIG_PALETTE
#if CONFIG_EXT_INTER
@@ -2058,15 +2276,6 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
#endif // CONFIG_ANS && ANS_MAX_SYMBOLS
}
-static INLINE void init_upsampled_ref_frame_bufs(AV1_COMP *cpi) {
- int i;
-
- for (i = 0; i < (REF_FRAMES + 1); ++i) {
- cpi->upsampled_ref_bufs[i].ref_count = 0;
- cpi->upsampled_ref_idx[i] = INVALID_IDX;
- }
-}
-
AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
BufferPool *const pool) {
unsigned int i;
@@ -2099,10 +2308,6 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
cpi->resize_state = 0;
cpi->resize_avg_qp = 0;
cpi->resize_buffer_underflow = 0;
- cpi->resize_scale_num = 16;
- cpi->resize_scale_den = 16;
- cpi->resize_next_scale_num = 16;
- cpi->resize_next_scale_den = 16;
cpi->common.buffer_pool = pool;
@@ -2197,6 +2402,7 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
#endif
#if CONFIG_ENTROPY_STATS
av1_zero(aggregate_fc);
+ av1_zero_array(aggregate_fc_per_type, FRAME_CONTEXTS);
#endif // CONFIG_ENTROPY_STATS
cpi->first_time_stamp_ever = INT64_MAX;
@@ -2278,8 +2484,6 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
#endif
- init_upsampled_ref_frame_bufs(cpi);
-
av1_set_speed_features_framesize_independent(cpi);
av1_set_speed_features_framesize_dependent(cpi);
@@ -2293,6 +2497,24 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
cpi->fn_ptr[BT].sdx8f = SDX8F; \
cpi->fn_ptr[BT].sdx4df = SDX4DF;
+#if CONFIG_EXT_PARTITION_TYPES
+ BFP(BLOCK_4X16, aom_sad4x16, aom_sad4x16_avg, aom_variance4x16,
+ aom_sub_pixel_variance4x16, aom_sub_pixel_avg_variance4x16, NULL, NULL,
+ aom_sad4x16x4d)
+
+ BFP(BLOCK_16X4, aom_sad16x4, aom_sad16x4_avg, aom_variance16x4,
+ aom_sub_pixel_variance16x4, aom_sub_pixel_avg_variance16x4, NULL, NULL,
+ aom_sad16x4x4d)
+
+ BFP(BLOCK_8X32, aom_sad8x32, aom_sad8x32_avg, aom_variance8x32,
+ aom_sub_pixel_variance8x32, aom_sub_pixel_avg_variance8x32, NULL, NULL,
+ aom_sad8x32x4d)
+
+ BFP(BLOCK_32X8, aom_sad32x8, aom_sad32x8_avg, aom_variance32x8,
+ aom_sub_pixel_variance32x8, aom_sub_pixel_avg_variance32x8, NULL, NULL,
+ aom_sad32x8x4d)
+#endif
+
#if CONFIG_EXT_PARTITION
BFP(BLOCK_128X128, aom_sad128x128, aom_sad128x128_avg, aom_variance128x128,
aom_sub_pixel_variance128x128, aom_sub_pixel_avg_variance128x128,
@@ -2359,7 +2581,7 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
aom_sub_pixel_variance4x4, aom_sub_pixel_avg_variance4x4, aom_sad4x4x3,
aom_sad4x4x8, aom_sad4x4x4d)
-#if CONFIG_CB4X4
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
BFP(BLOCK_2X2, NULL, NULL, aom_variance2x2, NULL, NULL, NULL, NULL, NULL)
BFP(BLOCK_2X4, NULL, NULL, aom_variance2x4, NULL, NULL, NULL, NULL, NULL)
BFP(BLOCK_4X2, NULL, NULL, aom_variance4x2, NULL, NULL, NULL, NULL, NULL)
@@ -2405,6 +2627,20 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
aom_obmc_sub_pixel_variance8x4)
OBFP(BLOCK_4X4, aom_obmc_sad4x4, aom_obmc_variance4x4,
aom_obmc_sub_pixel_variance4x4)
+
+#if CONFIG_EXT_PARTITION_TYPES
+ OBFP(BLOCK_4X16, aom_obmc_sad4x16, aom_obmc_variance4x16,
+ aom_obmc_sub_pixel_variance4x16)
+
+ OBFP(BLOCK_16X4, aom_obmc_sad16x4, aom_obmc_variance16x4,
+ aom_obmc_sub_pixel_variance16x4)
+
+ OBFP(BLOCK_8X32, aom_obmc_sad8x32, aom_obmc_variance8x32,
+ aom_obmc_sub_pixel_variance8x32)
+
+ OBFP(BLOCK_32X8, aom_obmc_sad32x8, aom_obmc_variance32x8,
+ aom_obmc_sub_pixel_variance32x8)
+#endif
#endif // CONFIG_MOTION_VAR
#if CONFIG_EXT_INTER
@@ -2431,6 +2667,16 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
MBFP(BLOCK_4X8, aom_masked_sad4x8, aom_masked_sub_pixel_variance4x8)
MBFP(BLOCK_8X4, aom_masked_sad8x4, aom_masked_sub_pixel_variance8x4)
MBFP(BLOCK_4X4, aom_masked_sad4x4, aom_masked_sub_pixel_variance4x4)
+
+#if CONFIG_EXT_PARTITION_TYPES
+ MBFP(BLOCK_4X16, aom_masked_sad4x16, aom_masked_sub_pixel_variance4x16)
+
+ MBFP(BLOCK_16X4, aom_masked_sad16x4, aom_masked_sub_pixel_variance16x4)
+
+ MBFP(BLOCK_8X32, aom_masked_sad8x32, aom_masked_sub_pixel_variance8x32)
+
+ MBFP(BLOCK_32X8, aom_masked_sad32x8, aom_masked_sub_pixel_variance32x8)
+#endif
#endif // CONFIG_EXT_INTER
#if CONFIG_HIGHBITDEPTH
@@ -2449,7 +2695,9 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
av1_loop_filter_init(cm);
#if CONFIG_FRAME_SUPERRES
- cm->superres_scale_numerator = SUPERRES_SCALE_DENOMINATOR;
+ cm->superres_scale_numerator = SCALE_DENOMINATOR;
+ cm->superres_upscaled_width = oxcf->width;
+ cm->superres_upscaled_height = oxcf->height;
#endif // CONFIG_FRAME_SUPERRES
#if CONFIG_LOOP_RESTORATION
av1_loop_restoration_precal();
@@ -2479,6 +2727,8 @@ void av1_remove_compressor(AV1_COMP *cpi) {
fprintf(stderr, "Writing counts.stt\n");
FILE *f = fopen("counts.stt", "wb");
fwrite(&aggregate_fc, sizeof(aggregate_fc), 1, f);
+ fwrite(aggregate_fc_per_type, sizeof(aggregate_fc_per_type[0]),
+ FRAME_CONTEXTS, f);
fclose(f);
}
#endif // CONFIG_ENTROPY_STATS
@@ -2566,8 +2816,7 @@ void av1_remove_compressor(AV1_COMP *cpi) {
// Deallocate allocated thread data.
if (t < cpi->num_workers - 1) {
#if CONFIG_PALETTE
- if (cpi->common.allow_screen_content_tools)
- aom_free(thread_data->td->palette_buffer);
+ aom_free(thread_data->td->palette_buffer);
#endif // CONFIG_PALETTE
#if CONFIG_MOTION_VAR
aom_free(thread_data->td->above_pred_buf);
@@ -2835,71 +3084,6 @@ void aom_write_one_yuv_frame(AV1_COMMON *cm, YV12_BUFFER_CONFIG *s) {
}
#endif // OUTPUT_YUV_REC
-#if CONFIG_HIGHBITDEPTH
-static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst, int planes,
- int bd) {
-#else
-static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst, int planes) {
-#endif // CONFIG_HIGHBITDEPTH
- const int src_w = src->y_crop_width;
- const int src_h = src->y_crop_height;
- const int dst_w = dst->y_crop_width;
- const int dst_h = dst->y_crop_height;
- const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
- src->v_buffer };
- const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
- uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
- const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
- const InterpFilterParams interp_filter_params =
- av1_get_interp_filter_params(EIGHTTAP_REGULAR);
- const int16_t *kernel = interp_filter_params.filter_ptr;
- const int taps = interp_filter_params.taps;
- int x, y, i;
-
- assert(planes <= 3);
- for (y = 0; y < dst_h; y += 16) {
- for (x = 0; x < dst_w; x += 16) {
- for (i = 0; i < planes; ++i) {
- const int factor = (i == 0 || i == 3 ? 1 : 2);
- const int x_q4 = x * (16 / factor) * src_w / dst_w;
- const int y_q4 = y * (16 / factor) * src_h / dst_h;
- const int src_stride = src_strides[i];
- const int dst_stride = dst_strides[i];
- const uint8_t *src_ptr = srcs[i] +
- (y / factor) * src_h / dst_h * src_stride +
- (x / factor) * src_w / dst_w;
- uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
-
-#if CONFIG_HIGHBITDEPTH
- if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
- aom_highbd_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
- &kernel[(x_q4 & 0xf) * taps], 16 * src_w / dst_w,
- &kernel[(y_q4 & 0xf) * taps], 16 * src_h / dst_h,
- 16 / factor, 16 / factor, bd);
- } else {
- aom_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
- &kernel[(x_q4 & 0xf) * taps], 16 * src_w / dst_w,
- &kernel[(y_q4 & 0xf) * taps], 16 * src_h / dst_h,
- 16 / factor, 16 / factor);
- }
-#else
- aom_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
- &kernel[(x_q4 & 0xf) * taps], 16 * src_w / dst_w,
- &kernel[(y_q4 & 0xf) * taps], 16 * src_h / dst_h,
- 16 / factor, 16 / factor);
-#endif // CONFIG_HIGHBITDEPTH
- }
- }
- }
-
- if (planes == 1)
- aom_extend_frame_borders_y(dst);
- else
- aom_extend_frame_borders(dst);
-}
-
#if CONFIG_GLOBAL_MOTION
#define GM_RECODE_LOOP_NUM4X4_FACTOR 192
static int recode_loop_test_global_motion(AV1_COMP *cpi) {
@@ -2949,52 +3133,6 @@ static int recode_loop_test(AV1_COMP *cpi, int high_limit, int low_limit, int q,
return force_recode;
}
-static INLINE int get_free_upsampled_ref_buf(EncRefCntBuffer *ubufs) {
- int i;
-
- for (i = 0; i < (REF_FRAMES + 1); i++) {
- if (!ubufs[i].ref_count) {
- return i;
- }
- }
- return INVALID_IDX;
-}
-
-// Up-sample 1 reference frame.
-static INLINE int upsample_ref_frame(AV1_COMP *cpi,
- const YV12_BUFFER_CONFIG *const ref) {
- AV1_COMMON *const cm = &cpi->common;
- EncRefCntBuffer *ubufs = cpi->upsampled_ref_bufs;
- int new_uidx = get_free_upsampled_ref_buf(ubufs);
-
- if (new_uidx == INVALID_IDX) {
- return INVALID_IDX;
- } else {
- YV12_BUFFER_CONFIG *upsampled_ref = &ubufs[new_uidx].buf;
-
- // Can allocate buffer for Y plane only.
- if (upsampled_ref->buffer_alloc_sz < (ref->buffer_alloc_sz << 6))
- if (aom_realloc_frame_buffer(upsampled_ref, (cm->width << 3),
- (cm->height << 3), cm->subsampling_x,
- cm->subsampling_y,
-#if CONFIG_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif
- (AOM_BORDER_IN_PIXELS << 3),
- cm->byte_alignment, NULL, NULL, NULL))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to allocate up-sampled frame buffer");
-
-// Currently, only Y plane is up-sampled, U, V are not used.
-#if CONFIG_HIGHBITDEPTH
- scale_and_extend_frame(ref, upsampled_ref, 1, (int)cm->bit_depth);
-#else
- scale_and_extend_frame(ref, upsampled_ref, 1);
-#endif
- return new_uidx;
- }
-}
-
#define DUMP_REF_FRAME_IMAGES 0
#if DUMP_REF_FRAME_IMAGES == 1
@@ -3068,34 +3206,50 @@ static INLINE void shift_last_ref_frames(AV1_COMP *cpi) {
}
#endif // CONFIG_EXT_REFS
+#if CONFIG_VAR_REFS
+static void enc_check_valid_ref_frames(AV1_COMP *const cpi) {
+ AV1_COMMON *const cm = &cpi->common;
+ MV_REFERENCE_FRAME ref_frame;
+
+  // TODO(zoeliu): Handle ALTREF_FRAME the same way as the other reference
+  //               frames. Currently the encoder invalidates ALTREF when it is
+  //               the same as LAST, but invalidates all the other references
+  //               when they are the same as ALTREF.
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ int ref_buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
+ RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - LAST_FRAME];
+
+ if (ref_buf_idx != INVALID_IDX) {
+ ref_buf->is_valid = 1;
+
+ MV_REFERENCE_FRAME ref;
+ for (ref = LAST_FRAME; ref < ref_frame; ++ref) {
+ int buf_idx = get_ref_frame_buf_idx(cpi, ref);
+ RefBuffer *const buf = &cm->frame_refs[ref - LAST_FRAME];
+ if (buf->is_valid && buf_idx == ref_buf_idx) {
+ if (ref_frame != ALTREF_FRAME || ref == LAST_FRAME) {
+ ref_buf->is_valid = 0;
+ break;
+ } else {
+ buf->is_valid = 0;
+ }
+ }
+ }
+ } else {
+ ref_buf->is_valid = 0;
+ }
+ }
+}
+#endif // CONFIG_VAR_REFS
+
void av1_update_reference_frames(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
BufferPool *const pool = cm->buffer_pool;
- const int use_upsampled_ref = cpi->sf.use_upsampled_references;
- int new_uidx = 0;
// NOTE: Save the new show frame buffer index for --test-code=warn, i.e.,
// for the purpose to verify no mismatch between encoder and decoder.
if (cm->show_frame) cpi->last_show_frame_buf_idx = cm->new_fb_idx;
- if (use_upsampled_ref) {
-#if CONFIG_EXT_REFS
- if (cm->show_existing_frame) {
- new_uidx = cpi->upsampled_ref_idx[cpi->existing_fb_idx_to_show];
- // TODO(zoeliu): Once following is confirmed, remove it.
- assert(cpi->upsampled_ref_bufs[new_uidx].ref_count > 0);
- } else {
-#endif // CONFIG_EXT_REFS
- // Up-sample the current encoded frame.
- RefCntBuffer *bufs = pool->frame_bufs;
- const YV12_BUFFER_CONFIG *const ref = &bufs[cm->new_fb_idx].buf;
-
- new_uidx = upsample_ref_frame(cpi, ref);
-#if CONFIG_EXT_REFS
- assert(new_uidx != INVALID_IDX);
- }
-#endif // CONFIG_EXT_REFS
- }
// At this point the new frame has been encoded.
// If any buffer copy / swapping is signaled it should be done here.
if (cm->frame_type == KEY_FRAME) {
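The duplicate-reference rule added in enc_check_valid_ref_frames() above can be summarized as: a later reference that aliases an earlier one is invalidated, except that ALTREF survives a clash (the earlier reference is invalidated instead) unless the clash is with LAST. A minimal standalone sketch (not part of the patch; the frame names, ordering and buffer indices are hypothetical):

    /* Illustrative sketch only -- the duplicate-reference check in C99. */
    #include <stdio.h>

    enum { SK_LAST, SK_LAST2, SK_LAST3, SK_GOLDEN, SK_BWDREF, SK_ALTREF, SK_TOTAL };

    int main(void) {
      /* LAST2 aliases LAST; ALTREF aliases BWDREF. */
      const int buf_idx[SK_TOTAL] = { 3, 3, 5, 7, 9, 9 };
      int valid[SK_TOTAL];
      for (int f = 0; f < SK_TOTAL; ++f) {
        valid[f] = 1;
        for (int r = 0; r < f; ++r) {
          if (valid[r] && buf_idx[r] == buf_idx[f]) {
            if (f != SK_ALTREF || r == SK_LAST) {
              valid[f] = 0;  /* drop the later, duplicate reference */
              break;
            } else {
              valid[r] = 0;  /* ALTREF wins; drop the earlier reference instead */
            }
          }
        }
      }
      for (int f = 0; f < SK_TOTAL; ++f) printf("ref %d valid=%d\n", f, valid[f]);
      return 0;
    }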
@@ -3107,17 +3261,6 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
#endif // CONFIG_EXT_REFS
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
cm->new_fb_idx);
-
- if (use_upsampled_ref) {
- uref_cnt_fb(cpi->upsampled_ref_bufs,
- &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx);
-#if CONFIG_EXT_REFS
- uref_cnt_fb(cpi->upsampled_ref_bufs,
- &cpi->upsampled_ref_idx[cpi->bwd_fb_idx], new_uidx);
-#endif // CONFIG_EXT_REFS
- uref_cnt_fb(cpi->upsampled_ref_bufs,
- &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx);
- }
} else if (av1_preserve_existing_gf(cpi)) {
// We have decided to preserve the previously existing golden frame as our
// new ARF frame. However, in the short term in function
@@ -3131,10 +3274,6 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
cm->new_fb_idx);
- if (use_upsampled_ref)
- uref_cnt_fb(cpi->upsampled_ref_bufs,
- &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx);
-
tmp = cpi->alt_fb_idx;
cpi->alt_fb_idx = cpi->gld_fb_idx;
cpi->gld_fb_idx = tmp;
@@ -3146,19 +3285,6 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
// TODO(zoeliu): Do we need to copy cpi->interp_filter_selected[0] over to
// cpi->interp_filter_selected[GOLDEN_FRAME]?
#if CONFIG_EXT_REFS
- } else if (cpi->rc.is_last_bipred_frame) {
- // Refresh the LAST_FRAME with the BWDREF_FRAME and retire the LAST3_FRAME
- // by updating the virtual indices. Note that the frame BWDREF_FRAME points
- // to now should be retired, and it should not be used before refreshed.
- int tmp = cpi->lst_fb_idxes[LAST_REF_FRAMES - 1];
-
- shift_last_ref_frames(cpi);
- cpi->lst_fb_idxes[0] = cpi->bwd_fb_idx;
- cpi->bwd_fb_idx = tmp;
-
- memcpy(cpi->interp_filter_selected[LAST_FRAME],
- cpi->interp_filter_selected[BWDREF_FRAME],
- sizeof(cpi->interp_filter_selected[BWDREF_FRAME]));
} else if (cpi->rc.is_src_frame_ext_arf && cm->show_existing_frame) {
// Deal with the special case for showing existing internal ALTREF_FRAME
// Refresh the LAST_FRAME with the ALTREF_FRAME and retire the LAST3_FRAME
@@ -3195,9 +3321,6 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
}
#endif // CONFIG_EXT_REFS
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
- if (use_upsampled_ref)
- uref_cnt_fb(cpi->upsampled_ref_bufs, &cpi->upsampled_ref_idx[arf_idx],
- new_uidx);
memcpy(cpi->interp_filter_selected[ALTREF_FRAME + which_arf],
cpi->interp_filter_selected[0],
@@ -3207,9 +3330,6 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
if (cpi->refresh_golden_frame) {
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
cm->new_fb_idx);
- if (use_upsampled_ref)
- uref_cnt_fb(cpi->upsampled_ref_bufs,
- &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx);
#if !CONFIG_EXT_REFS
if (!cpi->rc.is_src_frame_alt_ref)
@@ -3234,9 +3354,6 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->bwd_fb_idx],
cm->new_fb_idx);
- if (use_upsampled_ref)
- uref_cnt_fb(cpi->upsampled_ref_bufs,
- &cpi->upsampled_ref_idx[cpi->bwd_fb_idx], new_uidx);
memcpy(cpi->interp_filter_selected[BWDREF_FRAME],
cpi->interp_filter_selected[0],
@@ -3293,11 +3410,6 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
ref_cnt_fb(pool->frame_bufs,
&cm->ref_frame_map[cpi->lst_fb_idxes[ref_frame]],
cm->new_fb_idx);
-
- if (use_upsampled_ref)
- uref_cnt_fb(cpi->upsampled_ref_bufs,
- &cpi->upsampled_ref_idx[cpi->lst_fb_idxes[ref_frame]],
- new_uidx);
}
} else {
int tmp;
@@ -3306,30 +3418,39 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
&cm->ref_frame_map[cpi->lst_fb_idxes[LAST_REF_FRAMES - 1]],
cm->new_fb_idx);
- if (use_upsampled_ref)
- uref_cnt_fb(
- cpi->upsampled_ref_bufs,
- &cpi->upsampled_ref_idx[cpi->lst_fb_idxes[LAST_REF_FRAMES - 1]],
- new_uidx);
-
tmp = cpi->lst_fb_idxes[LAST_REF_FRAMES - 1];
shift_last_ref_frames(cpi);
cpi->lst_fb_idxes[0] = tmp;
assert(cm->show_existing_frame == 0);
- // NOTE: Currently only LF_UPDATE and INTNL_OVERLAY_UPDATE frames are to
- // refresh the LAST_FRAME.
memcpy(cpi->interp_filter_selected[LAST_FRAME],
cpi->interp_filter_selected[0],
sizeof(cpi->interp_filter_selected[0]));
+
+ if (cpi->rc.is_last_bipred_frame) {
+ // Refresh the LAST_FRAME with the BWDREF_FRAME and retire the
+ // LAST3_FRAME by updating the virtual indices.
+ //
+      // NOTE: The source frame for BWDREF does not have a holding position
+      //       the way the OVERLAY frame does for ALTREF. Hence, to resolve
+      //       the reference virtual index reshuffling for BWDREF, the encoder
+      //       always specifies a LAST_BIPRED frame right before BWDREF and
+      //       completes the reshuffling job there.
+ tmp = cpi->lst_fb_idxes[LAST_REF_FRAMES - 1];
+
+ shift_last_ref_frames(cpi);
+ cpi->lst_fb_idxes[0] = cpi->bwd_fb_idx;
+ cpi->bwd_fb_idx = tmp;
+
+ memcpy(cpi->interp_filter_selected[LAST_FRAME],
+ cpi->interp_filter_selected[BWDREF_FRAME],
+ sizeof(cpi->interp_filter_selected[BWDREF_FRAME]));
+ }
}
#else
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
cm->new_fb_idx);
- if (use_upsampled_ref)
- uref_cnt_fb(cpi->upsampled_ref_bufs,
- &cpi->upsampled_ref_idx[cpi->lst_fb_idx], new_uidx);
if (!cpi->rc.is_src_frame_alt_ref) {
memcpy(cpi->interp_filter_selected[LAST_FRAME],
cpi->interp_filter_selected[0],
@@ -3344,61 +3465,8 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
#endif // DUMP_REF_FRAME_IMAGES
}
-static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
- MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
- struct loopfilter *lf = &cm->lf;
- if (is_lossless_requested(&cpi->oxcf)) {
- lf->filter_level = 0;
- } else {
- struct aom_usec_timer timer;
-
- aom_clear_system_state();
-
- aom_usec_timer_start(&timer);
-
- av1_pick_filter_level(cpi->source, cpi, cpi->sf.lpf_pick);
-
- aom_usec_timer_mark(&timer);
- cpi->time_pick_lpf += aom_usec_timer_elapsed(&timer);
- }
-
- if (lf->filter_level > 0) {
-#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_CB4X4
- av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
-#else
- if (cpi->num_workers > 1)
- av1_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane,
- lf->filter_level, 0, 0, cpi->workers,
- cpi->num_workers, &cpi->lf_row_sync);
- else
- av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
-#endif
- }
-#if CONFIG_CDEF
- if (is_lossless_requested(&cpi->oxcf)) {
- cm->cdef_bits = 0;
- cm->cdef_strengths[0] = 0;
- cm->nb_cdef_strengths = 1;
- } else {
- // Find cm->dering_level, cm->clpf_strength_u and cm->clpf_strength_v
- av1_cdef_search(cm->frame_to_show, cpi->source, cm, xd);
-
- // Apply the filter
- av1_cdef_frame(cm->frame_to_show, cm, xd);
- }
-#endif
-#if CONFIG_LOOP_RESTORATION
- av1_pick_filter_restoration(cpi->source, cpi, cpi->sf.lpf_pick);
- if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
- cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
- cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
- av1_loop_restoration_frame(cm->frame_to_show, cm, cm->rst_info, 7, 0, NULL);
- }
-#endif // CONFIG_LOOP_RESTORATION
- aom_extend_frame_inner_borders(cm->frame_to_show);
-}
-
static INLINE void alloc_frame_mvs(AV1_COMMON *const cm, int buffer_idx) {
+ assert(buffer_idx != INVALID_IDX);
RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
if (new_fb_ptr->mvs == NULL || new_fb_ptr->mi_rows < cm->mi_rows ||
new_fb_ptr->mi_cols < cm->mi_cols) {
@@ -3458,8 +3526,8 @@ void av1_scale_references(AV1_COMP *cpi) {
cm->byte_alignment, NULL, NULL, NULL))
aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
- scale_and_extend_frame(ref, &new_fb_ptr->buf, MAX_MB_PLANE,
- (int)cm->bit_depth);
+ av1_resize_and_extend_frame(ref, &new_fb_ptr->buf,
+ (int)cm->bit_depth);
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
alloc_frame_mvs(cm, new_fb);
}
@@ -3482,36 +3550,11 @@ void av1_scale_references(AV1_COMP *cpi) {
NULL, NULL, NULL))
aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
- scale_and_extend_frame(ref, &new_fb_ptr->buf, MAX_MB_PLANE);
+ av1_resize_and_extend_frame(ref, &new_fb_ptr->buf);
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
alloc_frame_mvs(cm, new_fb);
}
#endif // CONFIG_HIGHBITDEPTH
-
- if (cpi->sf.use_upsampled_references &&
- (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
- new_fb_ptr->buf.y_crop_height != cm->height)) {
- const int map_idx = get_ref_frame_map_idx(cpi, ref_frame);
- EncRefCntBuffer *ubuf =
- &cpi->upsampled_ref_bufs[cpi->upsampled_ref_idx[map_idx]];
-
- if (aom_realloc_frame_buffer(&ubuf->buf, (cm->width << 3),
- (cm->height << 3), cm->subsampling_x,
- cm->subsampling_y,
-#if CONFIG_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif
- (AOM_BORDER_IN_PIXELS << 3),
- cm->byte_alignment, NULL, NULL, NULL))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to allocate up-sampled frame buffer");
-#if CONFIG_HIGHBITDEPTH
- scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, 1,
- (int)cm->bit_depth);
-#else
- scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, 1);
-#endif
- }
} else {
const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
RefCntBuffer *const buf = &pool->frame_bufs[buf_idx];
@@ -3742,66 +3785,38 @@ static void init_motion_estimation(AV1_COMP *cpi) {
}
#if CONFIG_LOOP_RESTORATION
-static void set_restoration_tilesize(int width, int height,
+#define COUPLED_CHROMA_FROM_LUMA_RESTORATION 0
+static void set_restoration_tilesize(int width, int height, int sx, int sy,
RestorationInfo *rst) {
(void)width;
(void)height;
+ (void)sx;
+ (void)sy;
+#if COUPLED_CHROMA_FROM_LUMA_RESTORATION
+ int s = AOMMIN(sx, sy);
+#else
+ int s = 0;
+#endif // !COUPLED_CHROMA_FROM_LUMA_RESTORATION
+
rst[0].restoration_tilesize = (RESTORATION_TILESIZE_MAX >> 1);
- rst[1].restoration_tilesize = rst[0].restoration_tilesize;
- rst[2].restoration_tilesize = rst[0].restoration_tilesize;
+ rst[1].restoration_tilesize = rst[0].restoration_tilesize >> s;
+ rst[2].restoration_tilesize = rst[1].restoration_tilesize;
}
#endif // CONFIG_LOOP_RESTORATION
-static void set_scaled_size(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- AV1EncoderConfig *const oxcf = &cpi->oxcf;
-
- // TODO(afergs): Replace with call to av1_resize_pending? Could replace
- // scaled_size_set as well.
- // TODO(afergs): Realistically, if resize_pending is true, then the other
- // conditions must already be satisfied.
- // Try this first:
- // av1_resize_pending &&
- // (DYNAMIC && (1 Pass CBR || 2 Pass VBR)
- // STATIC && FIRST_FRAME)
- // Really, av1_resize_pending should just reflect the above.
- // TODO(afergs): Allow fixed resizing in AOM_CBR mode?
- // 2 Pass VBR: Resize if fixed resize and first frame, or dynamic resize and
- // a resize is pending.
- // 1 Pass CBR: Resize if dynamic resize and resize pending.
- if ((oxcf->pass == 2 && oxcf->rc_mode == AOM_VBR &&
- ((oxcf->resize_mode == RESIZE_FIXED && cm->current_video_frame == 0) ||
- (oxcf->resize_mode == RESIZE_DYNAMIC && av1_resize_pending(cpi)))) ||
- (oxcf->pass == 0 && oxcf->rc_mode == AOM_CBR &&
- oxcf->resize_mode == RESIZE_DYNAMIC && av1_resize_pending(cpi))) {
- // TODO(afergs): This feels hacky... Should it just set? Should
- // av1_set_next_scaled_size be a library function?
- av1_calculate_next_scaled_size(cpi, &oxcf->scaled_frame_width,
- &oxcf->scaled_frame_height);
- }
-}
-
static void set_frame_size(AV1_COMP *cpi, int width, int height) {
- int ref_frame;
AV1_COMMON *const cm = &cpi->common;
- AV1EncoderConfig *const oxcf = &cpi->oxcf;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+ int ref_frame;
if (width != cm->width || height != cm->height) {
// There has been a change in the encoded frame size
av1_set_size_literal(cpi, width, height);
-
- // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
- // TODO(afergs): Make condition just (pass == 0) or (rc_mode == CBR) -
- // UNLESS CBR starts allowing FIXED resizing. Then the resize
- // mode will need to get checked too.
- if (oxcf->pass == 0 && oxcf->rc_mode == AOM_CBR &&
- oxcf->resize_mode == RESIZE_DYNAMIC)
- set_mv_search_params(cpi); // TODO(afergs): Needed? Caller calls after...
+ set_mv_search_params(cpi);
}
#if !CONFIG_XIPHRC
- if (oxcf->pass == 2) {
+ if (cpi->oxcf.pass == 2) {
av1_set_target_rate(cpi);
}
#endif
@@ -3820,18 +3835,29 @@ static void set_frame_size(AV1_COMP *cpi, int width, int height) {
"Failed to allocate frame buffer");
#if CONFIG_LOOP_RESTORATION
- set_restoration_tilesize(cm->width, cm->height, cm->rst_info);
+ set_restoration_tilesize(
+#if CONFIG_FRAME_SUPERRES
+ cm->superres_upscaled_width, cm->superres_upscaled_height,
+#else
+ cm->width, cm->height,
+#endif // CONFIG_FRAME_SUPERRES
+ cm->subsampling_x, cm->subsampling_y, cm->rst_info);
for (int i = 0; i < MAX_MB_PLANE; ++i)
cm->rst_info[i].frame_restoration_type = RESTORE_NONE;
av1_alloc_restoration_buffers(cm);
for (int i = 0; i < MAX_MB_PLANE; ++i) {
cpi->rst_search[i].restoration_tilesize =
cm->rst_info[i].restoration_tilesize;
- av1_alloc_restoration_struct(cm, &cpi->rst_search[i], cm->width,
- cm->height);
+ av1_alloc_restoration_struct(cm, &cpi->rst_search[i],
+#if CONFIG_FRAME_SUPERRES
+ cm->superres_upscaled_width,
+ cm->superres_upscaled_height);
+#else
+ cm->width, cm->height);
+#endif // CONFIG_FRAME_SUPERRES
}
-#endif // CONFIG_LOOP_RESTORATION
- alloc_util_frame_buffers(cpi);
+#endif // CONFIG_LOOP_RESTORATION
+ alloc_util_frame_buffers(cpi); // TODO(afergs): Remove? Gets called anyways.
init_motion_estimation(cpi);
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
@@ -3857,6 +3883,12 @@ static void set_frame_size(AV1_COMP *cpi, int width, int height) {
ref_buf->buf = NULL;
}
}
+
+#if CONFIG_VAR_REFS
+ // Check duplicate reference frames
+ enc_check_valid_ref_frames(cpi);
+#endif // CONFIG_VAR_REFS
+
#if CONFIG_INTRABC
#if CONFIG_HIGHBITDEPTH
av1_setup_scale_factors_for_frame(&xd->sf_identity, cm->width, cm->height,
@@ -3872,84 +3904,167 @@ static void set_frame_size(AV1_COMP *cpi, int width, int height) {
}
static void setup_frame_size(AV1_COMP *cpi) {
- set_scaled_size(cpi);
+ int encode_width = cpi->oxcf.width;
+ int encode_height = cpi->oxcf.height;
+
+ uint8_t resize_num = av1_calculate_next_resize_scale(cpi);
+ av1_calculate_scaled_size(&encode_width, &encode_height, resize_num);
+
#if CONFIG_FRAME_SUPERRES
- int encode_width;
- int encode_height;
- av1_calculate_superres_size(cpi, &encode_width, &encode_height);
+ AV1_COMMON *cm = &cpi->common;
+ cm->superres_upscaled_width = encode_width;
+ cm->superres_upscaled_height = encode_height;
+ cm->superres_scale_numerator =
+      av1_calculate_next_superres_scale(cpi, encode_width, encode_height);
+ av1_calculate_scaled_size(&encode_width, &encode_height,
+ cm->superres_scale_numerator);
+#endif // CONFIG_FRAME_SUPERRES
+
set_frame_size(cpi, encode_width, encode_height);
+}
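The new setup_frame_size() applies two scale factors in sequence: the resize scale first, then the superres scale, recording the pre-superres size as the upscaled output size. A minimal worked sketch (not part of the patch; the helper and the denominator of 16 are assumptions standing in for av1_calculate_scaled_size() and SCALE_DENOMINATOR):

    /* Illustrative sketch only -- two-stage resize + superres scaling. */
    #include <stdio.h>

    #define SKETCH_SCALE_DEN 16  /* assumed denominator */

    static void sketch_scale(int *w, int *h, int num) {
      *w = *w * num / SKETCH_SCALE_DEN;
      *h = *h * num / SKETCH_SCALE_DEN;
    }

    int main(void) {
      int w = 1920, h = 1080;
      sketch_scale(&w, &h, 12);  /* resize scale 12/16   -> 1440x810 */
      int up_w = w, up_h = h;    /* pre-superres size = upscaled output size */
      sketch_scale(&w, &h, 8);   /* superres scale 8/16  ->  720x405 */
      printf("coded %dx%d, upscaled output %dx%d\n", w, h, up_w, up_h);
      return 0;
    }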
+
+#if CONFIG_FRAME_SUPERRES
+static void superres_post_encode(AV1_COMP *cpi) {
+ AV1_COMMON *cm = &cpi->common;
+
+ if (av1_superres_unscaled(cm)) return;
+
+ av1_superres_upscale(cm, NULL);
+
+  // If regular resizing is occurring, the source will need to be downscaled
+  // to match the upscaled superres resolution. Otherwise the original source
+  // is used.
+ if (av1_resize_unscaled(cm)) {
+ cpi->source = cpi->unscaled_source;
+ if (cpi->last_source != NULL) cpi->last_source = cpi->unscaled_last_source;
+ } else {
+ assert(cpi->unscaled_source->y_crop_width != cm->superres_upscaled_width);
+ assert(cpi->unscaled_source->y_crop_height != cm->superres_upscaled_height);
+ // Do downscale. cm->(width|height) has been updated by av1_superres_upscale
+ if (aom_realloc_frame_buffer(
+ &cpi->scaled_source, cm->superres_upscaled_width,
+ cm->superres_upscaled_height, cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif // CONFIG_HIGHBITDEPTH
+ AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL))
+ aom_internal_error(
+ &cm->error, AOM_CODEC_MEM_ERROR,
+ "Failed to reallocate scaled source buffer for superres");
+ assert(cpi->scaled_source.y_crop_width == cm->superres_upscaled_width);
+ assert(cpi->scaled_source.y_crop_height == cm->superres_upscaled_height);
+#if CONFIG_HIGHBITDEPTH
+ av1_resize_and_extend_frame(cpi->unscaled_source, &cpi->scaled_source,
+ (int)cm->bit_depth);
#else
- set_frame_size(cpi, cpi->oxcf.scaled_frame_width,
- cpi->oxcf.scaled_frame_height);
-#endif // CONFIG_FRAME_SUPERRES
+ av1_resize_and_extend_frame(cpi->unscaled_source, &cpi->scaled_source);
+#endif // CONFIG_HIGHBITDEPTH
+ cpi->source = &cpi->scaled_source;
+ }
}
+#endif // CONFIG_FRAME_SUPERRES
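After the in-loop upscale, the reconstruction in cm->frame_to_show is at the superres-upscaled size, so the loop-restoration search that runs later needs a source buffer at that same size. A hypothetical helper (not part of the patch), assuming the buffer names used in this file, summarizing the choice superres_post_encode() makes:

// Sketch: if no regular resize happened the original source already matches
// the upscaled size; otherwise use the copy re-scaled in superres_post_encode.
static const YV12_BUFFER_CONFIG *pick_restoration_source(const AV1_COMP *cpi) {
  return av1_resize_unscaled(&cpi->common) ? cpi->unscaled_source
                                           : &cpi->scaled_source;
}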
-static void reset_use_upsampled_references(AV1_COMP *cpi) {
- MV_REFERENCE_FRAME ref_frame;
+static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
+ MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
+ struct loopfilter *lf = &cm->lf;
+ int no_loopfilter = 0;
- // reset up-sampled reference buffer structure.
- init_upsampled_ref_frame_bufs(cpi);
+ if (is_lossless_requested(&cpi->oxcf)) no_loopfilter = 1;
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, ref_frame);
- int new_uidx = upsample_ref_frame(cpi, ref);
+#if CONFIG_EXT_TILE
+ // 0 loopfilter level is only necessary if individual tile
+ // decoding is required.
+ if (cm->single_tile_decoding) no_loopfilter = 1;
+#endif // CONFIG_EXT_TILE
+
+ if (no_loopfilter) {
+ lf->filter_level = 0;
+ } else {
+ struct aom_usec_timer timer;
+
+ aom_clear_system_state();
+
+ aom_usec_timer_start(&timer);
+
+ av1_pick_filter_level(cpi->source, cpi, cpi->sf.lpf_pick);
+
+ aom_usec_timer_mark(&timer);
+ cpi->time_pick_lpf += aom_usec_timer_elapsed(&timer);
+ }
- // Update the up-sampled reference index.
- cpi->upsampled_ref_idx[get_ref_frame_map_idx(cpi, ref_frame)] = new_uidx;
- cpi->upsampled_ref_bufs[new_uidx].ref_count++;
+ if (lf->filter_level > 0) {
+#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_CB4X4
+#if CONFIG_UV_LVL
+ av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
+ av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level_u, 1, 0);
+ av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level_v, 2, 0);
+#else
+ av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
+#endif // CONFIG_UV_LVL
+#else
+ if (cpi->num_workers > 1)
+ av1_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane,
+ lf->filter_level, 0, 0, cpi->workers,
+ cpi->num_workers, &cpi->lf_row_sync);
+ else
+ av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
+#endif
+ }
+#if CONFIG_CDEF
+ if (is_lossless_requested(&cpi->oxcf)) {
+ cm->cdef_bits = 0;
+ cm->cdef_strengths[0] = 0;
+ cm->nb_cdef_strengths = 1;
+ } else {
+ // Find cm->dering_level, cm->clpf_strength_u and cm->clpf_strength_v
+ av1_cdef_search(cm->frame_to_show, cpi->source, cm, xd,
+ cpi->oxcf.speed > 0);
+
+ // Apply the filter
+ av1_cdef_frame(cm->frame_to_show, cm, xd);
}
+#endif
+
+#if CONFIG_FRAME_SUPERRES
+ superres_post_encode(cpi);
+#endif // CONFIG_FRAME_SUPERRES
+
+#if CONFIG_LOOP_RESTORATION
+ av1_pick_filter_restoration(cpi->source, cpi, cpi->sf.lpf_pick);
+ if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
+ cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
+ cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
+ av1_loop_restoration_frame(cm->frame_to_show, cm, cm->rst_info, 7, 0, NULL);
+ }
+#endif // CONFIG_LOOP_RESTORATION
+ // TODO(debargha): Fix mv search range on encoder side
+ // aom_extend_frame_inner_borders(cm->frame_to_show);
+ aom_extend_frame_borders(cm->frame_to_show);
}
static void encode_without_recode_loop(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
int q = 0, bottom_index = 0, top_index = 0; // Dummy variables.
- const int use_upsampled_ref = cpi->sf.use_upsampled_references;
aom_clear_system_state();
-#if CONFIG_FRAME_SUPERRES
- // TODO(afergs): Figure out when is actually a good time to do superres
- cm->superres_scale_numerator = SUPERRES_SCALE_DENOMINATOR;
- // (uint8_t)(rand() % 9 + SUPERRES_SCALE_NUMERATOR_MIN);
- cpi->superres_pending = cpi->oxcf.superres_enabled && 0;
-#endif // CONFIG_FRAME_SUPERRES
-
+ set_size_independent_vars(cpi);
setup_frame_size(cpi);
- av1_resize_step(cpi);
-
- // For 1 pass CBR under dynamic resize mode: use faster scaling for source.
- // Only for 2x2 scaling for now.
- if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == AOM_CBR &&
- cpi->oxcf.resize_mode == RESIZE_DYNAMIC &&
- cpi->un_scaled_source->y_width == (cm->width << 1) &&
- cpi->un_scaled_source->y_height == (cm->height << 1)) {
- cpi->source = av1_scale_if_required_fast(cm, cpi->un_scaled_source,
- &cpi->scaled_source);
- if (cpi->unscaled_last_source != NULL)
- cpi->last_source = av1_scale_if_required_fast(
- cm, cpi->unscaled_last_source, &cpi->scaled_last_source);
- } else {
- cpi->source =
- av1_scale_if_required(cm, cpi->un_scaled_source, &cpi->scaled_source);
- if (cpi->unscaled_last_source != NULL)
- cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source,
- &cpi->scaled_last_source);
- }
+ assert(cm->width == cpi->scaled_source.y_crop_width);
+ assert(cm->height == cpi->scaled_source.y_crop_height);
+
+ set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
+
+ cpi->source =
+ av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source);
+ if (cpi->unscaled_last_source != NULL)
+ cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source,
+ &cpi->scaled_last_source);
if (frame_is_intra_only(cm) == 0) {
av1_scale_references(cpi);
}
- set_size_independent_vars(cpi);
- set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
-
- // cpi->sf.use_upsampled_references can be different from frame to frame.
- // Every time when cpi->sf.use_upsampled_references is changed from 0 to 1.
- // The reference frames for this frame have to be up-sampled before encoding.
- if (!use_upsampled_ref && cpi->sf.use_upsampled_references &&
- cm->frame_type != KEY_FRAME)
- reset_use_upsampled_references(cpi);
-
av1_set_quantizer(cm, q);
setup_frame(cpi);
suppress_active_map(cpi);
@@ -3968,11 +4083,6 @@ static void encode_without_recode_loop(AV1_COMP *cpi) {
// transform / motion compensation build reconstruction frame
av1_encode_frame(cpi);
-#if CONFIG_FRAME_SUPERRES
- // TODO(afergs): Upscale the frame to show
- cpi->superres_pending = 0;
-#endif // CONFIG_FRAME_SUPERRES
-
// Update some stats from cyclic refresh, and check if we should not update
// golden reference, for 1 pass CBR.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->frame_type != KEY_FRAME &&
@@ -4000,7 +4110,6 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
int frame_over_shoot_limit;
int frame_under_shoot_limit;
int q = 0, q_low = 0, q_high = 0;
- const int use_upsampled_ref = cpi->sf.use_upsampled_references;
set_size_independent_vars(cpi);
@@ -4009,22 +4118,9 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
setup_frame_size(cpi);
-#if CONFIG_FRAME_SUPERRES
- if (loop_count == 0 || av1_resize_pending(cpi) || cpi->superres_pending) {
-#else
- if (loop_count == 0 || av1_resize_pending(cpi)) {
-#endif // CONFIG_FRAME_SUPERRES
+ if (loop_count == 0) {
set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
- // cpi->sf.use_upsampled_references can be different from frame to frame.
- // Every time when cpi->sf.use_upsampled_references is changed from 0 to
- // 1.
- // The reference frames for this frame have to be up-sampled before
- // encoding.
- if (!use_upsampled_ref && cpi->sf.use_upsampled_references &&
- cm->frame_type != KEY_FRAME)
- reset_use_upsampled_references(cpi);
-
// TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
set_mv_search_params(cpi);
@@ -4034,9 +4130,6 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
undershoot_seen = 0;
#endif
- // Advance resize to next state now that updates are done
- av1_resize_step(cpi);
-
q_low = bottom_index;
q_high = top_index;
@@ -4051,8 +4144,7 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
}
cpi->source =
- av1_scale_if_required(cm, cpi->un_scaled_source, &cpi->scaled_source);
-
+ av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source);
if (cpi->unscaled_last_source != NULL)
cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source,
&cpi->scaled_last_source);
@@ -4174,8 +4266,6 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
#if !CONFIG_XIPHRC
int retries = 0;
- // TODO(afergs): Replace removed recode when av1_resize_pending is true
-
// Frame size out of permitted range:
// Update correction factor & compute new Q to try...
// Frame is too large
@@ -4285,7 +4375,7 @@ static int get_ref_frame_flags(const AV1_COMP *cpi) {
map[cpi->lst_fb_idxes[2]] == map[cpi->lst_fb_idxes[1]];
const int gld_is_last2 = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[1]];
const int gld_is_last3 = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[2]];
-#else
+#else // !CONFIG_ONE_SIDED_COMPOUND
const int bwd_is_last = map[cpi->bwd_fb_idx] == map[cpi->lst_fb_idxes[0]];
const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idxes[0]];
@@ -4299,12 +4389,12 @@ static int get_ref_frame_flags(const AV1_COMP *cpi) {
const int bwd_is_gld = map[cpi->bwd_fb_idx] == map[cpi->gld_fb_idx];
-#endif
+#endif // CONFIG_ONE_SIDED_COMPOUND
const int last2_is_alt = map[cpi->lst_fb_idxes[1]] == map[cpi->alt_fb_idx];
const int last3_is_alt = map[cpi->lst_fb_idxes[2]] == map[cpi->alt_fb_idx];
const int gld_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
const int bwd_is_alt = map[cpi->bwd_fb_idx] == map[cpi->alt_fb_idx];
-#else
+#else // !CONFIG_EXT_REFS
const int gld_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx];
const int gld_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx];
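These *_is_* flags record which reference slots currently alias the same underlying buffer; the rest of get_ref_frame_flags() (outside this hunk) uses them to drop duplicates from the set of references the mode search may use. A simplified sketch (not part of the patch) of that masking for the three-reference case, assuming the AOM_*_FLAG bit names from the public encoder API:

// Simplified sketch: clear a reference's flag when it duplicates an earlier
// one, so rate-distortion search does not evaluate the same buffer twice.
static int mask_duplicate_refs(int gld_is_last, int gld_is_alt,
                               int alt_is_last) {
  int flags = AOM_LAST_FLAG | AOM_GOLD_FLAG | AOM_ALT_FLAG;
  if (gld_is_last || gld_is_alt) flags &= ~AOM_GOLD_FLAG;
  if (alt_is_last) flags &= ~AOM_ALT_FLAG;
  return flags;
}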
@@ -4476,11 +4566,14 @@ static void dump_filtered_recon_frames(AV1_COMP *cpi) {
}
printf(
"\nFrame=%5d, encode_update_type[%5d]=%1d, show_existing_frame=%d, "
- "y_stride=%4d, uv_stride=%4d, width=%4d, height=%4d\n",
+ "source_alt_ref_active=%d, refresh_alt_ref_frame=%d, rf_level=%d, "
+ "y_stride=%4d, uv_stride=%4d, cm->width=%4d, cm->height=%4d\n",
cm->current_video_frame, cpi->twopass.gf_group.index,
cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
- cm->show_existing_frame, recon_buf->y_stride, recon_buf->uv_stride,
- cm->width, cm->height);
+ cm->show_existing_frame, cpi->rc.source_alt_ref_active,
+ cpi->refresh_alt_ref_frame,
+ cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index],
+ recon_buf->y_stride, recon_buf->uv_stride, cm->width, cm->height);
// --- Y ---
for (h = 0; h < cm->height; ++h) {
@@ -4502,8 +4595,6 @@ static void dump_filtered_recon_frames(AV1_COMP *cpi) {
}
#endif // DUMP_RECON_FRAMES
-#if CONFIG_EC_ADAPT
-
static void make_update_tile_list_enc(AV1_COMP *cpi, const int tile_rows,
const int tile_cols,
FRAME_CONTEXT *ec_ctxs[]) {
@@ -4512,7 +4603,6 @@ static void make_update_tile_list_enc(AV1_COMP *cpi, const int tile_rows,
ec_ctxs[i] = &cpi->tile_data[i].tctx;
}
-#endif
static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
uint8_t *dest, int skip_adapt,
unsigned int *frame_flags) {
@@ -4520,13 +4610,11 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
struct segmentation *const seg = &cm->seg;
TX_SIZE t;
-#if CONFIG_EC_ADAPT
FRAME_CONTEXT **tile_ctxs = aom_malloc(cm->tile_rows * cm->tile_cols *
sizeof(&cpi->tile_data[0].tctx));
aom_cdf_prob **cdf_ptrs =
aom_malloc(cm->tile_rows * cm->tile_cols *
sizeof(&cpi->tile_data[0].tctx.partition_cdf[0][0]));
-#endif
#if CONFIG_XIPHRC
int frame_type;
int drop_this_frame = 0;
@@ -4610,15 +4698,10 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
#endif
}
- cm->last_width = cm->width;
- cm->last_height = cm->height;
-
++cm->current_video_frame;
-#if CONFIG_EC_ADAPT
aom_free(tile_ctxs);
aom_free(cdf_ptrs);
-#endif
return;
}
#endif // CONFIG_EXT_REFS
@@ -4654,7 +4737,6 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
cm->reset_frame_context = RESET_FRAME_CONTEXT_CURRENT;
}
}
-#if CONFIG_TILE_GROUPS
if (cpi->oxcf.mtu == 0) {
cm->num_tg = cpi->oxcf.num_tile_groups;
} else {
@@ -4662,20 +4744,18 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
// updates
cm->num_tg = DEFAULT_MAX_NUM_TG;
}
-#endif
#if CONFIG_EXT_TILE
- cm->tile_encoding_mode = cpi->oxcf.tile_encoding_mode;
+ cm->large_scale_tile = cpi->oxcf.large_scale_tile;
+ cm->single_tile_decoding = cpi->oxcf.single_tile_decoding;
#endif // CONFIG_EXT_TILE
#if CONFIG_XIPHRC
if (drop_this_frame) {
av1_rc_postencode_update_drop_frame(cpi);
++cm->current_video_frame;
-#if CONFIG_EC_ADAPT
aom_free(tile_ctxs);
aom_free(cdf_ptrs);
-#endif
return;
}
#else
@@ -4686,10 +4766,8 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
if (av1_rc_drop_frame(cpi)) {
av1_rc_postencode_update_drop_frame(cpi);
++cm->current_video_frame;
-#if CONFIG_EC_ADAPT
aom_free(tile_ctxs);
aom_free(cdf_ptrs);
-#endif
return;
}
}
@@ -4770,6 +4848,10 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
cm->frame_to_show = get_frame_new_buffer(cm);
cm->frame_to_show->color_space = cm->color_space;
+#if CONFIG_COLORSPACE_HEADERS
+ cm->frame_to_show->transfer_function = cm->transfer_function;
+ cm->frame_to_show->chroma_sample_position = cm->chroma_sample_position;
+#endif
cm->frame_to_show->color_range = cm->color_range;
cm->frame_to_show->render_width = cm->render_width;
cm->frame_to_show->render_height = cm->render_height;
@@ -4786,10 +4868,8 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
av1_pack_bitstream(cpi, dest, size);
if (skip_adapt) {
-#if CONFIG_EC_ADAPT
aom_free(tile_ctxs);
aom_free(cdf_ptrs);
-#endif
return;
}
@@ -4823,11 +4903,13 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
cpi->td.rd_counts.coef_counts[t]);
#if CONFIG_ENTROPY_STATS
av1_accumulate_frame_counts(&aggregate_fc, &cm->counts);
+ assert(cm->frame_context_idx < FRAME_CONTEXTS);
+ av1_accumulate_frame_counts(&aggregate_fc_per_type[cm->frame_context_idx],
+ &cm->counts);
#endif // CONFIG_ENTROPY_STATS
if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
av1_adapt_coef_probs(cm);
av1_adapt_intra_frame_probs(cm);
-#if CONFIG_EC_ADAPT
make_update_tile_list_enc(cpi, cm->tile_rows, cm->tile_cols, tile_ctxs);
av1_average_tile_coef_cdfs(cpi->common.fc, tile_ctxs, cdf_ptrs,
cm->tile_rows * cm->tile_cols);
@@ -4837,7 +4919,6 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
av1_average_tile_pvq_cdfs(cpi->common.fc, tile_ctxs,
cm->tile_rows * cm->tile_cols);
#endif // CONFIG_PVQ
-#endif // CONFIG_EC_ADAPT
#if CONFIG_ADAPT_SCAN
av1_adapt_scan_order(cm);
#endif // CONFIG_ADAPT_SCAN
@@ -4847,12 +4928,10 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
av1_adapt_inter_frame_probs(cm);
av1_adapt_mv_probs(cm, cm->allow_high_precision_mv);
-#if CONFIG_EC_ADAPT
av1_average_tile_inter_cdfs(&cpi->common, cpi->common.fc, tile_ctxs,
cdf_ptrs, cm->tile_rows * cm->tile_cols);
av1_average_tile_mv_cdfs(cpi->common.fc, tile_ctxs, cdf_ptrs,
cm->tile_rows * cm->tile_cols);
-#endif
}
}
@@ -4888,10 +4967,8 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
if (drop_this_frame) {
av1_rc_postencode_update_drop_frame(cpi);
++cm->current_video_frame;
-#if CONFIG_EC_ADAPT
aom_free(tile_ctxs);
aom_free(cdf_ptrs);
-#endif
return;
}
#else // !CONFIG_XIPHRC
@@ -4915,13 +4992,6 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
cm->seg.update_data = 0;
cm->lf.mode_ref_delta_update = 0;
- // keep track of the last coded dimensions
- cm->last_width = cm->width;
- cm->last_height = cm->height;
-
- // reset to normal state now that we are done.
- if (!cm->show_existing_frame) cm->last_show_frame = cm->show_frame;
-
if (cm->show_frame) {
#if CONFIG_EXT_REFS
// TODO(zoeliu): We may only swamp mi and prev_mi for those frames that are
@@ -4935,13 +5005,20 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
#if CONFIG_EXT_REFS
// NOTE: Shall not refer to any frame not used as reference.
- if (cm->is_reference_frame)
+ if (cm->is_reference_frame) {
#endif // CONFIG_EXT_REFS
cm->prev_frame = cm->cur_frame;
-#if CONFIG_EC_ADAPT
+ // keep track of the last coded dimensions
+ cm->last_width = cm->width;
+ cm->last_height = cm->height;
+
+ // reset to normal state now that we are done.
+ cm->last_show_frame = cm->show_frame;
+#if CONFIG_EXT_REFS
+ }
+#endif // CONFIG_EXT_REFS
aom_free(tile_ctxs);
aom_free(cdf_ptrs);
-#endif
}
static void Pass0Encode(AV1_COMP *cpi, size_t *size, uint8_t *dest,
@@ -5220,12 +5297,17 @@ static void adjust_image_stat(double y, double u, double v, double all,
s->worst = AOMMIN(s->worst, all);
}
-static void compute_internal_stats(AV1_COMP *cpi) {
+static void compute_internal_stats(AV1_COMP *cpi, int frame_bytes) {
AV1_COMMON *const cm = &cpi->common;
double samples = 0.0;
uint32_t in_bit_depth = 8;
uint32_t bit_depth = 8;
+#if CONFIG_INTER_STATS_ONLY
+ if (cm->frame_type == KEY_FRAME) return; // skip key frame
+#endif
+ cpi->bytes += frame_bytes;
+
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth) {
in_bit_depth = cpi->oxcf.input_bit_depth;
@@ -5413,8 +5495,7 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
if (cpi->b_calculate_psnr) generate_psnr_packet(cpi);
#if CONFIG_INTERNAL_STATS
- compute_internal_stats(cpi);
- cpi->bytes += (int)(*size);
+ compute_internal_stats(cpi, (int)(*size));
#endif // CONFIG_INTERNAL_STATS
// Clear down mmx registers
@@ -5448,8 +5529,17 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
cpi->alt_ref_source = source;
if (oxcf->arnr_max_frames > 0) {
- // Produce the filtered ARF frame.
- av1_temporal_filter(cpi, arf_src_index);
+// Produce the filtered ARF frame.
+#if CONFIG_BGSPRITE
+ int bgsprite_ret = av1_background_sprite(cpi, arf_src_index);
+ // Do temporal filter if bgsprite not generated.
+ if (bgsprite_ret != 0)
+#endif // CONFIG_BGSPRITE
+ av1_temporal_filter(cpi,
+#if CONFIG_BGSPRITE
+ NULL,
+#endif // CONFIG_BGSPRITE
+ arf_src_index);
aom_extend_frame_borders(&cpi->alt_ref_buffer);
force_src_buffer = &cpi->alt_ref_buffer;
}
@@ -5489,7 +5579,7 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
if ((last_source = av1_lookahead_peek(cpi->lookahead, -1)) == NULL)
return -1;
}
-
+ if (cm->current_video_frame > 0) assert(last_source != NULL);
// Read in the source frame.
source = av1_lookahead_pop(cpi->lookahead, flush);
@@ -5501,11 +5591,9 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
check_src_altref(cpi, source);
}
}
-
if (source) {
- cpi->un_scaled_source = cpi->source =
+ cpi->unscaled_source = cpi->source =
force_src_buffer ? force_src_buffer : &source->img;
-
cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL;
*time_stamp = source->ts_start;
@@ -5576,7 +5664,6 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
av1_rc_get_second_pass_params(cpi);
} else if (oxcf->pass == 1) {
setup_frame_size(cpi);
- av1_resize_step(cpi);
}
#endif
@@ -5645,8 +5732,7 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
#if CONFIG_INTERNAL_STATS
if (oxcf->pass != 1) {
- compute_internal_stats(cpi);
- cpi->bytes += (int)(*size);
+ compute_internal_stats(cpi, (int)(*size));
}
#endif // CONFIG_INTERNAL_STATS
@@ -5712,9 +5798,10 @@ int av1_set_internal_size(AV1_COMP *cpi, AOM_SCALING horiz_mode,
int av1_set_size_literal(AV1_COMP *cpi, int width, int height) {
AV1_COMMON *cm = &cpi->common;
#if CONFIG_HIGHBITDEPTH
- check_initial_width(cpi, cm->use_highbitdepth, 1, 1);
+ check_initial_width(cpi, cm->use_highbitdepth, cm->subsampling_x,
+ cm->subsampling_y);
#else
- check_initial_width(cpi, 1, 1);
+ check_initial_width(cpi, cm->subsampling_x, cm->subsampling_y);
#endif // CONFIG_HIGHBITDEPTH
if (width <= 0 || height <= 0) return 1;
diff --git a/third_party/aom/av1/encoder/encoder.h b/third_party/aom/av1/encoder/encoder.h
index ee1257c2d..9b98975b7 100644
--- a/third_party/aom/av1/encoder/encoder.h
+++ b/third_party/aom/av1/encoder/encoder.h
@@ -21,6 +21,7 @@
#include "av1/common/entropymode.h"
#include "av1/common/thread_common.h"
#include "av1/common/onyxc_int.h"
+#include "av1/common/resize.h"
#include "av1/encoder/aq_cyclicrefresh.h"
#if CONFIG_ANS
#include "aom_dsp/ans.h"
@@ -52,6 +53,10 @@
extern "C" {
#endif
+#if CONFIG_SPEED_REFS
+#define MIN_SPEED_REFS_BLKSIZE BLOCK_16X16
+#endif // CONFIG_SPEED_REFS
+
typedef struct {
int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS];
int nmv_costs[NMV_CONTEXTS][2][MV_VALS];
@@ -128,7 +133,14 @@ typedef enum {
RESIZE_NONE = 0, // No frame resizing allowed.
RESIZE_FIXED = 1, // All frames are coded at the specified dimension.
RESIZE_DYNAMIC = 2 // Coded size of each frame is determined by the codec.
-} RESIZE_TYPE;
+} RESIZE_MODE;
+#if CONFIG_FRAME_SUPERRES
+typedef enum {
+ SUPERRES_NONE = 0,
+ SUPERRES_FIXED = 1,
+ SUPERRES_DYNAMIC = 2
+} SUPERRES_MODE;
+#endif // CONFIG_FRAME_SUPERRES
typedef struct AV1EncoderConfig {
BITSTREAM_PROFILE profile;
@@ -190,22 +202,22 @@ typedef struct AV1EncoderConfig {
int qm_minlevel;
int qm_maxlevel;
#endif
-#if CONFIG_TILE_GROUPS
unsigned int num_tile_groups;
unsigned int mtu;
-#endif
#if CONFIG_TEMPMV_SIGNALING
unsigned int disable_tempmv;
#endif
// Internal frame size scaling.
- RESIZE_TYPE resize_mode;
- int scaled_frame_width;
- int scaled_frame_height;
+ RESIZE_MODE resize_mode;
+ uint8_t resize_scale_numerator;
+ uint8_t resize_kf_scale_numerator;
#if CONFIG_FRAME_SUPERRES
- // Frame Super-Resolution size scaling
- int superres_enabled;
+ // Frame Super-Resolution size scaling.
+ SUPERRES_MODE superres_mode;
+ uint8_t superres_scale_numerator;
+ uint8_t superres_kf_scale_numerator;
#endif // CONFIG_FRAME_SUPERRES
// Enable feature to reduce the frame quantization every x frames.
@@ -265,6 +277,10 @@ typedef struct AV1EncoderConfig {
int use_highbitdepth;
#endif
aom_color_space_t color_space;
+#if CONFIG_COLORSPACE_HEADERS
+ aom_transfer_function_t transfer_function;
+ aom_chroma_sample_position_t chroma_sample_position;
+#endif
int color_range;
int render_width;
int render_height;
@@ -276,7 +292,8 @@ typedef struct AV1EncoderConfig {
int ans_window_size_log2;
#endif // CONFIG_ANS && ANS_MAX_SYMBOLS
#if CONFIG_EXT_TILE
- unsigned int tile_encoding_mode;
+ unsigned int large_scale_tile;
+ unsigned int single_tile_decoding;
#endif // CONFIG_EXT_TILE
unsigned int motion_vector_unit_test;
@@ -289,8 +306,8 @@ static INLINE int is_lossless_requested(const AV1EncoderConfig *cfg) {
// TODO(jingning) All spatially adaptive variables should go to TileDataEnc.
typedef struct TileDataEnc {
TileInfo tile_info;
- int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
- int mode_map[BLOCK_SIZES][MAX_MODES];
+ int thresh_freq_fact[BLOCK_SIZES_ALL][MAX_MODES];
+ int mode_map[BLOCK_SIZES_ALL][MAX_MODES];
int m_search_count;
int ex_search_count;
#if CONFIG_PVQ
@@ -299,9 +316,7 @@ typedef struct TileDataEnc {
#if CONFIG_CFL
CFL_CTX cfl;
#endif
-#if CONFIG_EC_ADAPT
DECLARE_ALIGNED(16, FRAME_CONTEXT, tctx);
-#endif
} TileDataEnc;
typedef struct RD_COUNTS {
@@ -311,6 +326,8 @@ typedef struct RD_COUNTS {
// Stores number of 4x4 blocks using global motion per reference frame.
int global_motion_used[TOTAL_REFS_PER_FRAME];
#endif // CONFIG_GLOBAL_MOTION
+ int single_ref_used_flag;
+ int compound_ref_used_flag;
} RD_COUNTS;
typedef struct ThreadData {
@@ -372,18 +389,11 @@ typedef struct AV1_COMP {
YV12_BUFFER_CONFIG *source;
YV12_BUFFER_CONFIG *last_source; // NULL for first frame and alt_ref frames
- YV12_BUFFER_CONFIG *un_scaled_source;
+ YV12_BUFFER_CONFIG *unscaled_source;
YV12_BUFFER_CONFIG scaled_source;
YV12_BUFFER_CONFIG *unscaled_last_source;
YV12_BUFFER_CONFIG scaled_last_source;
- // Up-sampled reference buffers
- // NOTE(zoeliu): It is needed to allocate sufficient space to the up-sampled
- // reference buffers, which should include the up-sampled version of all the
- // possibly stored references plus the currently coded frame itself.
- EncRefCntBuffer upsampled_ref_bufs[REF_FRAMES + 1];
- int upsampled_ref_idx[REF_FRAMES + 1];
-
// For a still frame, this flag is set to 1 to skip partition search.
int partition_search_skippable_frame;
@@ -471,7 +481,7 @@ typedef struct AV1_COMP {
fractional_mv_step_fp *find_fractional_mv_step;
av1_full_search_fn_t full_search_sad; // It is currently unused.
av1_diamond_search_fn_t diamond_search_sad;
- aom_variance_fn_ptr_t fn_ptr[BLOCK_SIZES];
+ aom_variance_fn_ptr_t fn_ptr[BLOCK_SIZES_ALL];
uint64_t time_receive_data;
uint64_t time_compress_data;
uint64_t time_pick_lpf;
@@ -538,17 +548,24 @@ typedef struct AV1_COMP {
#if CONFIG_EXT_INTER
unsigned int inter_compound_mode_cost[INTER_MODE_CONTEXTS]
[INTER_COMPOUND_MODES];
+#if CONFIG_COMPOUND_SINGLEREF
+ unsigned int inter_singleref_comp_mode_cost[INTER_MODE_CONTEXTS]
+ [INTER_SINGLEREF_COMP_MODES];
+#endif // CONFIG_COMPOUND_SINGLEREF
#if CONFIG_INTERINTRA
unsigned int interintra_mode_cost[BLOCK_SIZE_GROUPS][INTERINTRA_MODES];
#endif // CONFIG_INTERINTRA
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- int motion_mode_cost[BLOCK_SIZES][MOTION_MODES];
+ int motion_mode_cost[BLOCK_SIZES_ALL][MOTION_MODES];
#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
- int motion_mode_cost1[BLOCK_SIZES][2];
+ int motion_mode_cost1[BLOCK_SIZES_ALL][2];
#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
+#if CONFIG_MOTION_VAR && CONFIG_NCOBMC_ADAPT_WEIGHT
+ int ncobmc_mode_cost[ADAPT_OVERLAP_BLOCKS][MAX_NCOBMC_MODES];
+#endif // CONFIG_MOTION_VAR && CONFIG_NCOBMC_ADAPT_WEIGHT
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- int intra_uv_mode_cost[INTRA_MODES][INTRA_MODES];
+ int intra_uv_mode_cost[INTRA_MODES][UV_INTRA_MODES];
int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
#if CONFIG_EXT_PARTITION_TYPES
@@ -601,18 +618,10 @@ typedef struct AV1_COMP {
TileBufferEnc tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS];
int resize_state;
- int resize_scale_num;
- int resize_scale_den;
- int resize_next_scale_num;
- int resize_next_scale_den;
int resize_avg_qp;
int resize_buffer_underflow;
int resize_count;
-#if CONFIG_FRAME_SUPERRES
- int superres_pending;
-#endif // CONFIG_FRAME_SUPERRES
-
// VARIANCE_AQ segment map refresh
int vaq_refresh;
@@ -640,6 +649,15 @@ typedef struct AV1_COMP {
#if CONFIG_LV_MAP
tran_low_t *tcoeff_buf[MAX_MB_PLANE];
#endif
+
+#if CONFIG_SPEED_REFS
+ int sb_scanning_pass_idx;
+#endif // CONFIG_SPEED_REFS
+
+#if CONFIG_FLEX_REFS
+ int extra_arf_allowed;
+ int bwd_ref_allowed;
+#endif // CONFIG_FLEX_REFS
} AV1_COMP;
void av1_initialize_enc(void);
@@ -729,14 +747,6 @@ static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
: NULL;
}
-static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(
- const AV1_COMP *cpi, const MV_REFERENCE_FRAME ref_frame) {
- // Use up-sampled reference frames.
- const int buf_idx =
- cpi->upsampled_ref_idx[get_ref_frame_map_idx(cpi, ref_frame)];
- return &cpi->upsampled_ref_bufs[buf_idx].buf;
-}
-
#if CONFIG_EXT_REFS || CONFIG_TEMPMV_SIGNALING
static INLINE int enc_is_ref_frame_buf(AV1_COMP *cpi, RefCntBuffer *frame_buf) {
MV_REFERENCE_FRAME ref_frame;
@@ -831,23 +841,22 @@ static INLINE void uref_cnt_fb(EncRefCntBuffer *ubufs, int *uidx,
ubufs[new_uidx].ref_count++;
}
-// Returns 1 if a resize is pending and 0 otherwise.
-static INLINE int av1_resize_pending(const struct AV1_COMP *cpi) {
- return cpi->resize_scale_num != cpi->resize_next_scale_num ||
- cpi->resize_scale_den != cpi->resize_next_scale_den;
-}
-
// Returns 1 if a frame is unscaled and 0 otherwise.
-static INLINE int av1_resize_unscaled(const struct AV1_COMP *cpi) {
- return cpi->resize_scale_num == cpi->resize_scale_den;
+static INLINE int av1_resize_unscaled(const AV1_COMMON *cm) {
+#if CONFIG_FRAME_SUPERRES
+ return cm->superres_upscaled_width == cm->render_width &&
+ cm->superres_upscaled_height == cm->render_height;
+#else
+ return cm->width == cm->render_width && cm->height == cm->render_height;
+#endif // CONFIG_FRAME_SUPERRES
}
-// Moves resizing to the next state. This is just setting the numerator and
-// denominator to the next numerator and denominator, causing
-// av1_resize_pending to subsequently return false.
-static INLINE void av1_resize_step(struct AV1_COMP *cpi) {
- cpi->resize_scale_num = cpi->resize_next_scale_num;
- cpi->resize_scale_den = cpi->resize_next_scale_den;
+static INLINE int av1_frame_unscaled(const AV1_COMMON *cm) {
+#if CONFIG_FRAME_SUPERRES
+ return av1_superres_unscaled(cm) && av1_resize_unscaled(cm);
+#else
+ return av1_resize_unscaled(cm);
+#endif // CONFIG_FRAME_SUPERRES
}
#ifdef __cplusplus
diff --git a/third_party/aom/av1/encoder/encodetxb.c b/third_party/aom/av1/encoder/encodetxb.c
index 731642064..3aa4c183e 100644
--- a/third_party/aom/av1/encoder/encodetxb.c
+++ b/third_party/aom/av1/encoder/encodetxb.c
@@ -70,38 +70,43 @@ static void write_golomb(aom_writer *w, int level) {
}
void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
- aom_writer *w, int block, int plane,
- const tran_low_t *tcoeff, uint16_t eob,
- TXB_CTX *txb_ctx) {
+ aom_writer *w, int blk_row, int blk_col, int block,
+ int plane, TX_SIZE tx_size, const tran_low_t *tcoeff,
+ uint16_t eob, TXB_CTX *txb_ctx) {
aom_prob *nz_map;
aom_prob *eob_flag;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const PLANE_TYPE plane_type = get_plane_type(plane);
- const TX_SIZE tx_size = get_tx_size(plane, xd);
- const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
- const SCAN_ORDER *const scan_order =
- get_scan(cm, tx_size, tx_type, is_inter_block(mbmi));
+ const TX_SIZE txs_ctx = get_txsize_context(tx_size);
+ const TX_TYPE tx_type =
+ av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
+ const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi);
const int16_t *scan = scan_order->scan;
+ const int16_t *iscan = scan_order->iscan;
int c;
int is_nz;
const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
+ const int height = tx_size_high[tx_size];
const int seg_eob = tx_size_2d[tx_size];
- uint8_t txb_mask[32 * 32] = { 0 };
uint16_t update_eob = 0;
- aom_write(w, eob == 0, cm->fc->txb_skip[tx_size][txb_ctx->txb_skip_ctx]);
+ (void)blk_row;
+ (void)blk_col;
+
+ aom_write(w, eob == 0, cm->fc->txb_skip[txs_ctx][txb_ctx->txb_skip_ctx]);
if (eob == 0) return;
#if CONFIG_TXK_SEL
- av1_write_tx_type(cm, xd, block, plane, w);
+ av1_write_tx_type(cm, xd, blk_row, blk_col, block, plane,
+ get_min_tx_size(tx_size), w);
#endif
- nz_map = cm->fc->nz_map[tx_size][plane_type];
- eob_flag = cm->fc->eob_flag[tx_size][plane_type];
+ nz_map = cm->fc->nz_map[txs_ctx][plane_type];
+ eob_flag = cm->fc->eob_flag[txs_ctx][plane_type];
for (c = 0; c < eob; ++c) {
- int coeff_ctx = get_nz_map_ctx(tcoeff, txb_mask, scan[c], bwl);
- int eob_ctx = get_eob_ctx(tcoeff, scan[c], bwl);
+ int coeff_ctx = get_nz_map_ctx(tcoeff, scan[c], bwl, height, iscan);
+ int eob_ctx = get_eob_ctx(tcoeff, scan[c], txs_ctx);
tran_low_t v = tcoeff[scan[c]];
is_nz = (v != 0);
@@ -113,12 +118,11 @@ void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
if (is_nz) {
aom_write(w, c == (eob - 1), eob_flag[eob_ctx]);
}
- txb_mask[scan[c]] = 1;
}
int i;
for (i = 0; i < NUM_BASE_LEVELS; ++i) {
- aom_prob *coeff_base = cm->fc->coeff_base[tx_size][plane_type][i];
+ aom_prob *coeff_base = cm->fc->coeff_base[txs_ctx][plane_type][i];
update_eob = 0;
for (c = eob - 1; c >= 0; --c) {
@@ -129,7 +133,7 @@ void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
if (level <= i) continue;
- ctx = get_base_ctx(tcoeff, scan[c], bwl, i + 1);
+ ctx = get_base_ctx(tcoeff, scan[c], bwl, height, i + 1);
if (level == i + 1) {
aom_write(w, 1, coeff_base[ctx]);
@@ -161,13 +165,13 @@ void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
}
// level is above 1.
- ctx = get_br_ctx(tcoeff, scan[c], bwl);
+ ctx = get_br_ctx(tcoeff, scan[c], bwl, height);
for (idx = 0; idx < COEFF_BASE_RANGE; ++idx) {
if (level == (idx + 1 + NUM_BASE_LEVELS)) {
- aom_write(w, 1, cm->fc->coeff_lps[tx_size][plane_type][ctx]);
+ aom_write(w, 1, cm->fc->coeff_lps[txs_ctx][plane_type][ctx]);
break;
}
- aom_write(w, 0, cm->fc->coeff_lps[tx_size][plane_type][ctx]);
+ aom_write(w, 0, cm->fc->coeff_lps[txs_ctx][plane_type][ctx]);
}
if (idx < COEFF_BASE_RANGE) continue;
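For orientation: av1_write_coeffs_txb() above codes each absolute level in stages, namely a significance flag from the nz map, up to NUM_BASE_LEVELS base-level flags, a unary run of up to COEFF_BASE_RANGE coeff_lps flags, and finally an Exp-Golomb remainder via write_golomb(). A small sketch (not part of the patch) of which stage settles a given level, using those LV_MAP constants:

// Sketch only: which coding stage finally determines an absolute level in the
// ladder used by av1_write_coeffs_txb() / av1_cost_coeffs_txb().
static const char *level_stage(int abs_level) {
  if (abs_level == 0) return "nz map flag only";
  if (abs_level <= NUM_BASE_LEVELS) return "base-level flags";
  if (abs_level <= NUM_BASE_LEVELS + COEFF_BASE_RANGE)
    return "coeff_lps unary range";
  return "Exp-Golomb remainder (write_golomb)";
}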
@@ -183,7 +187,10 @@ void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x,
BLOCK_SIZE bsize = mbmi->sb_type;
struct macroblockd_plane *pd = &xd->plane[plane];
-#if CONFIG_CB4X4
+#if CONFIG_CHROMA_SUB8X8
+ const BLOCK_SIZE plane_bsize =
+ AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
+#elif CONFIG_CB4X4
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
#else
const BLOCK_SIZE plane_bsize =
@@ -191,7 +198,7 @@ void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x,
#endif
const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
- TX_SIZE tx_size = get_tx_size(plane, xd);
+ const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
const int bkw = tx_size_wide_unit[tx_size];
const int bkh = tx_size_high_unit[tx_size];
const int step = tx_size_wide_unit[tx_size] * tx_size_high_unit[tx_size];
@@ -203,7 +210,8 @@ void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x,
uint16_t eob = x->mbmi_ext->eobs[plane][block];
TXB_CTX txb_ctx = { x->mbmi_ext->txb_skip_ctx[plane][block],
x->mbmi_ext->dc_sign_ctx[plane][block] };
- av1_write_coeffs_txb(cm, xd, w, block, plane, tcoeff, eob, &txb_ctx);
+ av1_write_coeffs_txb(cm, xd, w, row, col, block, plane, tx_size, tcoeff,
+ eob, &txb_ctx);
block += step;
}
}
@@ -211,7 +219,7 @@ void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x,
static INLINE void get_base_ctx_set(const tran_low_t *tcoeffs,
int c, // raster order
- const int bwl,
+ const int bwl, const int height,
int ctx_set[NUM_BASE_LEVELS]) {
const int row = c >> bwl;
const int col = c - (row << bwl);
@@ -226,7 +234,7 @@ static INLINE void get_base_ctx_set(const tran_low_t *tcoeffs,
int ref_col = col + base_ref_offset[idx][1];
int pos = (ref_row << bwl) + ref_col;
- if (ref_row < 0 || ref_col < 0 || ref_row >= stride || ref_col >= stride)
+ if (ref_row < 0 || ref_col < 0 || ref_row >= height || ref_col >= stride)
continue;
abs_coeff = abs(tcoeffs[pos]);
@@ -280,12 +288,14 @@ static INLINE int get_base_cost(tran_low_t abs_qc, int ctx,
}
int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
- int block, TXB_CTX *txb_ctx) {
+ int blk_row, int blk_col, int block, TX_SIZE tx_size,
+ TXB_CTX *txb_ctx) {
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
- const TX_SIZE tx_size = get_tx_size(plane, xd);
+ TX_SIZE txs_ctx = get_txsize_context(tx_size);
const PLANE_TYPE plane_type = get_plane_type(plane);
- const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ const TX_TYPE tx_type =
+ av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const struct macroblock_plane *p = &x->plane[plane];
const int eob = p->eobs[block];
@@ -293,27 +303,26 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
int c, cost;
const int seg_eob = AOMMIN(eob, tx_size_2d[tx_size] - 1);
int txb_skip_ctx = txb_ctx->txb_skip_ctx;
- aom_prob *nz_map = xd->fc->nz_map[tx_size][plane_type];
+ aom_prob *nz_map = xd->fc->nz_map[txs_ctx][plane_type];
const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
- // txb_mask is only initialized for once here. After that, it will be set when
- // coding zero map and then reset when coding level 1 info.
- uint8_t txb_mask[32 * 32] = { 0 };
+ const int height = tx_size_high[tx_size];
+
aom_prob(*coeff_base)[COEFF_BASE_CONTEXTS] =
- xd->fc->coeff_base[tx_size][plane_type];
+ xd->fc->coeff_base[txs_ctx][plane_type];
- const SCAN_ORDER *const scan_order =
- get_scan(cm, tx_size, tx_type, is_inter_block(mbmi));
+ const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi);
const int16_t *scan = scan_order->scan;
+ const int16_t *iscan = scan_order->iscan;
cost = 0;
if (eob == 0) {
- cost = av1_cost_bit(xd->fc->txb_skip[tx_size][txb_skip_ctx], 1);
+ cost = av1_cost_bit(xd->fc->txb_skip[txs_ctx][txb_skip_ctx], 1);
return cost;
}
- cost = av1_cost_bit(xd->fc->txb_skip[tx_size][txb_skip_ctx], 0);
+ cost = av1_cost_bit(xd->fc->txb_skip[txs_ctx][txb_skip_ctx], 0);
#if CONFIG_TXK_SEL
cost += av1_tx_type_cost(cpi, xd, mbmi->sb_type, plane, tx_size, tx_type);
@@ -325,7 +334,7 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
int level = abs(v);
if (c < seg_eob) {
- int coeff_ctx = get_nz_map_ctx(qcoeff, txb_mask, scan[c], bwl);
+ int coeff_ctx = get_nz_map_ctx(qcoeff, scan[c], bwl, height, iscan);
cost += av1_cost_bit(nz_map[coeff_ctx], is_nz);
}
@@ -342,7 +351,7 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
cost += av1_cost_bit(128, sign);
}
- get_base_ctx_set(qcoeff, scan[c], bwl, ctx_ls);
+ get_base_ctx_set(qcoeff, scan[c], bwl, height, ctx_ls);
int i;
for (i = 0; i < NUM_BASE_LEVELS; ++i) {
@@ -359,15 +368,15 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
int idx;
int ctx;
- ctx = get_br_ctx(qcoeff, scan[c], bwl);
+ ctx = get_br_ctx(qcoeff, scan[c], bwl, height);
for (idx = 0; idx < COEFF_BASE_RANGE; ++idx) {
if (level == (idx + 1 + NUM_BASE_LEVELS)) {
cost +=
- av1_cost_bit(xd->fc->coeff_lps[tx_size][plane_type][ctx], 1);
+ av1_cost_bit(xd->fc->coeff_lps[txs_ctx][plane_type][ctx], 1);
break;
}
- cost += av1_cost_bit(xd->fc->coeff_lps[tx_size][plane_type][ctx], 0);
+ cost += av1_cost_bit(xd->fc->coeff_lps[txs_ctx][plane_type][ctx], 0);
}
if (idx >= COEFF_BASE_RANGE) {
@@ -389,13 +398,11 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
}
if (c < seg_eob) {
- int eob_ctx = get_eob_ctx(qcoeff, scan[c], bwl);
- cost += av1_cost_bit(xd->fc->eob_flag[tx_size][plane_type][eob_ctx],
+ int eob_ctx = get_eob_ctx(qcoeff, scan[c], txs_ctx);
+ cost += av1_cost_bit(xd->fc->eob_flag[txs_ctx][plane_type][eob_ctx],
c == (eob - 1));
}
}
-
- txb_mask[scan[c]] = 1;
}
return cost;
@@ -409,26 +416,26 @@ static INLINE int has_base(tran_low_t qc, int base_idx) {
static void gen_base_count_mag_arr(int (*base_count_arr)[MAX_TX_SQUARE],
int (*base_mag_arr)[2],
const tran_low_t *qcoeff, int stride,
- int eob, const int16_t *scan) {
+ int height, int eob, const int16_t *scan) {
for (int c = 0; c < eob; ++c) {
const int coeff_idx = scan[c]; // raster order
if (!has_base(qcoeff[coeff_idx], 0)) continue;
const int row = coeff_idx / stride;
const int col = coeff_idx % stride;
int *mag = base_mag_arr[coeff_idx];
- get_mag(mag, qcoeff, stride, row, col, base_ref_offset,
+ get_mag(mag, qcoeff, stride, height, row, col, base_ref_offset,
BASE_CONTEXT_POSITION_NUM);
for (int i = 0; i < NUM_BASE_LEVELS; ++i) {
if (!has_base(qcoeff[coeff_idx], i)) continue;
int *count = base_count_arr[i] + coeff_idx;
- *count = get_level_count(qcoeff, stride, row, col, i, base_ref_offset,
- BASE_CONTEXT_POSITION_NUM);
+ *count = get_level_count(qcoeff, stride, height, row, col, i,
+ base_ref_offset, BASE_CONTEXT_POSITION_NUM);
}
}
}
static void gen_nz_count_arr(int(*nz_count_arr), const tran_low_t *qcoeff,
- int stride, int eob,
+ int stride, int height, int eob,
const SCAN_ORDER *scan_order) {
const int16_t *scan = scan_order->scan;
const int16_t *iscan = scan_order->iscan;
@@ -436,7 +443,8 @@ static void gen_nz_count_arr(int(*nz_count_arr), const tran_low_t *qcoeff,
const int coeff_idx = scan[c]; // raster order
const int row = coeff_idx / stride;
const int col = coeff_idx % stride;
- nz_count_arr[coeff_idx] = get_nz_count(qcoeff, stride, row, col, iscan);
+ nz_count_arr[coeff_idx] =
+ get_nz_count(qcoeff, stride, height, row, col, iscan);
}
}
@@ -478,8 +486,8 @@ static INLINE int has_br(tran_low_t qc) {
}
static void gen_br_count_mag_arr(int *br_count_arr, int (*br_mag_arr)[2],
- const tran_low_t *qcoeff, int stride, int eob,
- const int16_t *scan) {
+ const tran_low_t *qcoeff, int stride,
+ int height, int eob, const int16_t *scan) {
for (int c = 0; c < eob; ++c) {
const int coeff_idx = scan[c]; // raster order
if (!has_br(qcoeff[coeff_idx])) continue;
@@ -487,9 +495,9 @@ static void gen_br_count_mag_arr(int *br_count_arr, int (*br_mag_arr)[2],
const int col = coeff_idx % stride;
int *count = br_count_arr + coeff_idx;
int *mag = br_mag_arr[coeff_idx];
- *count = get_level_count(qcoeff, stride, row, col, NUM_BASE_LEVELS,
+ *count = get_level_count(qcoeff, stride, height, row, col, NUM_BASE_LEVELS,
br_ref_offset, BR_CONTEXT_POSITION_NUM);
- get_mag(mag, qcoeff, stride, row, col, br_ref_offset,
+ get_mag(mag, qcoeff, stride, height, row, col, br_ref_offset,
BR_CONTEXT_POSITION_NUM);
}
}
@@ -543,18 +551,19 @@ static INLINE int get_golomb_cost(int abs_qc) {
void gen_txb_cache(TxbCache *txb_cache, TxbInfo *txb_info) {
const int16_t *scan = txb_info->scan_order->scan;
gen_nz_count_arr(txb_cache->nz_count_arr, txb_info->qcoeff, txb_info->stride,
- txb_info->eob, txb_info->scan_order);
+ txb_info->height, txb_info->eob, txb_info->scan_order);
gen_nz_ctx_arr(txb_cache->nz_ctx_arr, txb_cache->nz_count_arr,
txb_info->qcoeff, txb_info->bwl, txb_info->eob,
txb_info->scan_order);
gen_base_count_mag_arr(txb_cache->base_count_arr, txb_cache->base_mag_arr,
- txb_info->qcoeff, txb_info->stride, txb_info->eob,
- scan);
+ txb_info->qcoeff, txb_info->stride, txb_info->height,
+ txb_info->eob, scan);
gen_base_ctx_arr(txb_cache->base_ctx_arr, txb_cache->base_count_arr,
txb_cache->base_mag_arr, txb_info->qcoeff, txb_info->stride,
txb_info->eob, scan);
gen_br_count_mag_arr(txb_cache->br_count_arr, txb_cache->br_mag_arr,
- txb_info->qcoeff, txb_info->stride, txb_info->eob, scan);
+ txb_info->qcoeff, txb_info->stride, txb_info->height,
+ txb_info->eob, scan);
gen_br_ctx_arr(txb_cache->br_ctx_arr, txb_cache->br_count_arr,
txb_cache->br_mag_arr, txb_info->qcoeff, txb_info->stride,
txb_info->eob, scan);
@@ -781,7 +790,7 @@ static int try_self_level_down(tran_low_t *low_coeff, int coeff_idx,
if (scan_idx < txb_info->seg_eob) {
const int eob_ctx =
- get_eob_ctx(txb_info->qcoeff, coeff_idx, txb_info->bwl);
+ get_eob_ctx(txb_info->qcoeff, coeff_idx, txb_info->txs_ctx);
cost_diff -= av1_cost_bit(txb_probs->eob_flag[eob_ctx],
scan_idx == (txb_info->eob - 1));
}
@@ -853,9 +862,13 @@ int try_level_down(int coeff_idx, const TxbCache *txb_cache,
const int nb_row = row - sig_ref_offset[i][0];
const int nb_col = col - sig_ref_offset[i][1];
const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+
+ if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height &&
+ nb_col < txb_info->stride))
+ continue;
+
const int nb_scan_idx = iscan[nb_coeff_idx];
- if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 &&
- nb_row < txb_info->stride && nb_col < txb_info->stride) {
+ if (nb_scan_idx < eob) {
const int cost_diff = try_neighbor_level_down_nz(
nb_coeff_idx, coeff_idx, txb_cache, txb_probs, txb_info);
if (cost_map)
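The bounds test added here (and mirrored in the loops below and in update_level_down()) accounts for rectangular transforms: coefficients sit in raster order with stride equal to the block width (1 << bwl), while the valid row range is the separate txb_info->height. A minimal sketch (not part of the patch) of the neighbour check, using the TxbInfo field names from this file:

// Sketch: is a neighbour coordinate inside a possibly non-square transform
// block? stride is the block width (1 << bwl); height is tracked separately.
static int nb_in_block(int nb_row, int nb_col, const TxbInfo *txb_info) {
  return nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height &&
         nb_col < txb_info->stride;
}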
@@ -871,9 +884,13 @@ int try_level_down(int coeff_idx, const TxbCache *txb_cache,
const int nb_row = row - base_ref_offset[i][0];
const int nb_col = col - base_ref_offset[i][1];
const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+
+ if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height &&
+ nb_col < txb_info->stride))
+ continue;
+
const int nb_scan_idx = iscan[nb_coeff_idx];
- if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 &&
- nb_row < txb_info->stride && nb_col < txb_info->stride) {
+ if (nb_scan_idx < eob) {
const int cost_diff = try_neighbor_level_down_base(
nb_coeff_idx, coeff_idx, txb_cache, txb_probs, txb_info);
if (cost_map)
@@ -889,9 +906,13 @@ int try_level_down(int coeff_idx, const TxbCache *txb_cache,
const int nb_row = row - br_ref_offset[i][0];
const int nb_col = col - br_ref_offset[i][1];
const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+
+ if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height &&
+ nb_col < txb_info->stride))
+ continue;
+
const int nb_scan_idx = iscan[nb_coeff_idx];
- if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 &&
- nb_row < txb_info->stride && nb_col < txb_info->stride) {
+ if (nb_scan_idx < eob) {
const int cost_diff = try_neighbor_level_down_br(
nb_coeff_idx, coeff_idx, txb_cache, txb_probs, txb_info);
if (cost_map)
@@ -925,7 +946,7 @@ static int get_low_coeff_cost(int coeff_idx, const TxbCache *txb_cache,
cost += get_base_cost(abs_qc, ctx, txb_probs->coeff_base, base_idx);
if (scan_idx < txb_info->seg_eob) {
const int eob_ctx =
- get_eob_ctx(txb_info->qcoeff, coeff_idx, txb_info->bwl);
+ get_eob_ctx(txb_info->qcoeff, coeff_idx, txb_info->txs_ctx);
cost += av1_cost_bit(txb_probs->eob_flag[eob_ctx],
scan_idx == (txb_info->eob - 1));
}
@@ -982,7 +1003,7 @@ int try_change_eob(int *new_eob, int coeff_idx, const TxbCache *txb_cache,
// Note that get_eob_ctx does NOT actually account for qcoeff, so we don't
// need to lower down the qcoeff here
const int eob_ctx =
- get_eob_ctx(txb_info->qcoeff, scan[*new_eob - 1], txb_info->bwl);
+ get_eob_ctx(txb_info->qcoeff, scan[*new_eob - 1], txb_info->txs_ctx);
cost_diff -= av1_cost_bit(txb_probs->eob_flag[eob_ctx], 0);
cost_diff += av1_cost_bit(txb_probs->eob_flag[eob_ctx], 1);
} else {
@@ -1016,10 +1037,14 @@ void update_level_down(int coeff_idx, TxbCache *txb_cache, TxbInfo *txb_info) {
for (int i = 0; i < SIG_REF_OFFSET_NUM; ++i) {
const int nb_row = row - sig_ref_offset[i][0];
const int nb_col = col - sig_ref_offset[i][1];
+
+ if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height &&
+ nb_col < txb_info->stride))
+ continue;
+
const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
const int nb_scan_idx = iscan[nb_coeff_idx];
- if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 &&
- nb_row < txb_info->stride && nb_col < txb_info->stride) {
+ if (nb_scan_idx < eob) {
const int scan_idx = iscan[coeff_idx];
if (scan_idx < nb_scan_idx) {
const int level = 1;
@@ -1030,7 +1055,7 @@ void update_level_down(int coeff_idx, TxbCache *txb_cache, TxbInfo *txb_info) {
const int count = txb_cache->nz_count_arr[nb_coeff_idx];
txb_cache->nz_ctx_arr[nb_coeff_idx][0] = get_nz_map_ctx_from_count(
count, txb_info->qcoeff, nb_coeff_idx, txb_info->bwl, iscan);
- // int ref_ctx = get_nz_map_ctx2(txb_info->qcoeff, nb_coeff_idx,
+ // int ref_ctx = get_nz_map_ctx(txb_info->qcoeff, nb_coeff_idx,
// txb_info->bwl, iscan);
// if (ref_ctx != txb_cache->nz_ctx_arr[nb_coeff_idx][0])
// printf("nz ctx %d ref_ctx %d\n",
@@ -1043,11 +1068,15 @@ void update_level_down(int coeff_idx, TxbCache *txb_cache, TxbInfo *txb_info) {
const int nb_row = row - base_ref_offset[i][0];
const int nb_col = col - base_ref_offset[i][1];
const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+
+ if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height &&
+ nb_col < txb_info->stride))
+ continue;
+
const tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx];
if (!has_base(nb_coeff, 0)) continue;
const int nb_scan_idx = iscan[nb_coeff_idx];
- if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 &&
- nb_row < txb_info->stride && nb_col < txb_info->stride) {
+ if (nb_scan_idx < eob) {
if (row >= nb_row && col >= nb_col)
update_mag_arr(txb_cache->base_mag_arr[nb_coeff_idx], abs_qc);
const int mag =
@@ -1076,11 +1105,15 @@ void update_level_down(int coeff_idx, TxbCache *txb_cache, TxbInfo *txb_info) {
const int nb_row = row - br_ref_offset[i][0];
const int nb_col = col - br_ref_offset[i][1];
const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+
+ if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height &&
+ nb_col < txb_info->stride))
+ continue;
+
const int nb_scan_idx = iscan[nb_coeff_idx];
const tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx];
if (!has_br(nb_coeff)) continue;
- if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 &&
- nb_row < txb_info->stride && nb_col < txb_info->stride) {
+ if (nb_scan_idx < eob) {
const int level = 1 + NUM_BASE_LEVELS;
if (abs_qc == level) {
txb_cache->br_count_arr[nb_coeff_idx] -= 1;
@@ -1112,8 +1145,8 @@ static int get_coeff_cost(tran_low_t qc, int scan_idx, TxbInfo *txb_info,
const int16_t *iscan = txb_info->scan_order->iscan;
if (scan_idx < txb_info->seg_eob) {
- int coeff_ctx =
- get_nz_map_ctx2(txb_info->qcoeff, scan[scan_idx], txb_info->bwl, iscan);
+ int coeff_ctx = get_nz_map_ctx(txb_info->qcoeff, scan[scan_idx],
+ txb_info->bwl, txb_info->height, iscan);
cost += av1_cost_bit(txb_probs->nz_map[coeff_ctx], is_nz);
}
@@ -1122,7 +1155,8 @@ static int get_coeff_cost(tran_low_t qc, int scan_idx, TxbInfo *txb_info,
txb_ctx->dc_sign_ctx);
int ctx_ls[NUM_BASE_LEVELS] = { 0 };
- get_base_ctx_set(txb_info->qcoeff, scan[scan_idx], txb_info->bwl, ctx_ls);
+ get_base_ctx_set(txb_info->qcoeff, scan[scan_idx], txb_info->bwl,
+ txb_info->height, ctx_ls);
int i;
for (i = 0; i < NUM_BASE_LEVELS; ++i) {
@@ -1130,14 +1164,15 @@ static int get_coeff_cost(tran_low_t qc, int scan_idx, TxbInfo *txb_info,
}
if (abs_qc > NUM_BASE_LEVELS) {
- int ctx = get_br_ctx(txb_info->qcoeff, scan[scan_idx], txb_info->bwl);
+ int ctx = get_br_ctx(txb_info->qcoeff, scan[scan_idx], txb_info->bwl,
+ txb_info->height);
cost += get_br_cost(abs_qc, ctx, txb_probs->coeff_lps);
cost += get_golomb_cost(abs_qc);
}
if (scan_idx < txb_info->seg_eob) {
int eob_ctx =
- get_eob_ctx(txb_info->qcoeff, scan[scan_idx], txb_info->bwl);
+ get_eob_ctx(txb_info->qcoeff, scan[scan_idx], txb_info->txs_ctx);
cost += av1_cost_bit(txb_probs->eob_flag[eob_ctx],
scan_idx == (txb_info->eob - 1));
}
@@ -1323,8 +1358,7 @@ void try_level_down_facade(LevelDownStats *stats, int scan_idx,
test_level_down(coeff_idx, txb_cache, txb_probs, txb_info);
#endif
}
- stats->rd_diff = RDCOST(txb_info->rdmult, txb_info->rddiv, stats->cost_diff,
- stats->dist_diff);
+ stats->rd_diff = RDCOST(txb_info->rdmult, stats->cost_diff, stats->dist_diff);
if (stats->rd_diff < 0) stats->update = 1;
return;
}
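RDCOST() now takes only the Lagrange multiplier (rdmult); the separate rddiv divisor is gone, here and in the av1_search_txk_type() hunk further down. The exact macro lives in rd.h and is not reproduced in this diff; the sketch below (not part of the patch) only shows the general single-multiplier shape, with the fixed-point shift values as assumptions:

// Sketch only: rate is weighted by rdmult with an assumed probability-cost
// shift, distortion by an assumed scaling shift; the real constants differ.
static int64_t rd_cost_sketch(int64_t rdmult, int rate, int64_t dist) {
  const int rate_shift = 9;
  const int dist_shift = 4;
  return ((rdmult * (int64_t)rate + (1 << (rate_shift - 1))) >> rate_shift) +
         (dist << dist_shift);
}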
@@ -1424,18 +1458,17 @@ static int optimize_txb(TxbInfo *txb_info, const TxbProbs *txb_probs,
// These numbers are empirically obtained.
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
-#if CONFIG_EC_ADAPT
{ 17, 13 }, { 16, 10 },
-#else
- { 20, 12 }, { 16, 12 },
-#endif
};
-int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
- TX_SIZE tx_size, TXB_CTX *txb_ctx) {
+int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane,
+ int blk_row, int blk_col, int block, TX_SIZE tx_size,
+ TXB_CTX *txb_ctx) {
MACROBLOCKD *const xd = &x->e_mbd;
const PLANE_TYPE plane_type = get_plane_type(plane);
- const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ const TX_SIZE txs_ctx = get_txsize_context(tx_size);
+ const TX_TYPE tx_type =
+ av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const struct macroblock_plane *p = &x->plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
@@ -1445,34 +1478,34 @@ int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block);
const int16_t *dequant = pd->dequant;
const int seg_eob = AOMMIN(eob, tx_size_2d[tx_size] - 1);
- const aom_prob *nz_map = xd->fc->nz_map[tx_size][plane_type];
+ const aom_prob *nz_map = xd->fc->nz_map[txs_ctx][plane_type];
const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
const int stride = 1 << bwl;
+ const int height = tx_size_high[tx_size];
aom_prob(*coeff_base)[COEFF_BASE_CONTEXTS] =
- xd->fc->coeff_base[tx_size][plane_type];
+ xd->fc->coeff_base[txs_ctx][plane_type];
- const aom_prob *coeff_lps = xd->fc->coeff_lps[tx_size][plane_type];
+ const aom_prob *coeff_lps = xd->fc->coeff_lps[txs_ctx][plane_type];
const int is_inter = is_inter_block(mbmi);
- const SCAN_ORDER *const scan_order =
- get_scan(cm, tx_size, tx_type, is_inter_block(mbmi));
+ const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi);
const TxbProbs txb_probs = { xd->fc->dc_sign[plane_type],
nz_map,
coeff_base,
coeff_lps,
- xd->fc->eob_flag[tx_size][plane_type],
- xd->fc->txb_skip[tx_size] };
+ xd->fc->eob_flag[txs_ctx][plane_type],
+ xd->fc->txb_skip[txs_ctx] };
const int shift = av1_get_tx_scale(tx_size);
const int64_t rdmult =
(x->rdmult * plane_rd_mult[is_inter][plane_type] + 2) >> 2;
- const int64_t rddiv = x->rddiv;
- TxbInfo txb_info = { qcoeff, dqcoeff, tcoeff, dequant, shift,
- tx_size, bwl, stride, eob, seg_eob,
- scan_order, txb_ctx, rdmult, rddiv };
+ TxbInfo txb_info = { qcoeff, dqcoeff, tcoeff, dequant, shift,
+ tx_size, txs_ctx, bwl, stride, height,
+ eob, seg_eob, scan_order, txb_ctx, rdmult };
+
TxbCache txb_cache;
gen_txb_cache(&txb_cache, &txb_info);
@@ -1510,9 +1543,9 @@ void av1_update_txb_context_b(int plane, int block, int blk_row, int blk_col,
const uint16_t eob = p->eobs[block];
const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
const PLANE_TYPE plane_type = pd->plane_type;
- const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
- const SCAN_ORDER *const scan_order =
- get_scan(cm, tx_size, tx_type, is_inter_block(mbmi));
+ const TX_TYPE tx_type =
+ av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
+ const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi);
(void)plane_bsize;
int cul_level = av1_get_txb_entropy_context(qcoeff, scan_order, eob);
@@ -1536,25 +1569,28 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row,
const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *tcoeff = BLOCK_OFFSET(x->mbmi_ext->tcoeff[plane], block);
const int segment_id = mbmi->segment_id;
- const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
- const SCAN_ORDER *const scan_order =
- get_scan(cm, tx_size, tx_type, is_inter_block(mbmi));
+ const TX_TYPE tx_type =
+ av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
+ const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi);
const int16_t *scan = scan_order->scan;
+ const int16_t *iscan = scan_order->iscan;
const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
int c, i;
TXB_CTX txb_ctx;
get_txb_ctx(plane_bsize, tx_size, plane, pd->above_context + blk_col,
pd->left_context + blk_row, &txb_ctx);
const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
+ const int height = tx_size_high[tx_size];
int cul_level = 0;
unsigned int(*nz_map_count)[SIG_COEF_CONTEXTS][2];
- uint8_t txb_mask[32 * 32] = { 0 };
- nz_map_count = &td->counts->nz_map[tx_size][plane_type];
+ TX_SIZE txsize_ctx = get_txsize_context(tx_size);
+
+ nz_map_count = &td->counts->nz_map[txsize_ctx][plane_type];
memcpy(tcoeff, qcoeff, sizeof(*tcoeff) * seg_eob);
- ++td->counts->txb_skip[tx_size][txb_ctx.txb_skip_ctx][eob == 0];
+ ++td->counts->txb_skip[txsize_ctx][txb_ctx.txb_skip_ctx][eob == 0];
x->mbmi_ext->txb_skip_ctx[plane][block] = txb_ctx.txb_skip_ctx;
x->mbmi_ext->eobs[plane][block] = eob;
@@ -1565,24 +1601,23 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row,
}
#if CONFIG_TXK_SEL
- av1_update_tx_type_count(cm, xd, block, plane, mbmi->sb_type, tx_size,
- td->counts);
+ av1_update_tx_type_count(cm, xd, blk_row, blk_col, block, plane,
+ mbmi->sb_type, get_min_tx_size(tx_size), td->counts);
#endif
for (c = 0; c < eob; ++c) {
tran_low_t v = qcoeff[scan[c]];
int is_nz = (v != 0);
- int coeff_ctx = get_nz_map_ctx(tcoeff, txb_mask, scan[c], bwl);
- int eob_ctx = get_eob_ctx(tcoeff, scan[c], bwl);
+ int coeff_ctx = get_nz_map_ctx(tcoeff, scan[c], bwl, height, iscan);
+ int eob_ctx = get_eob_ctx(tcoeff, scan[c], txsize_ctx);
if (c == seg_eob - 1) break;
++(*nz_map_count)[coeff_ctx][is_nz];
if (is_nz) {
- ++td->counts->eob_flag[tx_size][plane_type][eob_ctx][c == (eob - 1)];
+ ++td->counts->eob_flag[txsize_ctx][plane_type][eob_ctx][c == (eob - 1)];
}
- txb_mask[scan[c]] = 1;
}
// Reverse process order to handle coefficient level and sign.
@@ -1595,10 +1630,10 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row,
if (level <= i) continue;
- ctx = get_base_ctx(tcoeff, scan[c], bwl, i + 1);
+ ctx = get_base_ctx(tcoeff, scan[c], bwl, height, i + 1);
if (level == i + 1) {
- ++td->counts->coeff_base[tx_size][plane_type][i][ctx][1];
+ ++td->counts->coeff_base[txsize_ctx][plane_type][i][ctx][1];
if (c == 0) {
int dc_sign_ctx = txb_ctx.dc_sign_ctx;
@@ -1608,7 +1643,7 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row,
cul_level += level;
continue;
}
- ++td->counts->coeff_base[tx_size][plane_type][i][ctx][0];
+ ++td->counts->coeff_base[txsize_ctx][plane_type][i][ctx][0];
update_eob = AOMMAX(update_eob, c);
}
}
@@ -1630,13 +1665,13 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row,
}
// level is above 1.
- ctx = get_br_ctx(tcoeff, scan[c], bwl);
+ ctx = get_br_ctx(tcoeff, scan[c], bwl, height);
for (idx = 0; idx < COEFF_BASE_RANGE; ++idx) {
if (level == (idx + 1 + NUM_BASE_LEVELS)) {
- ++td->counts->coeff_lps[tx_size][plane_type][ctx][1];
+ ++td->counts->coeff_lps[txsize_ctx][plane_type][ctx][1];
break;
}
- ++td->counts->coeff_lps[tx_size][plane_type][ctx][0];
+ ++td->counts->coeff_lps[txsize_ctx][plane_type][ctx][0];
}
if (idx < COEFF_BASE_RANGE) continue;
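
The counting loops above follow the level-map coefficient coder: absolute levels up to NUM_BASE_LEVELS are counted against the coeff_base contexts, the next COEFF_BASE_RANGE levels against coeff_lps (the base-range pass), and anything larger is left for the remainder coding beyond the base range. A minimal standalone sketch of that classification; the constants and the function name are illustrative stand-ins, not the library's definitions:

    #include <stdio.h>

    #define NUM_BASE_LEVELS_SKETCH 2   /* assumed value, for illustration only */
    #define COEFF_BASE_RANGE_SKETCH 12 /* assumed value, for illustration only */

    /* Map an absolute coefficient level to the pass that codes it, mirroring
       the order of the counting loops above. */
    static const char *coeff_pass(int abs_level) {
      if (abs_level == 0) return "zero (covered by the nz map)";
      if (abs_level <= NUM_BASE_LEVELS_SKETCH) return "coeff_base pass";
      if (abs_level <= NUM_BASE_LEVELS_SKETCH + COEFF_BASE_RANGE_SKETCH)
        return "coeff_lps (base-range) pass";
      return "remainder beyond the base range";
    }

    int main(void) {
      const int levels[] = { 0, 1, 2, 3, 14, 40 };
      for (int i = 0; i < 6; ++i)
        printf("level %2d -> %s\n", levels[i], coeff_pass(levels[i]));
      return 0;
    }
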
@@ -1835,46 +1870,74 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
TX_TYPE txk_end = TX_TYPES - 1;
TX_TYPE best_tx_type = txk_start;
int64_t best_rd = INT64_MAX;
+ uint8_t best_eob = 0;
const int coeff_ctx = combine_entropy_contexts(*a, *l);
+ RD_STATS best_rd_stats;
TX_TYPE tx_type;
+
+ av1_invalid_rd_stats(&best_rd_stats);
+
for (tx_type = txk_start; tx_type <= txk_end; ++tx_type) {
- if (plane == 0) mbmi->txk_type[block] = tx_type;
- TX_TYPE ref_tx_type =
- get_tx_type(get_plane_type(plane), xd, block, tx_size);
+ if (plane == 0) mbmi->txk_type[(blk_row << 4) + blk_col] = tx_type;
+ TX_TYPE ref_tx_type = av1_get_tx_type(get_plane_type(plane), xd, blk_row,
+ blk_col, block, tx_size);
if (tx_type != ref_tx_type) {
- // use get_tx_type() to check if the tx_type is valid for the current mode
- // if it's not, we skip it here.
+ // use av1_get_tx_type() to check if the tx_type is valid for the current
+      // mode; if it's not, we skip it here.
continue;
}
+
+#if CONFIG_EXT_TX
+ int is_inter = is_inter_block(mbmi);
+ int ext_tx_set = get_ext_tx_set(get_min_tx_size(tx_size), mbmi->sb_type,
+ is_inter, cm->reduced_tx_set_used);
+ if (!(is_inter && ext_tx_used_inter[ext_tx_set][tx_type]) &&
+ !(!is_inter && ext_tx_used_intra[ext_tx_set][tx_type]))
+ continue;
+#endif // CONFIG_EXT_TX
+
RD_STATS this_rd_stats;
av1_invalid_rd_stats(&this_rd_stats);
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
coeff_ctx, AV1_XFORM_QUANT_FP);
- av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
+ av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
+ a, l);
av1_dist_block(cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size,
&this_rd_stats.dist, &this_rd_stats.sse,
OUTPUT_HAS_PREDICTED_PIXELS);
- const SCAN_ORDER *scan_order =
- get_scan(cm, tx_size, tx_type, is_inter_block(mbmi));
- this_rd_stats.rate = av1_cost_coeffs(
- cpi, x, plane, block, tx_size, scan_order, a, l, use_fast_coef_costing);
- int rd =
- RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
+ const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, mbmi);
+ this_rd_stats.rate =
+ av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size,
+ scan_order, a, l, use_fast_coef_costing);
+ int rd = RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist);
+
if (rd < best_rd) {
best_rd = rd;
- *rd_stats = this_rd_stats;
+ best_rd_stats = this_rd_stats;
best_tx_type = tx_type;
+ best_eob = x->plane[plane].txb_entropy_ctx[block];
}
}
- if (plane == 0) mbmi->txk_type[block] = best_tx_type;
- // TODO(angiebird): Instead of re-call av1_xform_quant and av1_optimize_b,
- // copy the best result in the above tx_type search for loop
- av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
- coeff_ctx, AV1_XFORM_QUANT_FP);
- av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
+
+ av1_merge_rd_stats(rd_stats, &best_rd_stats);
+
+ // if (x->plane[plane].eobs[block] == 0)
+ // if (best_tx_type != DCT_DCT)
+ // exit(0);
+
+ if (best_eob == 0 && is_inter_block(mbmi)) best_tx_type = DCT_DCT;
+
+ if (plane == 0) mbmi->txk_type[(blk_row << 4) + blk_col] = best_tx_type;
+ x->plane[plane].txb_entropy_ctx[block] = best_eob;
+
if (!is_inter_block(mbmi)) {
// intra mode needs decoded result such that the next transform block
// can use it for prediction.
+ av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ coeff_ctx, AV1_XFORM_QUANT_FP);
+ av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
+ a, l);
+
av1_inverse_transform_block_facade(xd, plane, block, blk_row, blk_col,
x->plane[plane].eobs[block]);
}
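
The loop above is a plain rate–distortion scan over transform types: each allowed candidate is transformed and quantized, its coefficient rate and reconstruction distortion are folded into one RD cost, and the cheapest candidate wins (falling back to DCT_DCT for inter blocks whose best candidate produced no coefficients). A simplified, self-contained sketch of that selection pattern; the cost model and the per-candidate evaluation are stand-ins, not the encoder's functions:

    #include <limits.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct { int rate; int64_t dist; } CandStats;

    /* Stand-in for the RDCOST() macro: weigh rate by the Lagrange multiplier
       and add distortion. The real macro uses fixed-point scaling. */
    static int64_t rd_cost(int rdmult, int rate, int64_t dist) {
      return (int64_t)rdmult * rate + dist;
    }

    /* Hypothetical per-candidate evaluation; in the encoder this is the forward
       transform, quantization, and coefficient costing for one tx_type. */
    static CandStats eval_candidate(int cand) {
      CandStats s = { 100 + 25 * cand, 5000 - 700 * (int64_t)cand };
      return s;
    }

    int main(void) {
      const int num_candidates = 4, rdmult = 8;
      int best = -1;
      int64_t best_rd = INT64_MAX;
      for (int cand = 0; cand < num_candidates; ++cand) {
        const CandStats s = eval_candidate(cand);
        const int64_t rd = rd_cost(rdmult, s.rate, s.dist);
        if (rd < best_rd) { best_rd = rd; best = cand; }
      }
      printf("best candidate %d, rd %lld\n", best, (long long)best_rd);
      return 0;
    }
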
diff --git a/third_party/aom/av1/encoder/encodetxb.h b/third_party/aom/av1/encoder/encodetxb.h
index 836033a54..cbafe59c9 100644
--- a/third_party/aom/av1/encoder/encodetxb.h
+++ b/third_party/aom/av1/encoder/encodetxb.h
@@ -30,14 +30,15 @@ typedef struct TxbInfo {
const int16_t *dequant;
int shift;
TX_SIZE tx_size;
+ TX_SIZE txs_ctx;
int bwl;
int stride;
+ int height;
int eob;
int seg_eob;
const SCAN_ORDER *scan_order;
TXB_CTX *txb_ctx;
int64_t rdmult;
- int64_t rddiv;
} TxbInfo;
typedef struct TxbCache {
@@ -66,11 +67,12 @@ typedef struct TxbProbs {
void av1_alloc_txb_buf(AV1_COMP *cpi);
void av1_free_txb_buf(AV1_COMP *cpi);
int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
- int block, TXB_CTX *txb_ctx);
+ int blk_row, int blk_col, int block, TX_SIZE tx_size,
+ TXB_CTX *txb_ctx);
void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
- aom_writer *w, int block, int plane,
- const tran_low_t *tcoeff, uint16_t eob,
- TXB_CTX *txb_ctx);
+ aom_writer *w, int blk_row, int blk_col, int block,
+ int plane, TX_SIZE tx_size, const tran_low_t *tcoeff,
+ uint16_t eob, TXB_CTX *txb_ctx);
void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x,
aom_writer *w, int plane);
int av1_get_txb_entropy_context(const tran_low_t *qcoeff,
@@ -95,8 +97,9 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
int use_fast_coef_costing, RD_STATS *rd_stats);
#endif
-int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
- TX_SIZE tx_size, TXB_CTX *txb_ctx);
+int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane,
+ int blk_row, int blk_col, int block, TX_SIZE tx_size,
+ TXB_CTX *txb_ctx);
#ifdef __cplusplus
}
#endif
diff --git a/third_party/aom/av1/encoder/ethread.c b/third_party/aom/av1/encoder/ethread.c
index 7af5f78b6..1aa1d52a2 100644
--- a/third_party/aom/av1/encoder/ethread.c
+++ b/third_party/aom/av1/encoder/ethread.c
@@ -26,6 +26,10 @@ static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
td_t->rd_counts.global_motion_used[i];
#endif // CONFIG_GLOBAL_MOTION
+ td->rd_counts.compound_ref_used_flag |=
+ td_t->rd_counts.compound_ref_used_flag;
+ td->rd_counts.single_ref_used_flag |= td_t->rd_counts.single_ref_used_flag;
+
for (i = 0; i < TX_SIZES; i++)
for (j = 0; j < PLANE_TYPES; j++)
for (k = 0; k < REF_TYPES; k++)
@@ -122,11 +126,9 @@ void av1_encode_tiles_mt(AV1_COMP *cpi) {
#if CONFIG_PALETTE
// Allocate buffers used by palette coding mode.
- if (cpi->common.allow_screen_content_tools) {
- CHECK_MEM_ERROR(
- cm, thread_data->td->palette_buffer,
- aom_memalign(16, sizeof(*thread_data->td->palette_buffer)));
- }
+ CHECK_MEM_ERROR(
+ cm, thread_data->td->palette_buffer,
+ aom_memalign(16, sizeof(*thread_data->td->palette_buffer)));
#endif // CONFIG_PALETTE
// Create threads
@@ -168,7 +170,7 @@ void av1_encode_tiles_mt(AV1_COMP *cpi) {
}
#if CONFIG_PALETTE
- if (cpi->common.allow_screen_content_tools && i < num_workers - 1)
+ if (i < num_workers - 1)
thread_data->td->mb.palette_buffer = thread_data->td->palette_buffer;
#endif // CONFIG_PALETTE
}
diff --git a/third_party/aom/av1/encoder/firstpass.c b/third_party/aom/av1/encoder/firstpass.c
index 7a0abba2d..e7d78d83e 100644
--- a/third_party/aom/av1/encoder/firstpass.c
+++ b/third_party/aom/av1/encoder/firstpass.c
@@ -456,6 +456,31 @@ static void set_first_pass_params(AV1_COMP *cpi) {
cpi->rc.frames_to_key = INT_MAX;
}
+#if CONFIG_FLEX_REFS
+static double raw_motion_error_stdev(int *raw_motion_err_list,
+ int raw_motion_err_counts) {
+ int64_t sum_raw_err = 0;
+ double raw_err_avg = 0;
+ double raw_err_stdev = 0;
+ if (raw_motion_err_counts == 0) return 0;
+
+ int i;
+ for (i = 0; i < raw_motion_err_counts; i++) {
+ sum_raw_err += raw_motion_err_list[i];
+ }
+ raw_err_avg = sum_raw_err / raw_motion_err_counts;
+ for (i = 0; i < raw_motion_err_counts; i++) {
+ raw_err_stdev += (raw_motion_err_list[i] - raw_err_avg) *
+ (raw_motion_err_list[i] - raw_err_avg);
+ }
+  // Calculate the standard deviation of the (0, 0) motion error over all the
+  // inter blocks, using the last source frame as the reference.
+ raw_err_stdev = sqrt(raw_err_stdev / raw_motion_err_counts);
+ return raw_err_stdev;
+}
+#endif // CONFIG_FLEX_REFS
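
For reference, the statistic returned above is the population standard deviation of the per-block (0, 0) motion errors e_i over the N inter blocks that used the last source frame as reference:

    \bar{e} = \frac{1}{N}\sum_{i=1}^{N} e_i, \qquad
    \sigma  = \sqrt{\frac{1}{N}\sum_{i=1}^{N}\left(e_i - \bar{e}\right)^2}
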
+
#define UL_INTRA_THRESH 50
#define INVALID_ROW -1
void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
@@ -506,6 +531,13 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
od_adapt_ctx pvq_context;
#endif
+#if CONFIG_FLEX_REFS
+ int *raw_motion_err_list;
+ int raw_motion_err_counts = 0;
+ CHECK_MEM_ERROR(
+ cm, raw_motion_err_list,
+ aom_calloc(cm->mb_rows * cm->mb_cols, sizeof(*raw_motion_err_list)));
+#endif // CONFIG_FLEX_REFS
// First pass code requires valid last and new frame buffers.
assert(new_yv12 != NULL);
assert(frame_is_intra_only(cm) || (lst_yv12 != NULL));
@@ -968,6 +1000,9 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
}
}
}
+#if CONFIG_FLEX_REFS
+ raw_motion_err_list[raw_motion_err_counts++] = raw_motion_error;
+#endif // CONFIG_FLEX_REFS
} else {
sr_coded_error += (int64_t)this_error;
}
@@ -981,7 +1016,6 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
recon_yoffset += 16;
recon_uvoffset += uv_mb_height;
}
-
// Adjust to the next row of MBs.
x->plane[0].src.buf += 16 * x->plane[0].src.stride - 16 * cm->mb_cols;
x->plane[1].src.buf +=
@@ -991,7 +1025,10 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
aom_clear_system_state();
}
-
+#if CONFIG_FLEX_REFS
+ const double raw_err_stdev =
+ raw_motion_error_stdev(raw_motion_err_list, raw_motion_err_counts);
+#endif // CONFIG_FLEX_REFS
#if CONFIG_PVQ
#if !CONFIG_ANS
od_ec_enc_clear(&x->daala_enc.w.ec);
@@ -1045,6 +1082,9 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
fps.intra_skip_pct = (double)intra_skip_count / num_mbs;
fps.inactive_zone_rows = (double)image_data_start_row;
fps.inactive_zone_cols = (double)0; // TODO(paulwilkins): fix
+#if CONFIG_FLEX_REFS
+ fps.raw_error_stdev = raw_err_stdev;
+#endif // CONFIG_FLEX_REFS
if (mvcount > 0) {
fps.MVr = (double)sum_mvr / mvcount;
@@ -1231,27 +1271,6 @@ static void setup_rf_level_maxq(AV1_COMP *cpi) {
}
}
-void av1_calculate_next_scaled_size(const AV1_COMP *cpi,
- int *scaled_frame_width,
- int *scaled_frame_height) {
- *scaled_frame_width =
- cpi->oxcf.width * cpi->resize_next_scale_num / cpi->resize_next_scale_den;
- *scaled_frame_height = cpi->oxcf.height * cpi->resize_next_scale_num /
- cpi->resize_next_scale_den;
-}
-
-#if CONFIG_FRAME_SUPERRES
-void av1_calculate_superres_size(const AV1_COMP *cpi, int *encoded_width,
- int *encoded_height) {
- *encoded_width = cpi->oxcf.scaled_frame_width *
- cpi->common.superres_scale_numerator /
- SUPERRES_SCALE_DENOMINATOR;
- *encoded_height = cpi->oxcf.scaled_frame_height *
- cpi->common.superres_scale_numerator /
- SUPERRES_SCALE_DENOMINATOR;
-}
-#endif // CONFIG_FRAME_SUPERRES
-
void av1_init_second_pass(AV1_COMP *cpi) {
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
TWO_PASS *const twopass = &cpi->twopass;
@@ -1673,6 +1692,9 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
// (3) The bi-predictive group interval is strictly smaller than the
// golden group interval.
const int is_bipred_enabled =
+#if CONFIG_FLEX_REFS
+ cpi->bwd_ref_allowed &&
+#endif
rc->source_alt_ref_pending && rc->bipred_group_interval &&
rc->bipred_group_interval <=
(rc->baseline_gf_interval - rc->source_alt_ref_pending);
@@ -2046,6 +2068,11 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
const int is_key_frame = frame_is_intra_only(cm);
const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
+#if CONFIG_FLEX_REFS
+ cpi->extra_arf_allowed = 1;
+ cpi->bwd_ref_allowed = 1;
+#endif
+
// Reset the GF group data structures unless this is a key
// frame in which case it will already have been done.
if (is_key_frame == 0) {
@@ -2106,6 +2133,12 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
}
+#if CONFIG_FLEX_REFS
+ double avg_sr_coded_error = 0;
+ double avg_raw_err_stdev = 0;
+ int non_zero_stdev_count = 0;
+#endif // CONFIG_FLEX_REFS
+
i = 0;
while (i < rc->static_scene_max_gf_interval && i < rc->frames_to_key) {
++i;
@@ -2129,6 +2162,14 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
accumulate_frame_motion_stats(
&next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator, &mv_ratio_accumulator);
+#if CONFIG_FLEX_REFS
+ // sum up the metric values of current gf group
+ avg_sr_coded_error += next_frame.sr_coded_error;
+ if (next_frame.raw_error_stdev) {
+ non_zero_stdev_count++;
+ avg_raw_err_stdev += next_frame.raw_error_stdev;
+ }
+#endif // CONFIG_FLEX_REFS
// Accumulate the effect of prediction quality decay.
if (!flash_detected) {
@@ -2175,7 +2216,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
*this_frame = next_frame;
old_boost_score = boost_score;
}
-
twopass->gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0);
// Was the group length constrained by the requirement for a new KF?
@@ -2202,11 +2242,35 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Set the interval until the next gf.
rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending);
-
#if CONFIG_EXT_REFS
- // Compute how many extra alt_refs we can have
- cpi->num_extra_arfs = get_number_of_extra_arfs(rc->baseline_gf_interval,
- rc->source_alt_ref_pending);
+#if CONFIG_FLEX_REFS
+ const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs
+ : cpi->common.MBs;
+ if (i) avg_sr_coded_error /= i;
+ if (non_zero_stdev_count) avg_raw_err_stdev /= non_zero_stdev_count;
+
+  // Disable extra alt refs and backward refs for "still" gf groups:
+  // zero_motion_accumulator: minimum percentage of (0, 0) motion in the
+  // gf group
+  // avg_sr_coded_error: average of the per-pixel sse of each frame in the
+  // gf group
+  // avg_raw_err_stdev: average of the per-frame standard deviation of the
+  // (0, 0) motion error per block in the gf group
+ assert(num_mbs > 0);
+ const int disable_bwd_extarf =
+ (zero_motion_accumulator > MIN_ZERO_MOTION &&
+ avg_sr_coded_error / num_mbs < MAX_SR_CODED_ERROR &&
+ avg_raw_err_stdev < MAX_RAW_ERR_VAR);
+
+ if (disable_bwd_extarf) cpi->extra_arf_allowed = cpi->bwd_ref_allowed = 0;
+
+ if (!cpi->extra_arf_allowed)
+ cpi->num_extra_arfs = 0;
+ else
+#endif // CONFIG_FLEX_REFS
+ // Compute how many extra alt_refs we can have
+ cpi->num_extra_arfs = get_number_of_extra_arfs(rc->baseline_gf_interval,
+ rc->source_alt_ref_pending);
  // Currently at maximum two extra ARFs are allowed
assert(cpi->num_extra_arfs <= MAX_EXT_ARFS);
#endif // CONFIG_EXT_REFS
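
Together with the thresholds added to firstpass.h below (MIN_ZERO_MOTION, MAX_SR_CODED_ERROR, MAX_RAW_ERR_VAR), the block above reads as a small "still group" predicate: nearly all blocks have (0, 0) motion, the average second-reference coded error per macroblock is low, and the (0, 0) motion error varies little across blocks. A hedged standalone sketch of that decision; the function name is illustrative and the threshold values simply mirror the macros shown below:

    #include <stdbool.h>
    #include <stdio.h>

    #define MIN_ZERO_MOTION 0.95
    #define MAX_SR_CODED_ERROR 40
    #define MAX_RAW_ERR_VAR 2000

    /* Returns true when backward refs and extra ARFs should be disabled for a
       "still" golden-frame group, following the three conditions above. */
    static bool disable_bwd_extarf(double zero_motion_pct,
                                   double avg_sr_coded_error,
                                   double avg_raw_err_stdev, int num_mbs) {
      return zero_motion_pct > MIN_ZERO_MOTION &&
             avg_sr_coded_error / num_mbs < MAX_SR_CODED_ERROR &&
             avg_raw_err_stdev < MAX_RAW_ERR_VAR;
    }

    int main(void) {
      /* A nearly static group: 98% zero motion, ~30 sse/MB, low variance. */
      printf("%d\n", disable_bwd_extarf(0.98, 30.0 * 3600, 500.0, 3600));
      return 0;
    }
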
@@ -2291,12 +2355,6 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->section_intra_rating = calculate_section_intra_ratio(
start_pos, twopass->stats_in_end, rc->baseline_gf_interval);
}
-
- if (oxcf->resize_mode == RESIZE_DYNAMIC) {
- // Default to starting GF groups at normal frame size.
- // TODO(afergs): Make a function for this
- cpi->resize_next_scale_num = cpi->resize_next_scale_den;
- }
}
// Threshold for use of the lagging second reference frame. High second ref
@@ -2638,12 +2696,6 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// The count of bits left is adjusted elsewhere based on real coded frame
// sizes.
twopass->modified_error_left -= kf_group_err;
-
- if (oxcf->resize_mode == RESIZE_DYNAMIC) {
- // Default to normal-sized frame on keyframes.
- // TODO(afergs): Make a function for this
- cpi->resize_next_scale_num = cpi->resize_next_scale_den;
- }
}
// Define the reference buffers that will be updated post encode.
@@ -2741,7 +2793,7 @@ static void configure_buffer_updates(AV1_COMP *cpi) {
break;
case LAST_BIPRED_UPDATE:
- cpi->refresh_last_frame = 0;
+ cpi->refresh_last_frame = 1;
cpi->refresh_golden_frame = 0;
cpi->refresh_bwd_ref_frame = 0;
cpi->refresh_alt_ref_frame = 0;
diff --git a/third_party/aom/av1/encoder/firstpass.h b/third_party/aom/av1/encoder/firstpass.h
index 43104454c..266766d99 100644
--- a/third_party/aom/av1/encoder/firstpass.h
+++ b/third_party/aom/av1/encoder/firstpass.h
@@ -52,6 +52,13 @@ typedef struct {
#define MIN_EXT_ARF_INTERVAL 4
#endif // CONFIG_EXT_REFS
+#if CONFIG_FLEX_REFS
+#define MIN_ZERO_MOTION 0.95
+#define MAX_SR_CODED_ERROR 40
+#define MAX_RAW_ERR_VAR 2000
+#define MIN_MV_IN_OUT 0.4
+#endif // CONFIG_FLEX_REFS
+
#define VLOW_MOTION_THRESHOLD 950
typedef struct {
@@ -77,6 +84,10 @@ typedef struct {
double new_mv_count;
double duration;
double count;
+#if CONFIG_FLEX_REFS
+ // standard deviation for (0, 0) motion prediction error
+ double raw_error_stdev;
+#endif // CONFIG_FLEX_REFS
} FIRSTPASS_STATS;
typedef enum {
@@ -177,18 +188,6 @@ void av1_twopass_postencode_update(struct AV1_COMP *cpi);
// Post encode update of the rate control parameters for 2-pass
void av1_twopass_postencode_update(struct AV1_COMP *cpi);
-void av1_calculate_next_scaled_size(const struct AV1_COMP *cpi,
- int *scaled_frame_width,
- int *scaled_frame_height);
-
-#if CONFIG_FRAME_SUPERRES
-// This is the size after superress scaling, which could be 1:1.
-// Superres scaling happens after regular downscaling.
-// TODO(afergs): Limit overall reduction to 1/2 of the original size
-void av1_calculate_superres_size(const struct AV1_COMP *cpi, int *encoded_width,
- int *encoded_height);
-#endif // CONFIG_FRAME_SUPERRES
-
#if CONFIG_EXT_REFS
static INLINE int get_number_of_extra_arfs(int interval, int arf_pending) {
if (arf_pending && MAX_EXT_ARFS > 0)
diff --git a/third_party/aom/av1/encoder/global_motion.c b/third_party/aom/av1/encoder/global_motion.c
index 74cbc8ae7..661a1feb4 100644
--- a/third_party/aom/av1/encoder/global_motion.c
+++ b/third_party/aom/av1/encoder/global_motion.c
@@ -131,8 +131,8 @@ int64_t refine_integerized_param(WarpedMotionParams *wm,
#endif // CONFIG_HIGHBITDEPTH
uint8_t *ref, int r_width, int r_height,
int r_stride, uint8_t *dst, int d_width,
- int d_height, int d_stride,
- int n_refinements) {
+ int d_height, int d_stride, int n_refinements,
+ int64_t best_frame_error) {
static const int max_trans_model_params[TRANS_TYPES] = {
0, 2, 4, 6, 8, 8, 8
};
@@ -147,15 +147,16 @@ int64_t refine_integerized_param(WarpedMotionParams *wm,
int32_t best_param;
force_wmtype(wm, wmtype);
- best_error = av1_warp_error(wm,
+ best_error = av1_warp_error(
+ wm,
#if CONFIG_HIGHBITDEPTH
- use_hbd, bd,
+ use_hbd, bd,
#endif // CONFIG_HIGHBITDEPTH
- ref, r_width, r_height, r_stride,
- dst + border * d_stride + border, border, border,
- d_width - 2 * border, d_height - 2 * border,
- d_stride, 0, 0, 16, 16);
- step = 1 << (n_refinements + 1);
+ ref, r_width, r_height, r_stride, dst + border * d_stride + border,
+ border, border, d_width - 2 * border, d_height - 2 * border, d_stride, 0,
+ 0, SCALE_SUBPEL_SHIFTS, SCALE_SUBPEL_SHIFTS, best_frame_error);
+ best_error = AOMMIN(best_error, best_frame_error);
+ step = 1 << (n_refinements - 1);
for (i = 0; i < n_refinements; i++, step >>= 1) {
for (p = 0; p < n_params; ++p) {
int step_dir = 0;
@@ -174,7 +175,7 @@ int64_t refine_integerized_param(WarpedMotionParams *wm,
#endif // CONFIG_HIGHBITDEPTH
ref, r_width, r_height, r_stride, dst + border * d_stride + border,
border, border, d_width - 2 * border, d_height - 2 * border, d_stride,
- 0, 0, 16, 16);
+ 0, 0, SCALE_SUBPEL_SHIFTS, SCALE_SUBPEL_SHIFTS, best_error);
if (step_error < best_error) {
best_error = step_error;
best_param = *param;
@@ -190,7 +191,7 @@ int64_t refine_integerized_param(WarpedMotionParams *wm,
#endif // CONFIG_HIGHBITDEPTH
ref, r_width, r_height, r_stride, dst + border * d_stride + border,
border, border, d_width - 2 * border, d_height - 2 * border, d_stride,
- 0, 0, 16, 16);
+ 0, 0, SCALE_SUBPEL_SHIFTS, SCALE_SUBPEL_SHIFTS, best_error);
if (step_error < best_error) {
best_error = step_error;
best_param = *param;
@@ -209,7 +210,8 @@ int64_t refine_integerized_param(WarpedMotionParams *wm,
#endif // CONFIG_HIGHBITDEPTH
ref, r_width, r_height, r_stride, dst + border * d_stride + border,
border, border, d_width - 2 * border, d_height - 2 * border,
- d_stride, 0, 0, 16, 16);
+ d_stride, 0, 0, SCALE_SUBPEL_SHIFTS, SCALE_SUBPEL_SHIFTS,
+ best_error);
if (step_error < best_error) {
best_error = step_error;
best_param = *param;
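
The refinement above is a coarse-to-fine coordinate descent over the integerized warp parameters: the step starts at 1 << (n_refinements - 1), each parameter is probed one step down and one step up, a move is kept only when it lowers the warp error, and the step halves every iteration. A generic sketch of that search pattern, with an abstract error callback standing in for av1_warp_error():

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical error callback; in the encoder this is the warped
       prediction error for the current parameter vector. */
    typedef int64_t (*error_fn)(const int32_t *params, int n_params, void *ctx);

    static int64_t refine_params(int32_t *params, int n_params,
                                 int n_refinements, int64_t best_error,
                                 error_fn err, void *ctx) {
      int step = 1 << (n_refinements - 1);
      for (int i = 0; i < n_refinements; ++i, step >>= 1) {
        for (int p = 0; p < n_params; ++p) {
          const int32_t curr = params[p];
          int32_t best = curr;
          /* Probe one step down, then one step up; keep whichever improves. */
          params[p] = curr - step;
          int64_t e = err(params, n_params, ctx);
          if (e < best_error) { best_error = e; best = params[p]; }
          params[p] = curr + step;
          e = err(params, n_params, ctx);
          if (e < best_error) { best_error = e; best = params[p]; }
          params[p] = best;
        }
      }
      return best_error;
    }

    /* Toy error: squared distance to a hidden target vector. */
    static int64_t toy_error(const int32_t *params, int n_params, void *ctx) {
      const int32_t *target = (const int32_t *)ctx;
      int64_t e = 0;
      for (int p = 0; p < n_params; ++p)
        e += (int64_t)(params[p] - target[p]) * (params[p] - target[p]);
      return e;
    }

    int main(void) {
      int32_t params[2] = { 0, 0 }, target[2] = { 5, -3 };
      int64_t err0 = toy_error(params, 2, target);
      int64_t err1 = refine_params(params, 2, 4, err0, toy_error, target);
      printf("error %lld -> %lld, params (%d, %d)\n", (long long)err0,
             (long long)err1, params[0], params[1]);
      return 0;
    }
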
diff --git a/third_party/aom/av1/encoder/global_motion.h b/third_party/aom/av1/encoder/global_motion.h
index 38509df6a..7fca5327f 100644
--- a/third_party/aom/av1/encoder/global_motion.h
+++ b/third_party/aom/av1/encoder/global_motion.h
@@ -36,7 +36,8 @@ int64_t refine_integerized_param(WarpedMotionParams *wm,
#endif // CONFIG_HIGHBITDEPTH
uint8_t *ref, int r_width, int r_height,
int r_stride, uint8_t *dst, int d_width,
- int d_height, int d_stride, int n_refinements);
+ int d_height, int d_stride, int n_refinements,
+ int64_t best_frame_error);
/*
Computes "num_motions" candidate global motion parameters between two frames.
diff --git a/third_party/aom/av1/encoder/hybrid_fwd_txfm.c b/third_party/aom/av1/encoder/hybrid_fwd_txfm.c
index c57deed84..85f4b7d9b 100644
--- a/third_party/aom/av1/encoder/hybrid_fwd_txfm.c
+++ b/third_party/aom/av1/encoder/hybrid_fwd_txfm.c
@@ -18,7 +18,7 @@
#if CONFIG_CHROMA_2X2
static void fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type, int lossless) {
+ int diff_stride, TxfmParam *txfm_param) {
tran_high_t a1 = src_diff[0];
tran_high_t b1 = src_diff[1];
tran_high_t c1 = src_diff[diff_stride];
@@ -39,134 +39,151 @@ static void fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff,
coeff[2] = (tran_low_t)(4 * c1);
coeff[3] = (tran_low_t)(4 * d1);
- (void)tx_type;
- (void)lossless;
+ (void)txfm_param;
}
#endif
static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type, int lossless) {
- if (lossless) {
- assert(tx_type == DCT_DCT);
+ int diff_stride, TxfmParam *txfm_param) {
+ if (txfm_param->lossless) {
+ assert(txfm_param->tx_type == DCT_DCT);
av1_fwht4x4(src_diff, coeff, diff_stride);
return;
}
- av1_fht4x4(src_diff, coeff, diff_stride, tx_type);
+#if CONFIG_LGT
+ // only C version has LGTs
+ av1_fht4x4_c(src_diff, coeff, diff_stride, txfm_param);
+#else
+ av1_fht4x4(src_diff, coeff, diff_stride, txfm_param);
+#endif
}
static void fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt) {
- (void)fwd_txfm_opt;
- av1_fht4x8(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+#if CONFIG_LGT
+ av1_fht4x8_c(src_diff, coeff, diff_stride, txfm_param);
+#else
+ av1_fht4x8(src_diff, coeff, diff_stride, txfm_param);
+#endif
}
static void fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt) {
- (void)fwd_txfm_opt;
- av1_fht8x4(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+#if CONFIG_LGT
+ av1_fht8x4_c(src_diff, coeff, diff_stride, txfm_param);
+#else
+ av1_fht8x4(src_diff, coeff, diff_stride, txfm_param);
+#endif
}
static void fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt) {
- (void)fwd_txfm_opt;
- av1_fht8x16(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+#if CONFIG_LGT
+ av1_fht8x16_c(src_diff, coeff, diff_stride, txfm_param);
+#else
+ av1_fht8x16(src_diff, coeff, diff_stride, txfm_param);
+#endif
}
static void fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt) {
- (void)fwd_txfm_opt;
- av1_fht16x8(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+#if CONFIG_LGT
+ av1_fht16x8_c(src_diff, coeff, diff_stride, txfm_param);
+#else
+ av1_fht16x8(src_diff, coeff, diff_stride, txfm_param);
+#endif
}
static void fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt) {
- (void)fwd_txfm_opt;
- av1_fht16x32(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+ av1_fht16x32(src_diff, coeff, diff_stride, txfm_param);
}
static void fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt) {
- (void)fwd_txfm_opt;
- av1_fht32x16(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+ av1_fht32x16(src_diff, coeff, diff_stride, txfm_param);
}
static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt) {
- (void)fwd_txfm_opt;
- av1_fht8x8(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+#if CONFIG_LGT
+ av1_fht8x8_c(src_diff, coeff, diff_stride, txfm_param);
+#else
+ av1_fht8x8(src_diff, coeff, diff_stride, txfm_param);
+#endif
}
static void fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt) {
- (void)fwd_txfm_opt;
- av1_fht16x16(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+ av1_fht16x16(src_diff, coeff, diff_stride, txfm_param);
}
static void fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt) {
- (void)fwd_txfm_opt;
- av1_fht32x32(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+#if CONFIG_MRC_TX
+ // MRC_DCT currently only has a C implementation
+ if (txfm_param->tx_type == MRC_DCT) {
+ av1_fht32x32_c(src_diff, coeff, diff_stride, txfm_param);
+ return;
+ }
+#endif // CONFIG_MRC_TX
+ av1_fht32x32(src_diff, coeff, diff_stride, txfm_param);
}
#if CONFIG_TX64X64
static void fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt) {
- (void)fwd_txfm_opt;
+ int diff_stride, TxfmParam *txfm_param) {
#if CONFIG_EXT_TX
- if (tx_type == IDTX)
- av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, tx_type);
+ if (txfm_param->tx_type == IDTX)
+ av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, txfm_param->tx_type);
else
#endif
- av1_fht64x64(src_diff, coeff, diff_stride, tx_type);
+ av1_fht64x64(src_diff, coeff, diff_stride, txfm_param);
}
#endif // CONFIG_TX64X64
-#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
static void fwd_txfm_16x4(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt) {
- (void)fwd_txfm_opt;
- av1_fht16x4(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+#if CONFIG_LGT
+ av1_fht16x4_c(src_diff, coeff, diff_stride, txfm_param);
+#else
+ av1_fht16x4(src_diff, coeff, diff_stride, txfm_param);
+#endif
}
static void fwd_txfm_4x16(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt) {
- (void)fwd_txfm_opt;
- av1_fht4x16(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+#if CONFIG_LGT
+ av1_fht4x16_c(src_diff, coeff, diff_stride, txfm_param);
+#else
+ av1_fht4x16(src_diff, coeff, diff_stride, txfm_param);
+#endif
}
static void fwd_txfm_32x8(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt) {
- (void)fwd_txfm_opt;
- av1_fht32x8(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+#if CONFIG_LGT
+ av1_fht32x8_c(src_diff, coeff, diff_stride, txfm_param);
+#else
+ av1_fht32x8(src_diff, coeff, diff_stride, txfm_param);
+#endif
}
static void fwd_txfm_8x32(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt) {
- (void)fwd_txfm_opt;
- av1_fht8x32(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+#if CONFIG_LGT
+ av1_fht8x32_c(src_diff, coeff, diff_stride, txfm_param);
+#else
+ av1_fht8x32(src_diff, coeff, diff_stride, txfm_param);
+#endif
}
-#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#endif
-#if CONFIG_HIGHBITDEPTH
#if CONFIG_CHROMA_2X2
static void highbd_fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type, int lossless,
- const int bd) {
+ int diff_stride, TxfmParam *txfm_param) {
tran_high_t a1 = src_diff[0];
tran_high_t b1 = src_diff[1];
tran_high_t c1 = src_diff[diff_stride];
@@ -187,27 +204,27 @@ static void highbd_fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff,
coeff[2] = (tran_low_t)(4 * c1);
coeff[3] = (tran_low_t)(4 * d1);
- (void)tx_type;
- (void)lossless;
- (void)bd;
+ (void)txfm_param;
}
#endif
static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type, int lossless,
- const int bd) {
- if (lossless) {
+ int diff_stride, TxfmParam *txfm_param) {
+ int32_t *dst_coeff = (int32_t *)coeff;
+ const int tx_type = txfm_param->tx_type;
+ const int bd = txfm_param->bd;
+ if (txfm_param->lossless) {
assert(tx_type == DCT_DCT);
av1_highbd_fwht4x4(src_diff, coeff, diff_stride);
return;
}
-
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
- av1_fwd_txfm2d_4x4(src_diff, coeff, diff_stride, tx_type, bd);
+ // fallthrough intended
+ av1_fwd_txfm2d_4x4(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
@@ -215,80 +232,79 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- av1_fwd_txfm2d_4x4(src_diff, coeff, diff_stride, tx_type, bd);
+ // fallthrough intended
+ av1_fwd_txfm2d_4x4(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
+ // use the c version for anything including identity for now
case V_DCT:
case H_DCT:
case V_ADST:
case H_ADST:
case V_FLIPADST:
case H_FLIPADST:
- av1_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+ case IDTX:
+ // fallthrough intended
+ av1_fwd_txfm2d_4x4_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
- case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type); break;
#endif // CONFIG_EXT_TX
default: assert(0);
}
}
static void highbd_fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
- (void)fwd_txfm_opt;
- (void)bd;
- av1_highbd_fht4x8(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+ int32_t *dst_coeff = (int32_t *)coeff;
+ av1_fwd_txfm2d_4x8_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
+ txfm_param->bd);
}
static void highbd_fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
- (void)fwd_txfm_opt;
- (void)bd;
- av1_highbd_fht8x4(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+ int32_t *dst_coeff = (int32_t *)coeff;
+ av1_fwd_txfm2d_8x4_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
+ txfm_param->bd);
}
static void highbd_fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
- (void)fwd_txfm_opt;
- (void)bd;
- av1_highbd_fht8x16(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+ int32_t *dst_coeff = (int32_t *)coeff;
+ av1_fwd_txfm2d_8x16_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
+ txfm_param->bd);
}
static void highbd_fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
- (void)fwd_txfm_opt;
- (void)bd;
- av1_highbd_fht16x8(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+ int32_t *dst_coeff = (int32_t *)coeff;
+ av1_fwd_txfm2d_16x8_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
+ txfm_param->bd);
}
static void highbd_fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
- (void)fwd_txfm_opt;
- (void)bd;
- av1_highbd_fht16x32(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+ int32_t *dst_coeff = (int32_t *)coeff;
+ av1_fwd_txfm2d_16x32_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
+ txfm_param->bd);
}
static void highbd_fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
- (void)fwd_txfm_opt;
- (void)bd;
- av1_highbd_fht32x16(src_diff, coeff, diff_stride, tx_type);
+ int diff_stride, TxfmParam *txfm_param) {
+ int32_t *dst_coeff = (int32_t *)coeff;
+ av1_fwd_txfm2d_32x16_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
+ txfm_param->bd);
}
static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
- (void)fwd_txfm_opt;
+ int diff_stride, TxfmParam *txfm_param) {
+ int32_t *dst_coeff = (int32_t *)coeff;
+ const int tx_type = txfm_param->tx_type;
+ const int bd = txfm_param->bd;
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
- av1_fwd_txfm2d_8x8(src_diff, coeff, diff_stride, tx_type, bd);
+ // fallthrough intended
+ av1_fwd_txfm2d_8x8(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
@@ -296,33 +312,37 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- av1_fwd_txfm2d_8x8(src_diff, coeff, diff_stride, tx_type, bd);
+ // fallthrough intended
+ av1_fwd_txfm2d_8x8(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
+ // use the c version for anything including identity for now
case V_DCT:
case H_DCT:
case V_ADST:
case H_ADST:
case V_FLIPADST:
case H_FLIPADST:
- // Use C version since DST exists only in C
- av1_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+ case IDTX:
+ // fallthrough intended
+ av1_fwd_txfm2d_8x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
- case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type); break;
#endif // CONFIG_EXT_TX
default: assert(0);
}
}
static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
- (void)fwd_txfm_opt;
+ int diff_stride, TxfmParam *txfm_param) {
+ int32_t *dst_coeff = (int32_t *)coeff;
+ const int tx_type = txfm_param->tx_type;
+ const int bd = txfm_param->bd;
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
- av1_fwd_txfm2d_16x16(src_diff, coeff, diff_stride, tx_type, bd);
+ // fallthrough intended
+ av1_fwd_txfm2d_16x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
@@ -330,63 +350,72 @@ static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- av1_fwd_txfm2d_16x16(src_diff, coeff, diff_stride, tx_type, bd);
+ // fallthrough intended
+ av1_fwd_txfm2d_16x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
+ // use the c version for anything including identity for now
case V_DCT:
case H_DCT:
case V_ADST:
case H_ADST:
case V_FLIPADST:
case H_FLIPADST:
- // Use C version since DST exists only in C
- av1_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+ case IDTX:
+ // fallthrough intended
+ av1_fwd_txfm2d_16x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
- case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type); break;
#endif // CONFIG_EXT_TX
default: assert(0);
}
}
static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
- (void)fwd_txfm_opt;
+ int diff_stride, TxfmParam *txfm_param) {
+ int32_t *dst_coeff = (int32_t *)coeff;
+ const int tx_type = txfm_param->tx_type;
+ const int bd = txfm_param->bd;
switch (tx_type) {
case DCT_DCT:
- av1_fwd_txfm2d_32x32(src_diff, coeff, diff_stride, tx_type, bd);
- break;
-#if CONFIG_EXT_TX
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
+ // fallthrough intended
+ av1_fwd_txfm2d_32x32(src_diff, dst_coeff, diff_stride, tx_type, bd);
+ break;
+#if CONFIG_EXT_TX
case FLIPADST_DCT:
case DCT_FLIPADST:
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
+ // fallthrough intended
+ av1_fwd_txfm2d_32x32(src_diff, dst_coeff, diff_stride, tx_type, bd);
+ break;
+ // use the c version for anything including identity for now
case V_DCT:
case H_DCT:
case V_ADST:
case H_ADST:
case V_FLIPADST:
case H_FLIPADST:
- av1_highbd_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
+ case IDTX:
+ // fallthrough intended
+ av1_fwd_txfm2d_32x32_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
- case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type); break;
#endif // CONFIG_EXT_TX
- default: assert(0); break;
+ default: assert(0);
}
}
#if CONFIG_TX64X64
static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, TX_TYPE tx_type,
- FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
- (void)fwd_txfm_opt;
- (void)bd;
+ int diff_stride, TxfmParam *txfm_param) {
+ int32_t *dst_coeff = (int32_t *)coeff;
+ const int tx_type = txfm_param->tx_type;
+ const int bd = txfm_param->bd;
switch (tx_type) {
case DCT_DCT:
- av1_highbd_fht64x64(src_diff, coeff, diff_stride, tx_type);
+ av1_fwd_txfm2d_64x64(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
#if CONFIG_EXT_TX
case ADST_DCT:
@@ -403,141 +432,119 @@ static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
case H_ADST:
case V_FLIPADST:
case H_FLIPADST:
- av1_highbd_fht64x64(src_diff, coeff, diff_stride, tx_type);
+ // TODO(sarahparker)
+ // I've deleted the 64x64 implementations that existed in lieu
+      // of adst, flipadst and identity for simplicity, but will bring them
+      // back in a later change. This shouldn't impact performance since
+ // DCT_DCT is the only extended type currently allowed for 64x64,
+ // as dictated by get_ext_tx_set_type in blockd.h.
+ av1_fwd_txfm2d_64x64_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
+ break;
+ case IDTX:
+ av1_fwd_idtx_c(src_diff, dst_coeff, diff_stride, 64, tx_type);
break;
- case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, tx_type); break;
#endif // CONFIG_EXT_TX
default: assert(0); break;
}
}
#endif // CONFIG_TX64X64
-#endif // CONFIG_HIGHBITDEPTH
void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
- FWD_TXFM_PARAM *fwd_txfm_param) {
- const int fwd_txfm_opt = FWD_TXFM_OPT_NORMAL;
- const TX_TYPE tx_type = fwd_txfm_param->tx_type;
- const TX_SIZE tx_size = fwd_txfm_param->tx_size;
- const int lossless = fwd_txfm_param->lossless;
+ TxfmParam *txfm_param) {
+ const TX_SIZE tx_size = txfm_param->tx_size;
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
- fwd_txfm_64x64(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ fwd_txfm_64x64(src_diff, coeff, diff_stride, txfm_param);
break;
#endif // CONFIG_TX64X64
case TX_32X32:
- fwd_txfm_32x32(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ fwd_txfm_32x32(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_16X16:
- fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
- break;
- case TX_8X8:
- fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
- break;
- case TX_4X8:
- fwd_txfm_4x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
- break;
- case TX_8X4:
- fwd_txfm_8x4(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ fwd_txfm_16x16(src_diff, coeff, diff_stride, txfm_param);
break;
+ case TX_8X8: fwd_txfm_8x8(src_diff, coeff, diff_stride, txfm_param); break;
+ case TX_4X8: fwd_txfm_4x8(src_diff, coeff, diff_stride, txfm_param); break;
+ case TX_8X4: fwd_txfm_8x4(src_diff, coeff, diff_stride, txfm_param); break;
case TX_8X16:
- fwd_txfm_8x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ fwd_txfm_8x16(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_16X8:
- fwd_txfm_16x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ fwd_txfm_16x8(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_16X32:
- fwd_txfm_16x32(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ fwd_txfm_16x32(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_32X16:
- fwd_txfm_32x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
- break;
- case TX_4X4:
- fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless);
+ fwd_txfm_32x16(src_diff, coeff, diff_stride, txfm_param);
break;
+ case TX_4X4: fwd_txfm_4x4(src_diff, coeff, diff_stride, txfm_param); break;
#if CONFIG_CHROMA_2X2
- case TX_2X2:
- fwd_txfm_2x2(src_diff, coeff, diff_stride, tx_type, lossless);
- break;
+ case TX_2X2: fwd_txfm_2x2(src_diff, coeff, diff_stride, txfm_param); break;
#endif
-#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
case TX_4X16:
- fwd_txfm_4x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ fwd_txfm_4x16(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_16X4:
- fwd_txfm_16x4(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ fwd_txfm_16x4(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_8X32:
- fwd_txfm_8x32(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ fwd_txfm_8x32(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_32X8:
- fwd_txfm_32x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ fwd_txfm_32x8(src_diff, coeff, diff_stride, txfm_param);
break;
-#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#endif
default: assert(0); break;
}
}
-#if CONFIG_HIGHBITDEPTH
void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, FWD_TXFM_PARAM *fwd_txfm_param) {
- const int fwd_txfm_opt = FWD_TXFM_OPT_NORMAL;
- const TX_TYPE tx_type = fwd_txfm_param->tx_type;
- const TX_SIZE tx_size = fwd_txfm_param->tx_size;
- const int lossless = fwd_txfm_param->lossless;
- const int bd = fwd_txfm_param->bd;
+ int diff_stride, TxfmParam *txfm_param) {
+ const TX_SIZE tx_size = txfm_param->tx_size;
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
- highbd_fwd_txfm_64x64(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
- bd);
+ highbd_fwd_txfm_64x64(src_diff, coeff, diff_stride, txfm_param);
break;
#endif // CONFIG_TX64X64
case TX_32X32:
- highbd_fwd_txfm_32x32(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
- bd);
+ highbd_fwd_txfm_32x32(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_16X16:
- highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
- bd);
+ highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_8X8:
- highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
- bd);
+ highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_4X8:
- highbd_fwd_txfm_4x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
- bd);
+ highbd_fwd_txfm_4x8(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_8X4:
- highbd_fwd_txfm_8x4(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
- bd);
+ highbd_fwd_txfm_8x4(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_8X16:
- highbd_fwd_txfm_8x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
- bd);
+ highbd_fwd_txfm_8x16(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_16X8:
- highbd_fwd_txfm_16x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
- bd);
+ highbd_fwd_txfm_16x8(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_16X32:
- highbd_fwd_txfm_16x32(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
- bd);
+ highbd_fwd_txfm_16x32(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_32X16:
- highbd_fwd_txfm_32x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
- bd);
+ highbd_fwd_txfm_32x16(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_4X4:
- highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless, bd);
+ highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, txfm_param);
break;
#if CONFIG_CHROMA_2X2
case TX_2X2:
- highbd_fwd_txfm_2x2(src_diff, coeff, diff_stride, tx_type, lossless, bd);
+ highbd_fwd_txfm_2x2(src_diff, coeff, diff_stride, txfm_param);
break;
#endif
default: assert(0); break;
}
}
-#endif // CONFIG_HIGHBITDEPTH
diff --git a/third_party/aom/av1/encoder/hybrid_fwd_txfm.h b/third_party/aom/av1/encoder/hybrid_fwd_txfm.h
index e6fd17275..b25ffb8d8 100644
--- a/third_party/aom/av1/encoder/hybrid_fwd_txfm.h
+++ b/third_party/aom/av1/encoder/hybrid_fwd_txfm.h
@@ -14,28 +14,15 @@
#include "./aom_config.h"
-typedef enum FWD_TXFM_OPT { FWD_TXFM_OPT_NORMAL } FWD_TXFM_OPT;
-
-typedef struct FWD_TXFM_PARAM {
- TX_TYPE tx_type;
- TX_SIZE tx_size;
- int lossless;
-#if CONFIG_HIGHBITDEPTH
- int bd;
-#endif // CONFIG_HIGHBITDEPTH
-} FWD_TXFM_PARAM;
-
#ifdef __cplusplus
extern "C" {
#endif
void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
- FWD_TXFM_PARAM *fwd_txfm_param);
+ TxfmParam *txfm_param);
-#if CONFIG_HIGHBITDEPTH
void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
- int diff_stride, FWD_TXFM_PARAM *fwd_txfm_param);
-#endif // CONFIG_HIGHBITDEPTH
+ int diff_stride, TxfmParam *txfm_param);
#ifdef __cplusplus
} // extern "C"
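
The net effect of the hybrid_fwd_txfm changes is that the forward-transform entry points now take a single TxfmParam describing the transform instead of the separate tx_type / tx_size / lossless / bd arguments and the FWD_TXFM_OPT flag. A minimal sketch of that calling convention; the struct layout is only inferred from the removed FWD_TXFM_PARAM, and all names here are illustrative rather than the library's definitions:

    #include <stdint.h>
    #include <stdio.h>

    /* Inferred parameter bundle: one struct instead of four loose arguments,
       so adding a field (e.g. bd for high bit depth) does not touch every
       size-specific helper's signature. */
    typedef struct {
      int tx_type;
      int tx_size;
      int lossless;
      int bd;
    } TxfmParamSketch;

    /* Hypothetical size-specific helper: receives the whole bundle. */
    static void fwd_txfm_4x4_sketch(const int16_t *src_diff, int32_t *coeff,
                                    int diff_stride, const TxfmParamSketch *p) {
      (void)src_diff;
      (void)diff_stride;
      /* The real helpers branch on p->lossless and p->tx_type; here we just
         record which path would have been taken. */
      coeff[0] = p->lossless ? -1 : p->tx_type;
    }

    /* Hypothetical dispatcher mirroring av1_fwd_txfm()'s new shape. */
    static void fwd_txfm_sketch(const int16_t *src_diff, int32_t *coeff,
                                int diff_stride, const TxfmParamSketch *p) {
      switch (p->tx_size) {
        case 0: fwd_txfm_4x4_sketch(src_diff, coeff, diff_stride, p); break;
        default: coeff[0] = 0; break;
      }
    }

    int main(void) {
      int16_t diff[16] = { 0 };
      int32_t coeff[16] = { 0 };
      TxfmParamSketch p = { /*tx_type=*/0, /*tx_size=*/0, /*lossless=*/0, /*bd=*/8 };
      fwd_txfm_sketch(diff, coeff, 4, &p);
      printf("dispatched, coeff[0] = %d\n", coeff[0]);
      return 0;
    }
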
diff --git a/third_party/aom/av1/encoder/mcomp.c b/third_party/aom/av1/encoder/mcomp.c
index 52080ca0d..4efadff1b 100644
--- a/third_party/aom/av1/encoder/mcomp.c
+++ b/third_party/aom/av1/encoder/mcomp.c
@@ -228,49 +228,45 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
-static INLINE const uint8_t *upre(const uint8_t *buf, int stride, int r,
- int c) {
- return &buf[(r)*stride + (c)];
-}
-
/* checks if (r, c) has better score than previous best */
#if CONFIG_EXT_INTER
-#define CHECK_BETTER1(v, r, c) \
- if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
- MV this_mv = { r, c }; \
- thismse = upsampled_pref_error( \
- xd, vfp, src_address, src_stride, upre(y, y_stride, r, c), y_stride, \
- second_pred, mask, mask_stride, invert_mask, w, h, &sse); \
- v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
- v += thismse; \
- if (v < besterr) { \
- besterr = v; \
- br = r; \
- bc = c; \
- *distortion = thismse; \
- *sse1 = sse; \
- } \
- } else { \
- v = INT_MAX; \
+#define CHECK_BETTER1(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ MV this_mv = { r, c }; \
+ thismse = upsampled_pref_error(xd, vfp, src_address, src_stride, \
+ pre(y, y_stride, r, c), y_stride, sp(c), \
+ sp(r), second_pred, mask, mask_stride, \
+ invert_mask, w, h, &sse); \
+ v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
+ v += thismse; \
+ if (v < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
}
#else
-#define CHECK_BETTER1(v, r, c) \
- if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
- MV this_mv = { r, c }; \
- thismse = upsampled_pref_error(xd, vfp, src_address, src_stride, \
- upre(y, y_stride, r, c), y_stride, \
- second_pred, w, h, &sse); \
- v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
- v += thismse; \
- if (v < besterr) { \
- besterr = v; \
- br = r; \
- bc = c; \
- *distortion = thismse; \
- *sse1 = sse; \
- } \
- } else { \
- v = INT_MAX; \
+#define CHECK_BETTER1(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ MV this_mv = { r, c }; \
+ thismse = upsampled_pref_error(xd, vfp, src_address, src_stride, \
+ pre(y, y_stride, r, c), y_stride, sp(c), \
+ sp(r), second_pred, w, h, &sse); \
+ v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
+ v += thismse; \
+ if (v < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
}
#endif // CONFIG_EXT_INTER
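
The rewritten macro addresses the reference with pre() and passes the fractional part through sp(): judging from the replaced code further down (y + (tr >> 3) * y_stride + (tc >> 3) becomes pre(y, y_stride, tr, tc), and svf() already took sp(tc), sp(tr)), motion-vector components here are in 1/8-pel units, so the integer-pel address and the 3-bit subpel phase are split roughly as follows. This is a sketch of that split, not the library's definitions:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* 1/8-pel MV component -> 3-bit subpel phase (0..7). */
    static int sp_sketch(int x) { return x & 7; }

    /* 1/8-pel MV components -> integer-pel address in the reference buffer. */
    static const uint8_t *pre_sketch(const uint8_t *buf, int stride, int r,
                                     int c) {
      return &buf[(r >> 3) * stride + (c >> 3)];
    }

    int main(void) {
      static uint8_t ref[64 * 64];
      const uint8_t *anchor = ref + 32 * 64 + 32;
      const int r = 13, c = 5; /* 1 + 5/8 pel down, 5/8 pel right */
      const uint8_t *p = pre_sketch(anchor, 64, r, c);
      printf("integer-pel offset %td, subpel phase (%d, %d)\n",
             (ptrdiff_t)(p - anchor), sp_sketch(c), sp_sketch(r));
      return 0;
    }
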
@@ -700,16 +696,14 @@ static const MV search_step_table[12] = {
};
/* clang-format on */
-static int upsampled_pref_error(const MACROBLOCKD *xd,
- const aom_variance_fn_ptr_t *vfp,
- const uint8_t *const src, const int src_stride,
- const uint8_t *const y, int y_stride,
- const uint8_t *second_pred,
+static int upsampled_pref_error(
+ const MACROBLOCKD *xd, const aom_variance_fn_ptr_t *vfp,
+ const uint8_t *const src, const int src_stride, const uint8_t *const y,
+ int y_stride, int subpel_x_q3, int subpel_y_q3, const uint8_t *second_pred,
#if CONFIG_EXT_INTER
- const uint8_t *mask, int mask_stride,
- int invert_mask,
+ const uint8_t *mask, int mask_stride, int invert_mask,
#endif
- int w, int h, unsigned int *sse) {
+ int w, int h, unsigned int *sse) {
unsigned int besterr;
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -717,15 +711,17 @@ static int upsampled_pref_error(const MACROBLOCKD *xd,
if (second_pred != NULL) {
#if CONFIG_EXT_INTER
if (mask)
- aom_highbd_comp_mask_upsampled_pred(pred16, second_pred, w, h, y,
- y_stride, mask, mask_stride,
- invert_mask);
+ aom_highbd_comp_mask_upsampled_pred(
+ pred16, second_pred, w, h, subpel_x_q3, subpel_y_q3, y, y_stride,
+ mask, mask_stride, invert_mask, xd->bd);
else
#endif
- aom_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y,
- y_stride);
+ aom_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h,
+ subpel_x_q3, subpel_y_q3, y,
+ y_stride, xd->bd);
} else {
- aom_highbd_upsampled_pred(pred16, w, h, y, y_stride);
+ aom_highbd_upsampled_pred(pred16, w, h, subpel_x_q3, subpel_y_q3, y,
+ y_stride, xd->bd);
}
besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, sse);
@@ -738,13 +734,15 @@ static int upsampled_pref_error(const MACROBLOCKD *xd,
if (second_pred != NULL) {
#if CONFIG_EXT_INTER
if (mask)
- aom_comp_mask_upsampled_pred(pred, second_pred, w, h, y, y_stride, mask,
+ aom_comp_mask_upsampled_pred(pred, second_pred, w, h, subpel_x_q3,
+ subpel_y_q3, y, y_stride, mask,
mask_stride, invert_mask);
else
#endif
- aom_comp_avg_upsampled_pred(pred, second_pred, w, h, y, y_stride);
+ aom_comp_avg_upsampled_pred(pred, second_pred, w, h, subpel_x_q3,
+ subpel_y_q3, y, y_stride);
} else {
- aom_upsampled_pred(pred, w, h, y, y_stride);
+ aom_upsampled_pred(pred, w, h, subpel_x_q3, subpel_y_q3, y, y_stride);
}
besterr = vfp->vf(pred, w, src, src_stride, sse);
@@ -764,12 +762,12 @@ static unsigned int upsampled_setup_center_error(
#endif
int w, int h, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
int *distortion) {
- unsigned int besterr = upsampled_pref_error(xd, vfp, src, src_stride,
- y + offset, y_stride, second_pred,
+ unsigned int besterr = upsampled_pref_error(
+ xd, vfp, src, src_stride, y + offset, y_stride, 0, 0, second_pred,
#if CONFIG_EXT_INTER
- mask, mask_stride, invert_mask,
+ mask, mask_stride, invert_mask,
#endif
- w, h, sse1);
+ w, h, sse1);
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
return besterr;
@@ -824,7 +822,7 @@ int av1_find_best_sub_pixel_tree(
#if CONFIG_EXT_INTER
mask, mask_stride, invert_mask,
#endif
- w, h, (offset * 8), mvjcost, mvcost, sse1, distortion);
+ w, h, offset, mvjcost, mvcost, sse1, distortion);
else
besterr =
setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address,
@@ -845,17 +843,15 @@ int av1_find_best_sub_pixel_tree(
MV this_mv = { tr, tc };
if (use_upsampled_ref) {
- const uint8_t *const pre_address = y + tr * y_stride + tc;
-
thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
- pre_address, y_stride, second_pred,
+ pre(y, y_stride, tr, tc), y_stride,
+ sp(tc), sp(tr), second_pred,
#if CONFIG_EXT_INTER
mask, mask_stride, invert_mask,
#endif
w, h, &sse);
} else {
- const uint8_t *const pre_address =
- y + (tr >> 3) * y_stride + (tc >> 3);
+ const uint8_t *const pre_address = pre(y, y_stride, tr, tc);
if (second_pred == NULL)
thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse);
@@ -894,16 +890,15 @@ int av1_find_best_sub_pixel_tree(
MV this_mv = { tr, tc };
if (use_upsampled_ref) {
- const uint8_t *const pre_address = y + tr * y_stride + tc;
-
thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
- pre_address, y_stride, second_pred,
+ pre(y, y_stride, tr, tc), y_stride,
+ sp(tc), sp(tr), second_pred,
#if CONFIG_EXT_INTER
mask, mask_stride, invert_mask,
#endif
w, h, &sse);
} else {
- const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
+ const uint8_t *const pre_address = pre(y, y_stride, tr, tc);
if (second_pred == NULL)
thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
@@ -992,9 +987,16 @@ unsigned int av1_compute_motion_cost(const AV1_COMP *cpi, MACROBLOCK *const x,
}
// Refine MV in a small range
+#if WARPED_MOTION_SORT_SAMPLES
+unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ int *pts0, int *pts_inref0, int *pts_mv0,
+ int total_samples) {
+#else
unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
BLOCK_SIZE bsize, int mi_row, int mi_col,
int *pts, int *pts_inref) {
+#endif // WARPED_MOTION_SORT_SAMPLES
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
MODE_INFO *mi = xd->mi[0];
@@ -1007,6 +1009,9 @@ unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
int16_t *tr = &mbmi->mv[0].as_mv.row;
int16_t *tc = &mbmi->mv[0].as_mv.col;
WarpedMotionParams best_wm_params = mbmi->wm_params[0];
+#if WARPED_MOTION_SORT_SAMPLES
+ int best_num_proj_ref = mbmi->num_proj_ref[0];
+#endif // WARPED_MOTION_SORT_SAMPLES
unsigned int bestmse;
int minc, maxc, minr, maxr;
const int start = cm->allow_high_precision_mv ? 0 : 4;
@@ -1033,6 +1038,16 @@ unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
if (*tc >= minc && *tc <= maxc && *tr >= minr && *tr <= maxr) {
MV this_mv = { *tr, *tc };
+#if WARPED_MOTION_SORT_SAMPLES
+ int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
+
+ memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
+ memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
+ if (total_samples > 1)
+ mbmi->num_proj_ref[0] =
+ sortSamples(pts_mv0, &this_mv, pts, pts_inref, total_samples);
+#endif // WARPED_MOTION_SORT_SAMPLES
+
if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize, *tr,
*tc, &mbmi->wm_params[0], mi_row, mi_col)) {
thismse =
@@ -1041,6 +1056,9 @@ unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
if (thismse < bestmse) {
best_idx = idx;
best_wm_params = mbmi->wm_params[0];
+#if WARPED_MOTION_SORT_SAMPLES
+ best_num_proj_ref = mbmi->num_proj_ref[0];
+#endif // WARPED_MOTION_SORT_SAMPLES
bestmse = thismse;
}
}
@@ -1058,7 +1076,9 @@ unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
*tr = br;
*tc = bc;
mbmi->wm_params[0] = best_wm_params;
-
+#if WARPED_MOTION_SORT_SAMPLES
+ mbmi->num_proj_ref[0] = best_num_proj_ref;
+#endif // WARPED_MOTION_SORT_SAMPLES
return bestmse;
}
#endif // CONFIG_WARPED_MOTION
@@ -2653,19 +2673,20 @@ int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
#undef CHECK_BETTER1
-#define CHECK_BETTER1(v, r, c) \
- if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
- thismse = upsampled_obmc_pref_error( \
- xd, mask, vfp, z, upre(y, y_stride, r, c), y_stride, w, h, &sse); \
- if ((v = MVC(r, c) + thismse) < besterr) { \
- besterr = v; \
- br = r; \
- bc = c; \
- *distortion = thismse; \
- *sse1 = sse; \
- } \
- } else { \
- v = INT_MAX; \
+#define CHECK_BETTER1(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ thismse = \
+ upsampled_obmc_pref_error(xd, mask, vfp, z, pre(y, y_stride, r, c), \
+ y_stride, sp(c), sp(r), w, h, &sse); \
+ if ((v = MVC(r, c) + thismse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
}
static unsigned int setup_obmc_center_error(
@@ -2684,12 +2705,14 @@ static int upsampled_obmc_pref_error(const MACROBLOCKD *xd, const int32_t *mask,
const aom_variance_fn_ptr_t *vfp,
const int32_t *const wsrc,
const uint8_t *const y, int y_stride,
- int w, int h, unsigned int *sse) {
+ int subpel_x_q3, int subpel_y_q3, int w,
+ int h, unsigned int *sse) {
unsigned int besterr;
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
- aom_highbd_upsampled_pred(pred16, w, h, y, y_stride);
+ aom_highbd_upsampled_pred(pred16, w, h, subpel_x_q3, subpel_y_q3, y,
+ y_stride, xd->bd);
besterr = vfp->ovf(CONVERT_TO_BYTEPTR(pred16), w, wsrc, mask, sse);
} else {
@@ -2698,7 +2721,7 @@ static int upsampled_obmc_pref_error(const MACROBLOCKD *xd, const int32_t *mask,
DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
(void)xd;
#endif // CONFIG_HIGHBITDEPTH
- aom_upsampled_pred(pred, w, h, y, y_stride);
+ aom_upsampled_pred(pred, w, h, subpel_x_q3, subpel_y_q3, y, y_stride);
besterr = vfp->ovf(pred, w, wsrc, mask, sse);
#if CONFIG_HIGHBITDEPTH
@@ -2714,18 +2737,17 @@ static unsigned int upsampled_setup_obmc_center_error(
int h, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
int *distortion) {
unsigned int besterr = upsampled_obmc_pref_error(
- xd, mask, vfp, wsrc, y + offset, y_stride, w, h, sse1);
+ xd, mask, vfp, wsrc, y + offset, y_stride, 0, 0, w, h, sse1);
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
return besterr;
}
int av1_find_best_obmc_sub_pixel_tree_up(
- const AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, MV *bestmv,
- const MV *ref_mv, int allow_hp, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
- int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
- int is_second, int use_upsampled_ref) {
+ MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
+ int error_per_bit, const aom_variance_fn_ptr_t *vfp, int forced_stop,
+ int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion,
+ unsigned int *sse1, int is_second, int use_upsampled_ref) {
const int32_t *wsrc = x->wsrc_buf;
const int32_t *mask = x->mask_buf;
const int *const z = wsrc;
@@ -2756,21 +2778,11 @@ int av1_find_best_obmc_sub_pixel_tree_up(
int y_stride;
const uint8_t *y;
- const struct buf_2d backup_pred = pd->pre[is_second];
int minc, maxc, minr, maxr;
av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr,
ref_mv);
- if (use_upsampled_ref) {
- int ref = xd->mi[0]->mbmi.ref_frame[is_second];
- const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
- setup_pred_plane(&pd->pre[is_second], mbmi->sb_type,
- upsampled_ref->y_buffer, upsampled_ref->y_crop_width,
- upsampled_ref->y_crop_height, upsampled_ref->y_stride,
- (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
- pd->subsampling_y);
- }
y = pd->pre[is_second].buf;
y_stride = pd->pre[is_second].stride;
offset = bestmv->row * y_stride + bestmv->col;
@@ -2784,7 +2796,7 @@ int av1_find_best_obmc_sub_pixel_tree_up(
if (use_upsampled_ref)
besterr = upsampled_setup_obmc_center_error(
xd, mask, bestmv, ref_mv, error_per_bit, vfp, z, y, y_stride, w, h,
- (offset * 8), mvjcost, mvcost, sse1, distortion);
+ offset, mvjcost, mvcost, sse1, distortion);
else
besterr = setup_obmc_center_error(mask, bestmv, ref_mv, error_per_bit, vfp,
z, y, y_stride, offset, mvjcost, mvcost,
@@ -2797,15 +2809,13 @@ int av1_find_best_obmc_sub_pixel_tree_up(
tc = bc + search_step[idx].col;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
MV this_mv = { tr, tc };
+ const uint8_t *const pre_address = pre(y, y_stride, tr, tc);
if (use_upsampled_ref) {
- const uint8_t *const pre_address = y + tr * y_stride + tc;
-
- thismse = upsampled_obmc_pref_error(
- xd, mask, vfp, src_address, pre_address, y_stride, w, h, &sse);
+ thismse =
+ upsampled_obmc_pref_error(xd, mask, vfp, src_address, pre_address,
+ y_stride, sp(tc), sp(tr), w, h, &sse);
} else {
- const uint8_t *const pre_address =
- y + (tr >> 3) * y_stride + (tc >> 3);
thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr),
src_address, mask, &sse);
}
@@ -2833,15 +2843,12 @@ int av1_find_best_obmc_sub_pixel_tree_up(
MV this_mv = { tr, tc };
if (use_upsampled_ref) {
- const uint8_t *const pre_address = y + tr * y_stride + tc;
-
thismse = upsampled_obmc_pref_error(xd, mask, vfp, src_address,
- pre_address, y_stride, w, h, &sse);
+ pre(y, y_stride, tr, tc), y_stride,
+ sp(tc), sp(tr), w, h, &sse);
} else {
- const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
-
- thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr), src_address,
- mask, &sse);
+ thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr),
+ src_address, mask, &sse);
}
cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
@@ -2889,10 +2896,6 @@ int av1_find_best_obmc_sub_pixel_tree_up(
bestmv->row = br;
bestmv->col = bc;
- if (use_upsampled_ref) {
- pd->pre[is_second] = backup_pred;
- }
-
return besterr;
}
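
The mcomp.c hunks above replace open-coded address math such as y + (tr >> 3) * y_stride + (tc >> 3) with the pre() helper and start passing sp(tc)/sp(tr) sub-pel fractions into the upsampled prediction-error paths. A minimal sketch of what those helpers plausibly expand to, inferred from the expressions they replace rather than copied from the libaom headers:

/* Sketch: a 1/8-pel MV component splits into an integer-pel part used for
 * addressing and a 3-bit sub-pel phase handed to the variance kernels
 * (the new subpel_x_q3 / subpel_y_q3 parameters). */
#define pre(buf, stride, r, c) ((buf) + ((r) >> 3) * (stride) + ((c) >> 3))
#define sp(x) ((x) & 7) /* assumed expansion: low three bits as a q3 fraction */
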
diff --git a/third_party/aom/av1/encoder/mcomp.h b/third_party/aom/av1/encoder/mcomp.h
index 7e8b4b29d..733e415ce 100644
--- a/third_party/aom/av1/encoder/mcomp.h
+++ b/third_party/aom/av1/encoder/mcomp.h
@@ -143,11 +143,10 @@ int av1_obmc_full_pixel_diamond(const struct AV1_COMP *cpi, MACROBLOCK *x,
const aom_variance_fn_ptr_t *fn_ptr,
const MV *ref_mv, MV *dst_mv, int is_second);
int av1_find_best_obmc_sub_pixel_tree_up(
- const struct AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col,
- MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
- int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
- int is_second, int use_upsampled_ref);
+ MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
+ int error_per_bit, const aom_variance_fn_ptr_t *vfp, int forced_stop,
+ int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion,
+ unsigned int *sse1, int is_second, int use_upsampled_ref);
#endif // CONFIG_MOTION_VAR
#ifdef __cplusplus
} // extern "C"
@@ -157,10 +156,18 @@ int av1_find_best_obmc_sub_pixel_tree_up(
unsigned int av1_compute_motion_cost(const struct AV1_COMP *cpi,
MACROBLOCK *const x, BLOCK_SIZE bsize,
int mi_row, int mi_col, const MV *this_mv);
+#if WARPED_MOTION_SORT_SAMPLES
+unsigned int av1_refine_warped_mv(const struct AV1_COMP *cpi,
+ MACROBLOCK *const x, BLOCK_SIZE bsize,
+ int mi_row, int mi_col, int *pts0,
+ int *pts_inref0, int *pts_mv0,
+ int total_samples);
+#else
unsigned int av1_refine_warped_mv(const struct AV1_COMP *cpi,
MACROBLOCK *const x, BLOCK_SIZE bsize,
int mi_row, int mi_col, int *pts,
int *pts_inref);
+#endif // WARPED_MOTION_SORT_SAMPLES
#endif // CONFIG_WARPED_MOTION
#endif // AV1_ENCODER_MCOMP_H_
diff --git a/third_party/aom/av1/encoder/palette.c b/third_party/aom/av1/encoder/palette.c
index 235964dde..bac06cd17 100644
--- a/third_party/aom/av1/encoder/palette.c
+++ b/third_party/aom/av1/encoder/palette.c
@@ -145,27 +145,6 @@ int av1_remove_duplicates(float *centroids, int num_centroids) {
return num_unique;
}
-int av1_count_colors(const uint8_t *src, int stride, int rows, int cols) {
- int n = 0, r, c, i, val_count[256];
- uint8_t val;
- memset(val_count, 0, sizeof(val_count));
-
- for (r = 0; r < rows; ++r) {
- for (c = 0; c < cols; ++c) {
- val = src[r * stride + c];
- ++val_count[val];
- }
- }
-
- for (i = 0; i < 256; ++i) {
- if (val_count[i]) {
- ++n;
- }
- }
-
- return n;
-}
-
#if CONFIG_PALETTE_DELTA_ENCODING
static int delta_encode_cost(const int *colors, int num, int bit_depth,
int min_val) {
@@ -291,30 +270,3 @@ int av1_palette_color_cost_uv(const PALETTE_MODE_INFO *const pmi,
return 2 * bit_depth * n * av1_cost_bit(128, 0);
#endif // CONFIG_PALETTE_DELTA_ENCODING
}
-
-#if CONFIG_HIGHBITDEPTH
-int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
- int bit_depth) {
- int n = 0, r, c, i;
- uint16_t val;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- int val_count[1 << 12];
-
- assert(bit_depth <= 12);
- memset(val_count, 0, (1 << 12) * sizeof(val_count[0]));
- for (r = 0; r < rows; ++r) {
- for (c = 0; c < cols; ++c) {
- val = src[r * stride + c];
- ++val_count[val];
- }
- }
-
- for (i = 0; i < (1 << bit_depth); ++i) {
- if (val_count[i]) {
- ++n;
- }
- }
-
- return n;
-}
-#endif // CONFIG_HIGHBITDEPTH
diff --git a/third_party/aom/av1/encoder/palette.h b/third_party/aom/av1/encoder/palette.h
index f5a3c1bdd..8afe5a782 100644
--- a/third_party/aom/av1/encoder/palette.h
+++ b/third_party/aom/av1/encoder/palette.h
@@ -36,14 +36,6 @@ void av1_k_means(const float *data, float *centroids, uint8_t *indices, int n,
// method.
int av1_remove_duplicates(float *centroids, int num_centroids);
-// Returns the number of colors in 'src'.
-int av1_count_colors(const uint8_t *src, int stride, int rows, int cols);
-#if CONFIG_HIGHBITDEPTH
-// Same as av1_count_colors(), but for high-bitdepth mode.
-int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
- int bit_depth);
-#endif // CONFIG_HIGHBITDEPTH
-
#if CONFIG_PALETTE_DELTA_ENCODING
// Given a color cache and a set of base colors, find if each cache color is
// present in the base colors, record the binary results in "cache_color_found".
diff --git a/third_party/aom/av1/encoder/pickcdef.c b/third_party/aom/av1/encoder/pickcdef.c
index da64fb48d..e4ec38826 100644
--- a/third_party/aom/av1/encoder/pickcdef.c
+++ b/third_party/aom/av1/encoder/pickcdef.c
@@ -19,13 +19,19 @@
#include "av1/common/reconinter.h"
#include "av1/encoder/encoder.h"
+#define REDUCED_STRENGTHS 8
+#define REDUCED_TOTAL_STRENGTHS (REDUCED_STRENGTHS * CLPF_STRENGTHS)
#define TOTAL_STRENGTHS (DERING_STRENGTHS * CLPF_STRENGTHS)
+static int priconv[REDUCED_STRENGTHS] = { 0, 1, 2, 3, 4, 7, 12, 25 };
+
/* Search for the best strength to add as an option, knowing we
already selected nb_strengths options. */
static uint64_t search_one(int *lev, int nb_strengths,
- uint64_t mse[][TOTAL_STRENGTHS], int sb_count) {
+ uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
+ int fast) {
uint64_t tot_mse[TOTAL_STRENGTHS];
+ const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
int i, j;
uint64_t best_tot_mse = (uint64_t)1 << 63;
int best_id = 0;
@@ -40,13 +46,13 @@ static uint64_t search_one(int *lev, int nb_strengths,
}
}
/* Find best mse when adding each possible new option. */
- for (j = 0; j < TOTAL_STRENGTHS; j++) {
+ for (j = 0; j < total_strengths; j++) {
uint64_t best = best_mse;
if (mse[i][j] < best) best = mse[i][j];
tot_mse[j] += best;
}
}
- for (j = 0; j < TOTAL_STRENGTHS; j++) {
+ for (j = 0; j < total_strengths; j++) {
if (tot_mse[j] < best_tot_mse) {
best_tot_mse = tot_mse[j];
best_id = j;
@@ -59,9 +65,10 @@ static uint64_t search_one(int *lev, int nb_strengths,
/* Search for the best luma+chroma strength to add as an option, knowing we
already selected nb_strengths options. */
static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
- uint64_t (**mse)[TOTAL_STRENGTHS],
- int sb_count) {
+ uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
+ int fast) {
uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
+ const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
int i, j;
uint64_t best_tot_mse = (uint64_t)1 << 63;
int best_id0 = 0;
@@ -79,9 +86,9 @@ static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
}
}
/* Find best mse when adding each possible new option. */
- for (j = 0; j < TOTAL_STRENGTHS; j++) {
+ for (j = 0; j < total_strengths; j++) {
int k;
- for (k = 0; k < TOTAL_STRENGTHS; k++) {
+ for (k = 0; k < total_strengths; k++) {
uint64_t best = best_mse;
uint64_t curr = mse[0][i][j];
curr += mse[1][i][k];
@@ -90,9 +97,9 @@ static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
}
}
}
- for (j = 0; j < TOTAL_STRENGTHS; j++) {
+ for (j = 0; j < total_strengths; j++) {
int k;
- for (k = 0; k < TOTAL_STRENGTHS; k++) {
+ for (k = 0; k < total_strengths; k++) {
if (tot_mse[j][k] < best_tot_mse) {
best_tot_mse = tot_mse[j][k];
best_id0 = j;
@@ -108,20 +115,23 @@ static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
/* Search for the set of strengths that minimizes mse. */
static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
uint64_t mse[][TOTAL_STRENGTHS],
- int sb_count) {
+ int sb_count, int fast) {
uint64_t best_tot_mse;
int i;
best_tot_mse = (uint64_t)1 << 63;
/* Greedy search: add one strength option at a time. */
for (i = 0; i < nb_strengths; i++) {
- best_tot_mse = search_one(best_lev, i, mse, sb_count);
+ best_tot_mse = search_one(best_lev, i, mse, sb_count, fast);
}
/* Trying to refine the greedy search by reconsidering each
already-selected option. */
- for (i = 0; i < 4 * nb_strengths; i++) {
- int j;
- for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
- best_tot_mse = search_one(best_lev, nb_strengths - 1, mse, sb_count);
+ if (!fast) {
+ for (i = 0; i < 4 * nb_strengths; i++) {
+ int j;
+ for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
+ best_tot_mse =
+ search_one(best_lev, nb_strengths - 1, mse, sb_count, fast);
+ }
}
return best_tot_mse;
}
@@ -130,13 +140,14 @@ static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
int nb_strengths,
uint64_t (**mse)[TOTAL_STRENGTHS],
- int sb_count) {
+ int sb_count, int fast) {
uint64_t best_tot_mse;
int i;
best_tot_mse = (uint64_t)1 << 63;
/* Greedy search: add one strength option at a time. */
for (i = 0; i < nb_strengths; i++) {
- best_tot_mse = search_one_dual(best_lev0, best_lev1, i, mse, sb_count);
+ best_tot_mse =
+ search_one_dual(best_lev0, best_lev1, i, mse, sb_count, fast);
}
/* Trying to refine the greedy search by reconsidering each
already-selected option. */
@@ -146,8 +157,8 @@ static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
best_lev0[j] = best_lev0[j + 1];
best_lev1[j] = best_lev1[j + 1];
}
- best_tot_mse =
- search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse, sb_count);
+ best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
+ sb_count, fast);
}
return best_tot_mse;
}
@@ -269,12 +280,12 @@ uint64_t compute_dering_dist(uint16_t *dst, int dstride, uint16_t *src,
}
void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
- AV1_COMMON *cm, MACROBLOCKD *xd) {
+ AV1_COMMON *cm, MACROBLOCKD *xd, int fast) {
int r, c;
int sbr, sbc;
uint16_t *src[3];
uint16_t *ref_coeff[3];
- dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE];
+ dering_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
int stride[3];
@@ -289,8 +300,8 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
uint64_t best_tot_mse = (uint64_t)1 << 63;
uint64_t tot_mse;
int sb_count;
- int nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
- int nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
+ int nvsb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ int nhsb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
int *sb_index = aom_malloc(nvsb * nhsb * sizeof(*sb_index));
int *selected_strength = aom_malloc(nvsb * nhsb * sizeof(*sb_index));
uint64_t(*mse[2])[TOTAL_STRENGTHS];
@@ -302,6 +313,7 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int quantizer;
double lambda;
int nplanes = 3;
+ const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
DECLARE_ALIGNED(32, uint16_t, inbuf[OD_DERING_INBUF_SIZE]);
uint16_t *in;
DECLARE_ALIGNED(32, uint16_t, tmp_dst[MAX_SB_SQUARE]);
@@ -375,22 +387,23 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int nvb, nhb;
int gi;
int dirinit = 0;
- nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
- nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
- cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
- MAX_MIB_SIZE * sbc]
+ nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * sbc);
+ nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * sbr);
+ cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride +
+ MI_SIZE_64X64 * sbc]
->mbmi.cdef_strength = -1;
- if (sb_all_skip(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE)) continue;
- dering_count = sb_compute_dering_list(cm, sbr * MAX_MIB_SIZE,
- sbc * MAX_MIB_SIZE, dlist, 1);
+ if (sb_all_skip(cm, sbr * MI_SIZE_64X64, sbc * MI_SIZE_64X64)) continue;
+ dering_count = sb_compute_dering_list(cm, sbr * MI_SIZE_64X64,
+ sbc * MI_SIZE_64X64, dlist, 1);
for (pli = 0; pli < nplanes; pli++) {
for (i = 0; i < OD_DERING_INBUF_SIZE; i++)
inbuf[i] = OD_DERING_VERY_LARGE;
- for (gi = 0; gi < TOTAL_STRENGTHS; gi++) {
+ for (gi = 0; gi < total_strengths; gi++) {
int threshold;
uint64_t curr_mse;
int clpf_strength;
threshold = gi / CLPF_STRENGTHS;
+ if (fast) threshold = priconv[threshold];
if (pli > 0 && !chroma_dering) threshold = 0;
/* We avoid filtering the pixels for which some of the pixels to
average
@@ -406,8 +419,8 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
if (clpf_strength == 0)
copy_sb16_16(&in[(-yoff * OD_FILT_BSTRIDE - xoff)], OD_FILT_BSTRIDE,
src[pli],
- (sbr * MAX_MIB_SIZE << mi_high_l2[pli]) - yoff,
- (sbc * MAX_MIB_SIZE << mi_wide_l2[pli]) - xoff,
+ (sbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
+ (sbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
stride[pli], ysize, xsize);
od_dering(clpf_strength ? NULL : (uint8_t *)in, OD_FILT_BSTRIDE,
tmp_dst, in, xdec[pli], ydec[pli], dir, &dirinit, var, pli,
@@ -416,8 +429,8 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
dering_damping, coeff_shift, clpf_strength != 0, 1);
curr_mse = compute_dering_dist(
ref_coeff[pli] +
- (sbr * MAX_MIB_SIZE << mi_high_l2[pli]) * stride[pli] +
- (sbc * MAX_MIB_SIZE << mi_wide_l2[pli]),
+ (sbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] +
+ (sbc * MI_SIZE_64X64 << mi_wide_l2[pli]),
stride[pli], tmp_dst, dlist, dering_count, bsize[pli],
coeff_shift, pli);
if (pli < 2)
@@ -425,7 +438,7 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
else
mse[1][sb_count][gi] += curr_mse;
sb_index[sb_count] =
- MAX_MIB_SIZE * sbr * cm->mi_stride + MAX_MIB_SIZE * sbc;
+ MI_SIZE_64X64 * sbr * cm->mi_stride + MI_SIZE_64X64 * sbc;
}
}
sb_count++;
@@ -440,10 +453,10 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
nb_strengths = 1 << i;
if (nplanes >= 3)
tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
- mse, sb_count);
+ mse, sb_count, fast);
else
- tot_mse =
- joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count);
+ tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
+ fast);
/* Count superblock signalling cost. */
tot_mse += (uint64_t)(sb_count * lambda * i);
/* Count header signalling cost. */
@@ -477,6 +490,17 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
selected_strength[i] = best_gi;
cm->mi_grid_visible[sb_index[i]]->mbmi.cdef_strength = best_gi;
}
+
+ if (fast) {
+ for (int j = 0; j < nb_strengths; j++) {
+ cm->cdef_strengths[j] =
+ priconv[cm->cdef_strengths[j] / CLPF_STRENGTHS] * CLPF_STRENGTHS +
+ (cm->cdef_strengths[j] % CLPF_STRENGTHS);
+ cm->cdef_uv_strengths[j] =
+ priconv[cm->cdef_uv_strengths[j] / CLPF_STRENGTHS] * CLPF_STRENGTHS +
+ (cm->cdef_uv_strengths[j] % CLPF_STRENGTHS);
+ }
+ }
cm->cdef_dering_damping = dering_damping;
cm->cdef_clpf_damping = clpf_damping;
aom_free(mse[0]);
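
The fast path added above limits the primary de-ringing strength to the eight values in priconv[] and, after the joint search has run in that reduced index space, maps each selected strength back to its full-range value. The loop at the end of av1_cdef_search condenses to the helper below (same arithmetic, shown standalone; CLPF_STRENGTHS is taken as given by the surrounding code):

/* Map a strength picked from the reduced (fast) search space back to the
 * full-range strength: the quotient indexes one of the 8 allowed primary
 * strengths, the remainder keeps the CLPF (secondary) strength unchanged. */
static int remap_fast_strength(int reduced, const int *priconv,
                               int clpf_strengths) {
  return priconv[reduced / clpf_strengths] * clpf_strengths +
         (reduced % clpf_strengths);
}

For example, if CLPF_STRENGTHS were 4, the reduced index 22 would split into primary index 5 and CLPF strength 2, giving priconv[5] * 4 + 2 = 30.
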
diff --git a/third_party/aom/av1/encoder/picklpf.c b/third_party/aom/av1/encoder/picklpf.c
index fc0ea485d..26fd55ef0 100644
--- a/third_party/aom/av1/encoder/picklpf.c
+++ b/third_party/aom/av1/encoder/picklpf.c
@@ -38,13 +38,23 @@ int av1_get_max_filter_level(const AV1_COMP *cpi) {
static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
AV1_COMP *const cpi, int filt_level,
- int partial_frame) {
+ int partial_frame
+#if CONFIG_UV_LVL
+ ,
+ int plane
+#endif
+ ) {
AV1_COMMON *const cm = &cpi->common;
int64_t filt_err;
#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_CB4X4
+#if CONFIG_UV_LVL
+ av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level,
+ plane, partial_frame);
+#else
av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level, 1,
partial_frame);
+#endif // CONFIG_UV_LVL
#else
if (cpi->num_workers > 1)
av1_loop_filter_frame_mt(cm->frame_to_show, cm, cpi->td.mb.e_mbd.plane,
@@ -55,6 +65,40 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
1, partial_frame);
#endif
+#if CONFIG_UV_LVL
+#if CONFIG_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ if (plane == 0)
+ filt_err = aom_highbd_get_y_sse(sd, cm->frame_to_show);
+ else if (plane == 1)
+ filt_err = aom_highbd_get_u_sse(sd, cm->frame_to_show);
+ else
+ filt_err = aom_highbd_get_v_sse(sd, cm->frame_to_show);
+ } else {
+ if (plane == 0)
+ filt_err = aom_get_y_sse(sd, cm->frame_to_show);
+ else if (plane == 1)
+ filt_err = aom_get_u_sse(sd, cm->frame_to_show);
+ else
+ filt_err = aom_get_v_sse(sd, cm->frame_to_show);
+ }
+#else
+ if (plane == 0)
+ filt_err = aom_get_y_sse(sd, cm->frame_to_show);
+ else if (plane == 1)
+ filt_err = aom_get_u_sse(sd, cm->frame_to_show);
+ else
+ filt_err = aom_get_v_sse(sd, cm->frame_to_show);
+#endif // CONFIG_HIGHBITDEPTH
+
+ // Re-instate the unfiltered frame
+ if (plane == 0)
+ aom_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
+ else if (plane == 1)
+ aom_yv12_copy_u(&cpi->last_frame_uf, cm->frame_to_show);
+ else
+ aom_yv12_copy_v(&cpi->last_frame_uf, cm->frame_to_show);
+#else
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth) {
filt_err = aom_highbd_get_y_sse(sd, cm->frame_to_show);
@@ -67,12 +111,18 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
// Re-instate the unfiltered frame
aom_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
+#endif // CONFIG_UV_LVL
return filt_err;
}
int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
- int partial_frame, double *best_cost_ret) {
+ int partial_frame, double *best_cost_ret
+#if CONFIG_UV_LVL
+ ,
+ int plane
+#endif
+ ) {
const AV1_COMMON *const cm = &cpi->common;
const struct loopfilter *const lf = &cm->lf;
const int min_filter_level = 0;
@@ -82,9 +132,20 @@ int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
int filt_best;
MACROBLOCK *x = &cpi->td.mb;
- // Start the search at the previous frame filter level unless it is now out of
- // range.
+// Start the search at the previous frame filter level unless it is now out of
+// range.
+#if CONFIG_UV_LVL
+ int lvl;
+ switch (plane) {
+ case 0: lvl = lf->filter_level; break;
+ case 1: lvl = lf->filter_level_u; break;
+ case 2: lvl = lf->filter_level_v; break;
+ default: lvl = lf->filter_level; break;
+ }
+ int filt_mid = clamp(lvl, min_filter_level, max_filter_level);
+#else
int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level);
+#endif // CONFIG_UV_LVL
int filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
// Sum squared error at each filter level
int64_t ss_err[MAX_LOOP_FILTER + 1];
@@ -92,10 +153,23 @@ int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
// Set each entry to -1
memset(ss_err, 0xFF, sizeof(ss_err));
+#if CONFIG_UV_LVL
+ if (plane == 0)
+ aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
+ else if (plane == 1)
+ aom_yv12_copy_u(cm->frame_to_show, &cpi->last_frame_uf);
+ else if (plane == 2)
+ aom_yv12_copy_v(cm->frame_to_show, &cpi->last_frame_uf);
+#else
// Make a copy of the unfiltered / processed recon buffer
aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
+#endif // CONFIG_UV_LVL
+#if CONFIG_UV_LVL
+ best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame, plane);
+#else
best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame);
+#endif // CONFIG_UV_LVL
filt_best = filt_mid;
ss_err[filt_mid] = best_err;
@@ -115,7 +189,12 @@ int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
if (filt_direction <= 0 && filt_low != filt_mid) {
// Get Low filter error score
if (ss_err[filt_low] < 0) {
+#if CONFIG_UV_LVL
+ ss_err[filt_low] =
+ try_filter_frame(sd, cpi, filt_low, partial_frame, plane);
+#else
ss_err[filt_low] = try_filter_frame(sd, cpi, filt_low, partial_frame);
+#endif // CONFIG_UV_LVL
}
// If value is close to the best so far then bias towards a lower loop
// filter value.
@@ -131,7 +210,12 @@ int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
// Now look at filt_high
if (filt_direction >= 0 && filt_high != filt_mid) {
if (ss_err[filt_high] < 0) {
+#if CONFIG_UV_LVL
+ ss_err[filt_high] =
+ try_filter_frame(sd, cpi, filt_high, partial_frame, plane);
+#else
ss_err[filt_high] = try_filter_frame(sd, cpi, filt_high, partial_frame);
+#endif // CONFIG_UV_LVL
}
// If value is significantly better than previous best, bias added against
// raising filter value
@@ -154,8 +238,7 @@ int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
// Update best error
best_err = ss_err[filt_best];
- if (best_cost_ret)
- *best_cost_ret = RDCOST_DBL(x->rdmult, x->rddiv, 0, best_err);
+ if (best_cost_ret) *best_cost_ret = RDCOST_DBL(x->rdmult, 0, best_err);
return filt_best;
}
@@ -198,14 +281,16 @@ void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
if (cm->frame_type == KEY_FRAME) filt_guess -= 4;
lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
} else {
+#if CONFIG_UV_LVL
+ lf->filter_level = av1_search_filter_level(
+ sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 0);
+ lf->filter_level_u = av1_search_filter_level(
+ sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 1);
+ lf->filter_level_v = av1_search_filter_level(
+ sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 2);
+#else
lf->filter_level = av1_search_filter_level(
sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL);
+#endif // CONFIG_UV_LVL
}
-
-#if CONFIG_EXT_TILE
- // TODO(any): 0 loopfilter level is only necessary if individual tile
- // decoding is required. We need to communicate this requirement to this
- // code and force loop filter level 0 only if required.
- if (cm->tile_encoding_mode) lf->filter_level = 0;
-#endif // CONFIG_EXT_TILE
}
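
Under CONFIG_UV_LVL the level search above now runs once per plane, but the search strategy itself is unchanged: start from the previous level for that plane, probe one step below and above, and halve the step when neither probe improves. A simplified, self-contained sketch of that step-halving loop (the ss_err caching and the bias heuristics of the real function are omitted; err() stands in for try_filter_frame()):

#include <stdint.h>

/* Step-halving search around a starting level; err(level) is a stand-in for
 * try_filter_frame(). */
static int search_level_sketch(int filt_mid, int min_level, int max_level,
                               int64_t (*err)(int level)) {
  int filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
  int filt_best = filt_mid;
  int64_t best_err = err(filt_mid);
  while (filter_step > 0) {
    const int lo = filt_mid - filter_step < min_level ? min_level
                                                      : filt_mid - filter_step;
    const int hi = filt_mid + filter_step > max_level ? max_level
                                                      : filt_mid + filter_step;
    if (lo != filt_mid) {
      const int64_t e = err(lo);
      if (e < best_err) { best_err = e; filt_best = lo; }
    }
    if (hi != filt_mid) {
      const int64_t e = err(hi);
      if (e < best_err) { best_err = e; filt_best = hi; }
    }
    if (filt_best == filt_mid) filter_step /= 2; /* no improvement: refine */
    filt_mid = filt_best;
  }
  return filt_best;
}
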
diff --git a/third_party/aom/av1/encoder/picklpf.h b/third_party/aom/av1/encoder/picklpf.h
index 3c0a83462..bd248d114 100644
--- a/third_party/aom/av1/encoder/picklpf.h
+++ b/third_party/aom/av1/encoder/picklpf.h
@@ -21,8 +21,13 @@ extern "C" {
struct yv12_buffer_config;
struct AV1_COMP;
int av1_get_max_filter_level(const AV1_COMP *cpi);
+#if CONFIG_UV_LVL
+int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
+ int partial_frame, double *err, int plane);
+#else
int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
int partial_frame, double *err);
+#endif
void av1_pick_filter_level(const struct yv12_buffer_config *sd,
struct AV1_COMP *cpi, LPF_PICK_METHOD method);
#ifdef __cplusplus
diff --git a/third_party/aom/av1/encoder/pickrst.c b/third_party/aom/av1/encoder/pickrst.c
index 4a446d24e..fec68377a 100644
--- a/third_party/aom/av1/encoder/pickrst.c
+++ b/third_party/aom/av1/encoder/pickrst.c
@@ -437,8 +437,8 @@ static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
int width, height, src_stride, dgd_stride;
uint8_t *dgd_buffer, *src_buffer;
if (plane == AOM_PLANE_Y) {
- width = cm->width;
- height = cm->height;
+ width = src->y_crop_width;
+ height = src->y_crop_height;
src_buffer = src->y_buffer;
src_stride = src->y_stride;
dgd_buffer = dgd->y_buffer;
@@ -478,7 +478,7 @@ static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
(1 << plane));
// #bits when a tile is not restored
bits = av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 0);
- cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
+ cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err);
best_tile_cost[tile_idx] = DBL_MAX;
search_selfguided_restoration(
dgd_buffer + v_start * dgd_stride + h_start, h_end - h_start,
@@ -498,7 +498,7 @@ static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
&ref_sgrproj_info)
<< AV1_PROB_COST_SHIFT;
bits += av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 1);
- cost_sgrproj = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
+ cost_sgrproj = RDCOST_DBL(x->rdmult, (bits >> 4), err);
if (cost_sgrproj >= cost_norestore) {
type[tile_idx] = RESTORE_NONE;
} else {
@@ -531,7 +531,7 @@ static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
}
err = try_restoration_frame(src, cpi, rsi, (1 << plane), partial_frame,
dst_frame);
- cost_sgrproj = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
+ cost_sgrproj = RDCOST_DBL(x->rdmult, (bits >> 4), err);
return cost_sgrproj;
}
@@ -985,8 +985,8 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
int width, height, src_stride, dgd_stride;
uint8_t *dgd_buffer, *src_buffer;
if (plane == AOM_PLANE_Y) {
- width = cm->width;
- height = cm->height;
+ width = src->y_crop_width;
+ height = src->y_crop_height;
src_buffer = src->y_buffer;
src_stride = src->y_stride;
dgd_buffer = dgd->y_buffer;
@@ -1039,7 +1039,7 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
(1 << plane));
// #bits when a tile is not restored
bits = av1_cost_bit(RESTORE_NONE_WIENER_PROB, 0);
- cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
+ cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err);
best_tile_cost[tile_idx] = DBL_MAX;
av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width,
@@ -1081,7 +1081,7 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info)
<< AV1_PROB_COST_SHIFT;
bits += av1_cost_bit(RESTORE_NONE_WIENER_PROB, 1);
- cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
+ cost_wiener = RDCOST_DBL(x->rdmult, (bits >> 4), err);
if (cost_wiener >= cost_norestore) {
type[tile_idx] = RESTORE_NONE;
} else {
@@ -1114,7 +1114,7 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
}
err = try_restoration_frame(src, cpi, rsi, 1 << plane, partial_frame,
dst_frame);
- cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
+ cost_wiener = RDCOST_DBL(x->rdmult, (bits >> 4), err);
return cost_wiener;
}
@@ -1133,8 +1133,8 @@ static double search_norestore(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
int h_start, h_end, v_start, v_end;
int width, height;
if (plane == AOM_PLANE_Y) {
- width = cm->width;
- height = cm->height;
+ width = src->y_crop_width;
+ height = src->y_crop_height;
} else {
width = src->uv_crop_width;
height = src->uv_crop_height;
@@ -1160,13 +1160,14 @@ static double search_norestore(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
// RD cost associated with no restoration
err = sse_restoration_frame(cm, src, cm->frame_to_show, (1 << plane));
bits = frame_level_restore_bits[RESTORE_NONE] << AV1_PROB_COST_SHIFT;
- cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
+ cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err);
return cost_norestore;
}
static double search_switchable_restoration(
- AV1_COMP *cpi, int partial_frame, int plane, RestorationInfo *rsi,
- double *tile_cost[RESTORE_SWITCHABLE_TYPES]) {
+ const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, int partial_frame, int plane,
+ RestorationType *const restore_types[RESTORE_SWITCHABLE_TYPES],
+ double *const tile_cost[RESTORE_SWITCHABLE_TYPES], RestorationInfo *rsi) {
AV1_COMMON *const cm = &cpi->common;
MACROBLOCK *x = &cpi->td.mb;
double cost_switchable = 0;
@@ -1174,11 +1175,11 @@ static double search_switchable_restoration(
RestorationType r;
int width, height;
if (plane == AOM_PLANE_Y) {
- width = cm->width;
- height = cm->height;
+ width = src->y_crop_width;
+ height = src->y_crop_height;
} else {
- width = ROUND_POWER_OF_TWO(cm->width, cm->subsampling_x);
- height = ROUND_POWER_OF_TWO(cm->height, cm->subsampling_y);
+ width = src->uv_crop_width;
+ height = src->uv_crop_height;
}
const int ntiles = av1_get_rest_ntiles(
width, height, cm->rst_info[plane].restoration_tilesize, NULL, NULL, NULL,
@@ -1192,16 +1193,17 @@ static double search_switchable_restoration(
rsi->frame_restoration_type = RESTORE_SWITCHABLE;
bits = frame_level_restore_bits[rsi->frame_restoration_type]
<< AV1_PROB_COST_SHIFT;
- cost_switchable = RDCOST_DBL(x->rdmult, x->rddiv, bits >> 4, 0);
+ cost_switchable = RDCOST_DBL(x->rdmult, bits >> 4, 0);
for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
- double best_cost = RDCOST_DBL(
- x->rdmult, x->rddiv, (cpi->switchable_restore_cost[RESTORE_NONE] >> 4),
- tile_cost[RESTORE_NONE][tile_idx]);
+ double best_cost =
+ RDCOST_DBL(x->rdmult, (cpi->switchable_restore_cost[RESTORE_NONE] >> 4),
+ tile_cost[RESTORE_NONE][tile_idx]);
rsi->restoration_type[tile_idx] = RESTORE_NONE;
for (r = 1; r < RESTORE_SWITCHABLE_TYPES; r++) {
if (force_restore_type != 0)
if (r != force_restore_type) continue;
int tilebits = 0;
+ if (restore_types[r][tile_idx] != r) continue;
if (r == RESTORE_WIENER)
tilebits +=
count_wiener_bits(&rsi->wiener_info[tile_idx], &ref_wiener_info);
@@ -1210,8 +1212,8 @@ static double search_switchable_restoration(
count_sgrproj_bits(&rsi->sgrproj_info[tile_idx], &ref_sgrproj_info);
tilebits <<= AV1_PROB_COST_SHIFT;
tilebits += cpi->switchable_restore_cost[r];
- double cost = RDCOST_DBL(x->rdmult, x->rddiv, tilebits >> 4,
- tile_cost[r][tile_idx]);
+ double cost =
+ RDCOST_DBL(x->rdmult, tilebits >> 4, tile_cost[r][tile_idx]);
if (cost < best_cost) {
rsi->restoration_type[tile_idx] = r;
@@ -1243,14 +1245,17 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
RestorationType *restore_types[RESTORE_SWITCHABLE_TYPES];
double best_cost_restore;
RestorationType r, best_restore;
-
- const int ntiles_y = av1_get_rest_ntiles(cm->width, cm->height,
- cm->rst_info[0].restoration_tilesize,
- NULL, NULL, NULL, NULL);
+ const int ywidth = src->y_crop_width;
+ const int yheight = src->y_crop_height;
+ const int uvwidth = src->uv_crop_width;
+ const int uvheight = src->uv_crop_height;
+
+ const int ntiles_y =
+ av1_get_rest_ntiles(ywidth, yheight, cm->rst_info[0].restoration_tilesize,
+ NULL, NULL, NULL, NULL);
const int ntiles_uv = av1_get_rest_ntiles(
- ROUND_POWER_OF_TWO(cm->width, cm->subsampling_x),
- ROUND_POWER_OF_TWO(cm->height, cm->subsampling_y),
- cm->rst_info[1].restoration_tilesize, NULL, NULL, NULL, NULL);
+ uvwidth, uvheight, cm->rst_info[1].restoration_tilesize, NULL, NULL, NULL,
+ NULL);
// Assume ntiles_uv is never larger than ntiles_y and so the same arrays work.
for (r = 0; r < RESTORE_SWITCHABLE_TYPES; r++) {
@@ -1270,9 +1275,9 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
tile_cost[r], &cpi->trial_frame_rst);
}
if (plane == AOM_PLANE_Y)
- cost_restore[RESTORE_SWITCHABLE] =
- search_switchable_restoration(cpi, method == LPF_PICK_FROM_SUBIMAGE,
- plane, &cm->rst_info[plane], tile_cost);
+ cost_restore[RESTORE_SWITCHABLE] = search_switchable_restoration(
+ src, cpi, method == LPF_PICK_FROM_SUBIMAGE, plane, restore_types,
+ tile_cost, &cm->rst_info[plane]);
else
cost_restore[RESTORE_SWITCHABLE] = DBL_MAX;
best_cost_restore = DBL_MAX;
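
Every RDCOST_DBL call site in pickrst.c drops its x->rddiv argument because the divisor is now folded into the macro as the fixed RDDIV_BITS (see the rd.h hunk later in this diff). A standalone worked example of the new two-term form, assuming AV1_PROB_COST_SHIFT is 9 as in aom_dsp:

#define RDDIV_BITS 7
#define AV1_PROB_COST_SHIFT 9 /* assumed value from aom_dsp/prob.h */
#define RDCOST_DBL(RM, R, D)                                       \
  (((((double)(R)) * (RM)) / (double)(1 << AV1_PROB_COST_SHIFT)) + \
   ((double)(D) * (1 << RDDIV_BITS)))

/* rdmult = 512, rate = 100 (probability-cost units), distortion = 1000:
 *   rate term = 100 * 512 / 512 = 100
 *   dist term = 1000 * 128      = 128000  ->  RDCOST_DBL == 128100.0 */
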
diff --git a/third_party/aom/av1/encoder/ransac.c b/third_party/aom/av1/encoder/ransac.c
index bbd2d179c..c6e3675be 100644
--- a/third_party/aom/av1/encoder/ransac.c
+++ b/third_party/aom/av1/encoder/ransac.c
@@ -139,6 +139,8 @@ static void normalize_homography(double *pts, int n, double *T) {
double msqe = 0;
double scale;
int i;
+
+ assert(n > 0);
for (i = 0; i < n; ++i, p += 2) {
mean[0] += p[0];
mean[1] += p[1];
@@ -821,13 +823,15 @@ static int ransac(const int *matched_points, int npoints,
// Recompute the motions using only the inliers.
for (i = 0; i < num_desired_motions; ++i) {
- copy_points_at_indices(points1, corners1, motions[i].inlier_indices,
- motions[i].num_inliers);
- copy_points_at_indices(points2, corners2, motions[i].inlier_indices,
- motions[i].num_inliers);
-
- find_transformation(motions[i].num_inliers, points1, points2,
- params_by_motion + (MAX_PARAMDIM - 1) * i);
+ if (motions[i].num_inliers >= minpts) {
+ copy_points_at_indices(points1, corners1, motions[i].inlier_indices,
+ motions[i].num_inliers);
+ copy_points_at_indices(points2, corners2, motions[i].inlier_indices,
+ motions[i].num_inliers);
+
+ find_transformation(motions[i].num_inliers, points1, points2,
+ params_by_motion + (MAX_PARAMDIM - 1) * i);
+ }
num_inliers_by_motion[i] = motions[i].num_inliers;
}
diff --git a/third_party/aom/av1/encoder/ratectrl.c b/third_party/aom/av1/encoder/ratectrl.c
index 4552c674e..b546fdffa 100644
--- a/third_party/aom/av1/encoder/ratectrl.c
+++ b/third_party/aom/av1/encoder/ratectrl.c
@@ -94,8 +94,8 @@ static int kf_high = 5000;
static int kf_low = 400;
double av1_resize_rate_factor(const AV1_COMP *cpi) {
- return (double)(cpi->resize_scale_den * cpi->resize_scale_den) /
- (cpi->resize_scale_num * cpi->resize_scale_num);
+ return (double)(cpi->oxcf.width * cpi->oxcf.height) /
+ (cpi->common.width * cpi->common.height);
}
// Functions to compute the active minq lookup table entries based on a
@@ -1081,7 +1081,7 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int *bottom_index,
}
// Modify active_best_quality for downscaled normal frames.
- if (!av1_resize_unscaled(cpi) && !frame_is_kf_gf_arf(cpi)) {
+ if (!av1_frame_unscaled(cm) && !frame_is_kf_gf_arf(cpi)) {
int qdelta = av1_compute_qdelta_by_rate(
rc, cm->frame_type, active_best_quality, 2.0, cm->bit_depth);
active_best_quality =
@@ -1164,7 +1164,7 @@ void av1_rc_set_frame_target(AV1_COMP *cpi, int target) {
rc->this_frame_target = target;
// Modify frame size target when down-scaled.
- if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC && !av1_resize_unscaled(cpi))
+ if (!av1_frame_unscaled(cm))
rc->this_frame_target =
(int)(rc->this_frame_target * av1_resize_rate_factor(cpi));
@@ -1663,3 +1663,64 @@ void av1_set_target_rate(AV1_COMP *cpi) {
vbr_rate_correction(cpi, &target_rate);
av1_rc_set_frame_target(cpi, target_rate);
}
+
+static unsigned int lcg_rand16(unsigned int *state) {
+ *state = (unsigned int)(*state * 1103515245ULL + 12345);
+ return *state / 65536 % 32768;
+}
+
+uint8_t av1_calculate_next_resize_scale(const AV1_COMP *cpi) {
+ static unsigned int seed = 56789;
+ const AV1EncoderConfig *oxcf = &cpi->oxcf;
+ if (oxcf->pass == 1) return SCALE_DENOMINATOR;
+ uint8_t new_num = SCALE_DENOMINATOR;
+
+ switch (oxcf->resize_mode) {
+ case RESIZE_NONE: new_num = SCALE_DENOMINATOR; break;
+ case RESIZE_FIXED:
+ if (cpi->common.frame_type == KEY_FRAME)
+ new_num = oxcf->resize_kf_scale_numerator;
+ else
+ new_num = oxcf->resize_scale_numerator;
+ break;
+ case RESIZE_DYNAMIC:
+ // RESIZE_DYNAMIC: Just random for now.
+ new_num = lcg_rand16(&seed) % 4 + 13;
+ break;
+ default: assert(0);
+ }
+ return new_num;
+}
+
+#if CONFIG_FRAME_SUPERRES
+// TODO(afergs): Rename av1_rc_update_superres_scale(...)?
+uint8_t av1_calculate_next_superres_scale(const AV1_COMP *cpi, int width,
+ int height) {
+ static unsigned int seed = 34567;
+ const AV1EncoderConfig *oxcf = &cpi->oxcf;
+ if (oxcf->pass == 1) return SCALE_DENOMINATOR;
+ uint8_t new_num = SCALE_DENOMINATOR;
+
+ switch (oxcf->superres_mode) {
+ case SUPERRES_NONE: new_num = SCALE_DENOMINATOR; break;
+ case SUPERRES_FIXED:
+ if (cpi->common.frame_type == KEY_FRAME)
+ new_num = oxcf->superres_kf_scale_numerator;
+ else
+ new_num = oxcf->superres_scale_numerator;
+ break;
+ case SUPERRES_DYNAMIC:
+ // SUPERRES_DYNAMIC: Just random for now.
+ new_num = lcg_rand16(&seed) % 9 + 8;
+ break;
+ default: assert(0);
+ }
+
+ // Make sure overall reduction is no more than 1/2 of the source size.
+ av1_calculate_scaled_size(&width, &height, new_num);
+ if (width * 2 < oxcf->width || height * 2 < oxcf->height)
+ new_num = SCALE_DENOMINATOR;
+
+ return new_num;
+}
+#endif // CONFIG_FRAME_SUPERRES
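
In the dynamic modes added above the scale numerator is drawn per frame from a small LCG: % 4 + 13 for resize and % 9 + 8 for superres, then vetoed if the frame would shrink past half the configured size. A condensed sketch of the superres pick, assuming SCALE_DENOMINATOR is 16 in this snapshot and that av1_calculate_scaled_size() scales each dimension by new_num / SCALE_DENOMINATOR:

#include <stdint.h>

/* Sketch only: mirrors av1_calculate_next_superres_scale's dynamic branch
 * under the stated assumptions about SCALE_DENOMINATOR and the scaler. */
static uint8_t pick_superres_num_sketch(unsigned int *seed, int src_w,
                                        int src_h) {
  *seed = (unsigned int)(*seed * 1103515245ULL + 12345); /* lcg_rand16 */
  const unsigned int r = *seed / 65536 % 32768;
  uint8_t num = (uint8_t)(r % 9 + 8); /* 8..16: half size up to full size */
  const int w = src_w * num / 16;     /* assumed SCALE_DENOMINATOR == 16 */
  const int h = src_h * num / 16;
  if (w * 2 < src_w || h * 2 < src_h) num = 16; /* never go below 1/2 */
  return num;
}
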
diff --git a/third_party/aom/av1/encoder/ratectrl.h b/third_party/aom/av1/encoder/ratectrl.h
index 61bb0c224..4ebdfadd6 100644
--- a/third_party/aom/av1/encoder/ratectrl.h
+++ b/third_party/aom/av1/encoder/ratectrl.h
@@ -256,6 +256,11 @@ void av1_set_target_rate(struct AV1_COMP *cpi);
int av1_resize_one_pass_cbr(struct AV1_COMP *cpi);
+uint8_t av1_calculate_next_resize_scale(const struct AV1_COMP *cpi);
+#if CONFIG_FRAME_SUPERRES
+uint8_t av1_calculate_next_superres_scale(const struct AV1_COMP *cpi, int width,
+ int height);
+#endif // CONFIG_FRAME_SUPERRES
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/encoder/rd.c b/third_party/aom/av1/encoder/rd.c
index 94c3bb96d..da3b6e209 100644
--- a/third_party/aom/av1/encoder/rd.c
+++ b/third_party/aom/av1/encoder/rd.c
@@ -50,14 +50,15 @@
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
-static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
-#if CONFIG_CB4X4
+static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
2, 2, 2,
#endif
- 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32,
+ 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32,
#if CONFIG_EXT_PARTITION
- 48, 48, 64
+ 48, 48, 64,
#endif // CONFIG_EXT_PARTITION
+ 4, 4, 8, 8
};
static void fill_mode_costs(AV1_COMP *cpi) {
@@ -66,16 +67,16 @@ static void fill_mode_costs(AV1_COMP *cpi) {
for (i = 0; i < INTRA_MODES; ++i)
for (j = 0; j < INTRA_MODES; ++j)
- av1_cost_tokens(cpi->y_mode_costs[i][j], av1_kf_y_mode_prob[i][j],
- av1_intra_mode_tree);
+ av1_cost_tokens_from_cdf(cpi->y_mode_costs[i][j], av1_kf_y_mode_cdf[i][j],
+ av1_intra_mode_inv);
for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
- av1_cost_tokens(cpi->mbmode_cost[i], fc->y_mode_prob[i],
- av1_intra_mode_tree);
+ av1_cost_tokens_from_cdf(cpi->mbmode_cost[i], fc->y_mode_cdf[i],
+ av1_intra_mode_inv);
for (i = 0; i < INTRA_MODES; ++i)
- av1_cost_tokens(cpi->intra_uv_mode_cost[i], fc->uv_mode_prob[i],
- av1_intra_mode_tree);
+ av1_cost_tokens_from_cdf(cpi->intra_uv_mode_cost[i], fc->uv_mode_cdf[i],
+ av1_intra_mode_inv);
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
av1_cost_tokens(cpi->switchable_interp_costs[i],
@@ -83,20 +84,18 @@ static void fill_mode_costs(AV1_COMP *cpi) {
#if CONFIG_PALETTE
for (i = 0; i < PALETTE_BLOCK_SIZES; ++i) {
- av1_cost_tokens(cpi->palette_y_size_cost[i],
- av1_default_palette_y_size_prob[i], av1_palette_size_tree);
- av1_cost_tokens(cpi->palette_uv_size_cost[i],
- av1_default_palette_uv_size_prob[i], av1_palette_size_tree);
+ av1_cost_tokens_from_cdf(cpi->palette_y_size_cost[i],
+ fc->palette_y_size_cdf[i], NULL);
+ av1_cost_tokens_from_cdf(cpi->palette_uv_size_cost[i],
+ fc->palette_uv_size_cdf[i], NULL);
}
for (i = 0; i < PALETTE_SIZES; ++i) {
for (j = 0; j < PALETTE_COLOR_INDEX_CONTEXTS; ++j) {
- av1_cost_tokens(cpi->palette_y_color_cost[i][j],
- av1_default_palette_y_color_index_prob[i][j],
- av1_palette_color_index_tree[i]);
- av1_cost_tokens(cpi->palette_uv_color_cost[i][j],
- av1_default_palette_uv_color_index_prob[i][j],
- av1_palette_color_index_tree[i]);
+ av1_cost_tokens_from_cdf(cpi->palette_y_color_cost[i][j],
+ fc->palette_y_color_index_cdf[i][j], NULL);
+ av1_cost_tokens_from_cdf(cpi->palette_uv_color_cost[i][j],
+ fc->palette_uv_color_index_cdf[i][j], NULL);
}
}
#endif // CONFIG_PALETTE
@@ -147,8 +146,9 @@ static void fill_mode_costs(AV1_COMP *cpi) {
av1_switchable_restore_tree);
#endif // CONFIG_LOOP_RESTORATION
#if CONFIG_GLOBAL_MOTION
- av1_cost_tokens(cpi->gmtype_cost, fc->global_motion_types_prob,
- av1_global_motion_types_tree);
+ for (i = 0; i < TRANS_TYPES; ++i)
+ cpi->gmtype_cost[i] = (1 + (i > 0 ? GLOBAL_TYPE_BITS : 0))
+ << AV1_PROB_COST_SHIFT;
#endif // CONFIG_GLOBAL_MOTION
}
@@ -301,7 +301,7 @@ static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd) {
0, MAXQ);
const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);
- for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
+ for (bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
// Threshold here seems unnecessarily harsh but fine given actual
// range of values used for cpi->sf.thresh_mult[].
const int t = q * rd_thresh_block_size_factor[bsize];
@@ -350,7 +350,6 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) {
aom_clear_system_state();
- rd->RDDIV = RDDIV_BITS; // In bits (to multiply D by 128).
rd->RDMULT = av1_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
set_error_per_bit(x, rd->RDMULT);
@@ -367,6 +366,16 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) {
x->mvcost = x->mv_cost_stack[0];
x->nmvjointcost = x->nmv_vec_cost[0];
+#if CONFIG_INTRABC
+ if (frame_is_intra_only(cm) && cm->allow_screen_content_tools &&
+ cpi->oxcf.pass != 1) {
+ av1_build_nmv_cost_table(
+ x->nmv_vec_cost[0],
+ cm->allow_high_precision_mv ? x->nmvcost_hp[0] : x->nmvcost[0],
+ &cm->fc->ndvc, MV_SUBPEL_NONE);
+ }
+#endif
+
if (cpi->oxcf.pass != 1) {
av1_fill_token_costs(x->token_costs, cm->fc->coef_probs);
@@ -434,6 +443,12 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) {
av1_cost_tokens((int *)cpi->inter_compound_mode_cost[i],
cm->fc->inter_compound_mode_probs[i],
av1_inter_compound_mode_tree);
+#if CONFIG_COMPOUND_SINGLEREF
+ for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
+ av1_cost_tokens((int *)cpi->inter_singleref_comp_mode_cost[i],
+ cm->fc->inter_singleref_comp_mode_probs[i],
+ av1_inter_singleref_comp_mode_tree);
+#endif // CONFIG_COMPOUND_SINGLEREF
#if CONFIG_INTERINTRA
for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
av1_cost_tokens((int *)cpi->interintra_mode_cost[i],
@@ -442,16 +457,22 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) {
#endif // CONFIG_INTERINTRA
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- for (i = BLOCK_8X8; i < BLOCK_SIZES; i++) {
+ for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
av1_cost_tokens((int *)cpi->motion_mode_cost[i],
cm->fc->motion_mode_prob[i], av1_motion_mode_tree);
}
#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
- for (i = BLOCK_8X8; i < BLOCK_SIZES; i++) {
+ for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
cpi->motion_mode_cost1[i][0] = av1_cost_bit(cm->fc->obmc_prob[i], 0);
cpi->motion_mode_cost1[i][1] = av1_cost_bit(cm->fc->obmc_prob[i], 1);
}
#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
+#if CONFIG_MOTION_VAR && CONFIG_NCOBMC_ADAPT_WEIGHT
+ for (i = ADAPT_OVERLAP_BLOCK_8X8; i < ADAPT_OVERLAP_BLOCKS; ++i) {
+ av1_cost_tokens((int *)cpi->ncobmc_mode_cost[i],
+ cm->fc->ncobmc_mode_prob[i], av1_ncobmc_mode_tree);
+ }
+#endif
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
}
}
@@ -648,7 +669,7 @@ static void get_entropy_contexts_plane(
for (i = 0; i < num_4x4_h; i += 8)
t_left[i] = !!*(const uint64_t *)&left[i];
break;
-#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
case TX_4X16:
for (i = 0; i < num_4x4_w; i += 2)
t_above[i] = !!*(const uint16_t *)&above[i];
@@ -675,7 +696,7 @@ static void get_entropy_contexts_plane(
for (i = 0; i < num_4x4_h; i += 4)
t_left[i] = !!*(const uint32_t *)&left[i];
break;
-#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#endif
default: assert(0 && "Invalid transform size."); break;
}
@@ -749,7 +770,7 @@ static void get_entropy_contexts_plane(
for (i = 0; i < num_4x4_h; i += 4)
t_left[i] = !!*(const uint32_t *)&left[i];
break;
-#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
case TX_4X16:
memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
for (i = 0; i < num_4x4_h; i += 4)
@@ -772,7 +793,7 @@ static void get_entropy_contexts_plane(
for (i = 0; i < num_4x4_h; i += 2)
t_left[i] = !!*(const uint16_t *)&left[i];
break;
-#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#endif
default: assert(0 && "Invalid transform size."); break;
}
}
@@ -781,7 +802,7 @@ void av1_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
const struct macroblockd_plane *pd,
ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE],
ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE]) {
-#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
+#if CONFIG_CHROMA_SUB8X8
const BLOCK_SIZE plane_bsize =
AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
#else
@@ -983,6 +1004,54 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
#if CONFIG_EXT_INTER
+#if CONFIG_COMPOUND_SINGLEREF
+ rd->thresh_mult[THR_SR_NEAREST_NEARMV] += 1200;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_SR_NEAREST_NEARL2] += 1200;
+ rd->thresh_mult[THR_SR_NEAREST_NEARL3] += 1200;
+ rd->thresh_mult[THR_SR_NEAREST_NEARB] += 1200;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_SR_NEAREST_NEARA] += 1200;
+ rd->thresh_mult[THR_SR_NEAREST_NEARG] += 1200;
+
+ /*
+ rd->thresh_mult[THR_SR_NEAREST_NEWMV] += 1200;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_SR_NEAREST_NEWL2] += 1200;
+ rd->thresh_mult[THR_SR_NEAREST_NEWL3] += 1200;
+ rd->thresh_mult[THR_SR_NEAREST_NEWB] += 1200;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_SR_NEAREST_NEWA] += 1200;
+ rd->thresh_mult[THR_SR_NEAREST_NEWG] += 1200;*/
+
+ rd->thresh_mult[THR_SR_NEAR_NEWMV] += 1500;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_SR_NEAR_NEWL2] += 1500;
+ rd->thresh_mult[THR_SR_NEAR_NEWL3] += 1500;
+ rd->thresh_mult[THR_SR_NEAR_NEWB] += 1500;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_SR_NEAR_NEWA] += 1500;
+ rd->thresh_mult[THR_SR_NEAR_NEWG] += 1500;
+
+ rd->thresh_mult[THR_SR_ZERO_NEWMV] += 2000;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_SR_ZERO_NEWL2] += 2000;
+ rd->thresh_mult[THR_SR_ZERO_NEWL3] += 2000;
+ rd->thresh_mult[THR_SR_ZERO_NEWB] += 2000;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_SR_ZERO_NEWA] += 2000;
+ rd->thresh_mult[THR_SR_ZERO_NEWG] += 2000;
+
+ rd->thresh_mult[THR_SR_NEW_NEWMV] += 1700;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_SR_NEW_NEWL2] += 1700;
+ rd->thresh_mult[THR_SR_NEW_NEWL3] += 1700;
+ rd->thresh_mult[THR_SR_NEW_NEWB] += 1700;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_SR_NEW_NEWA] += 1700;
+ rd->thresh_mult[THR_SR_NEW_NEWG] += 1700;
+#endif // CONFIG_COMPOUND_SINGLEREF
+
rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] += 1000;
#if CONFIG_EXT_REFS
rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] += 1000;
@@ -994,6 +1063,13 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] += 1000;
rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] += 1000;
rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] += 1000;
+
+#if CONFIG_EXT_COMP_REFS
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] += 1000;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] += 1000;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] += 1000;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] += 1000;
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
#else // CONFIG_EXT_INTER
@@ -1009,6 +1085,12 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_NEARESTL2B] += 1000;
rd->thresh_mult[THR_COMP_NEARESTL3B] += 1000;
rd->thresh_mult[THR_COMP_NEARESTGB] += 1000;
+#if CONFIG_EXT_COMP_REFS
+ rd->thresh_mult[THR_COMP_NEARESTLL2] += 1000;
+ rd->thresh_mult[THR_COMP_NEARESTLL3] += 1000;
+ rd->thresh_mult[THR_COMP_NEARESTLG] += 1000;
+ rd->thresh_mult[THR_COMP_NEARESTBA] += 1000;
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
#endif // CONFIG_EXT_INTER
@@ -1081,6 +1163,40 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_NEW_NEARGB] += 1700;
rd->thresh_mult[THR_COMP_NEW_NEWGB] += 2000;
rd->thresh_mult[THR_COMP_ZERO_ZEROGB] += 2500;
+
+#if CONFIG_EXT_COMP_REFS
+ rd->thresh_mult[THR_COMP_NEAR_NEARLL2] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWLL2] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARLL2] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWLL2] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROLL2] += 2500;
+
+ rd->thresh_mult[THR_COMP_NEAR_NEARLL3] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWLL3] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARLL3] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWLL3] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROLL3] += 2500;
+
+ rd->thresh_mult[THR_COMP_NEAR_NEARLG] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWLG] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTLG] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWLG] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARLG] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWLG] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROLG] += 2500;
+
+ rd->thresh_mult[THR_COMP_NEAR_NEARBA] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWBA] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTBA] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWBA] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARBA] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWBA] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROBA] += 2500;
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
#else // CONFIG_EXT_INTER
@@ -1105,6 +1221,17 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_NEWL3B] += 2000;
rd->thresh_mult[THR_COMP_NEARGB] += 1500;
rd->thresh_mult[THR_COMP_NEWGB] += 2000;
+
+#if CONFIG_EXT_COMP_REFS
+ rd->thresh_mult[THR_COMP_NEARLL2] += 1500;
+ rd->thresh_mult[THR_COMP_NEWLL2] += 2000;
+ rd->thresh_mult[THR_COMP_NEARLL3] += 1500;
+ rd->thresh_mult[THR_COMP_NEWLL3] += 2000;
+ rd->thresh_mult[THR_COMP_NEARLG] += 1500;
+ rd->thresh_mult[THR_COMP_NEWLG] += 2000;
+ rd->thresh_mult[THR_COMP_NEARBA] += 1500;
+ rd->thresh_mult[THR_COMP_NEWBA] += 2000;
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
@@ -1119,6 +1246,13 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_ZEROL2B] += 2500;
rd->thresh_mult[THR_COMP_ZEROL3B] += 2500;
rd->thresh_mult[THR_COMP_ZEROGB] += 2500;
+
+#if CONFIG_EXT_COMP_REFS
+ rd->thresh_mult[THR_COMP_ZEROLL2] += 2500;
+ rd->thresh_mult[THR_COMP_ZEROLL3] += 2500;
+ rd->thresh_mult[THR_COMP_ZEROLG] += 2500;
+ rd->thresh_mult[THR_COMP_ZEROBA] += 2500;
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
#endif // CONFIG_EXT_INTER
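
The global-motion type cost in fill_mode_costs() is no longer read off a probability tree; the replacement charges a flat one bit for the identity model and 1 + GLOBAL_TYPE_BITS bits for every other type, scaled into probability-cost units. A worked instance, assuming AV1_PROB_COST_SHIFT is 9 and taking GLOBAL_TYPE_BITS = 3 purely for illustration:

#define AV1_PROB_COST_SHIFT 9 /* assumed value from aom_dsp/prob.h */
/* cost[i] = (1 + (i > 0 ? GLOBAL_TYPE_BITS : 0)) << AV1_PROB_COST_SHIFT
 * With a hypothetical GLOBAL_TYPE_BITS of 3:
 *   gmtype_cost[0] = (1 + 0) << 9 = 512   (IDENTITY: 1 bit)
 *   gmtype_cost[i] = (1 + 3) << 9 = 2048  (all other types: 4 bits) */
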
diff --git a/third_party/aom/av1/encoder/rd.h b/third_party/aom/av1/encoder/rd.h
index 5c3eee493..ea5115b41 100644
--- a/third_party/aom/av1/encoder/rd.h
+++ b/third_party/aom/av1/encoder/rd.h
@@ -30,12 +30,13 @@ extern "C" {
#define RDDIV_BITS 7
#define RD_EPB_SHIFT 6
-#define RDCOST(RM, DM, R, D) \
- (ROUND_POWER_OF_TWO(((int64_t)R) * (RM), AV1_PROB_COST_SHIFT) + (D << DM))
+#define RDCOST(RM, R, D) \
+ (ROUND_POWER_OF_TWO(((int64_t)R) * (RM), AV1_PROB_COST_SHIFT) + \
+ (D << RDDIV_BITS))
-#define RDCOST_DBL(RM, DM, R, D) \
+#define RDCOST_DBL(RM, R, D) \
(((((double)(R)) * (RM)) / (double)(1 << AV1_PROB_COST_SHIFT)) + \
- ((double)(D) * (1 << (DM))))
+ ((double)(D) * (1 << RDDIV_BITS)))
#define QIDX_SKIP_THRESH 115
@@ -96,6 +97,54 @@ typedef enum {
#if CONFIG_EXT_INTER
+#if CONFIG_COMPOUND_SINGLEREF
+ THR_SR_NEAREST_NEARMV,
+#if CONFIG_EXT_REFS
+ THR_SR_NEAREST_NEARL2,
+ THR_SR_NEAREST_NEARL3,
+ THR_SR_NEAREST_NEARB,
+#endif // CONFIG_EXT_REFS
+ THR_SR_NEAREST_NEARG,
+ THR_SR_NEAREST_NEARA,
+
+ /*
+ THR_SR_NEAREST_NEWMV,
+#if CONFIG_EXT_REFS
+ THR_SR_NEAREST_NEWL2,
+ THR_SR_NEAREST_NEWL3,
+ THR_SR_NEAREST_NEWB,
+#endif // CONFIG_EXT_REFS
+ THR_SR_NEAREST_NEWG,
+ THR_SR_NEAREST_NEWA,*/
+
+ THR_SR_NEAR_NEWMV,
+#if CONFIG_EXT_REFS
+ THR_SR_NEAR_NEWL2,
+ THR_SR_NEAR_NEWL3,
+ THR_SR_NEAR_NEWB,
+#endif // CONFIG_EXT_REFS
+ THR_SR_NEAR_NEWG,
+ THR_SR_NEAR_NEWA,
+
+ THR_SR_ZERO_NEWMV,
+#if CONFIG_EXT_REFS
+ THR_SR_ZERO_NEWL2,
+ THR_SR_ZERO_NEWL3,
+ THR_SR_ZERO_NEWB,
+#endif // CONFIG_EXT_REFS
+ THR_SR_ZERO_NEWG,
+ THR_SR_ZERO_NEWA,
+
+ THR_SR_NEW_NEWMV,
+#if CONFIG_EXT_REFS
+ THR_SR_NEW_NEWL2,
+ THR_SR_NEW_NEWL3,
+ THR_SR_NEW_NEWB,
+#endif // CONFIG_EXT_REFS
+ THR_SR_NEW_NEWG,
+ THR_SR_NEW_NEWA,
+#endif // CONFIG_COMPOUND_SINGLEREF
+
THR_COMP_NEAREST_NEARESTLA,
#if CONFIG_EXT_REFS
THR_COMP_NEAREST_NEARESTL2A,
@@ -107,6 +156,12 @@ typedef enum {
THR_COMP_NEAREST_NEARESTL2B,
THR_COMP_NEAREST_NEARESTL3B,
THR_COMP_NEAREST_NEARESTGB,
+#if CONFIG_EXT_COMP_REFS
+ THR_COMP_NEAREST_NEARESTLL2,
+ THR_COMP_NEAREST_NEARESTLL3,
+ THR_COMP_NEAREST_NEARESTLG,
+ THR_COMP_NEAREST_NEARESTBA,
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
#else // CONFIG_EXT_INTER
@@ -122,6 +177,12 @@ typedef enum {
THR_COMP_NEARESTL2B,
THR_COMP_NEARESTL3B,
THR_COMP_NEARESTGB,
+#if CONFIG_EXT_COMP_REFS
+ THR_COMP_NEARESTLL2,
+ THR_COMP_NEARESTLL3,
+ THR_COMP_NEARESTLG,
+ THR_COMP_NEARESTBA,
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
#endif // CONFIG_EXT_INTER
@@ -138,8 +199,6 @@ typedef enum {
#if CONFIG_EXT_INTER
- THR_COMP_NEAR_NEARESTLA,
- THR_COMP_NEAREST_NEARLA,
THR_COMP_NEAR_NEARLA,
THR_COMP_NEW_NEARESTLA,
THR_COMP_NEAREST_NEWLA,
@@ -149,8 +208,6 @@ typedef enum {
THR_COMP_ZERO_ZEROLA,
#if CONFIG_EXT_REFS
- THR_COMP_NEAR_NEARESTL2A,
- THR_COMP_NEAREST_NEARL2A,
THR_COMP_NEAR_NEARL2A,
THR_COMP_NEW_NEARESTL2A,
THR_COMP_NEAREST_NEWL2A,
@@ -159,8 +216,6 @@ typedef enum {
THR_COMP_NEW_NEWL2A,
THR_COMP_ZERO_ZEROL2A,
- THR_COMP_NEAR_NEARESTL3A,
- THR_COMP_NEAREST_NEARL3A,
THR_COMP_NEAR_NEARL3A,
THR_COMP_NEW_NEARESTL3A,
THR_COMP_NEAREST_NEWL3A,
@@ -170,8 +225,6 @@ typedef enum {
THR_COMP_ZERO_ZEROL3A,
#endif // CONFIG_EXT_REFS
- THR_COMP_NEAR_NEARESTGA,
- THR_COMP_NEAREST_NEARGA,
THR_COMP_NEAR_NEARGA,
THR_COMP_NEW_NEARESTGA,
THR_COMP_NEAREST_NEWGA,
@@ -181,8 +234,6 @@ typedef enum {
THR_COMP_ZERO_ZEROGA,
#if CONFIG_EXT_REFS
- THR_COMP_NEAR_NEARESTLB,
- THR_COMP_NEAREST_NEARLB,
THR_COMP_NEAR_NEARLB,
THR_COMP_NEW_NEARESTLB,
THR_COMP_NEAREST_NEWLB,
@@ -191,8 +242,6 @@ typedef enum {
THR_COMP_NEW_NEWLB,
THR_COMP_ZERO_ZEROLB,
- THR_COMP_NEAR_NEARESTL2B,
- THR_COMP_NEAREST_NEARL2B,
THR_COMP_NEAR_NEARL2B,
THR_COMP_NEW_NEARESTL2B,
THR_COMP_NEAREST_NEWL2B,
@@ -201,8 +250,6 @@ typedef enum {
THR_COMP_NEW_NEWL2B,
THR_COMP_ZERO_ZEROL2B,
- THR_COMP_NEAR_NEARESTL3B,
- THR_COMP_NEAREST_NEARL3B,
THR_COMP_NEAR_NEARL3B,
THR_COMP_NEW_NEARESTL3B,
THR_COMP_NEAREST_NEWL3B,
@@ -211,8 +258,6 @@ typedef enum {
THR_COMP_NEW_NEWL3B,
THR_COMP_ZERO_ZEROL3B,
- THR_COMP_NEAR_NEARESTGB,
- THR_COMP_NEAREST_NEARGB,
THR_COMP_NEAR_NEARGB,
THR_COMP_NEW_NEARESTGB,
THR_COMP_NEAREST_NEWGB,
@@ -220,6 +265,40 @@ typedef enum {
THR_COMP_NEAR_NEWGB,
THR_COMP_NEW_NEWGB,
THR_COMP_ZERO_ZEROGB,
+
+#if CONFIG_EXT_COMP_REFS
+ THR_COMP_NEAR_NEARLL2,
+ THR_COMP_NEW_NEARESTLL2,
+ THR_COMP_NEAREST_NEWLL2,
+ THR_COMP_NEW_NEARLL2,
+ THR_COMP_NEAR_NEWLL2,
+ THR_COMP_NEW_NEWLL2,
+ THR_COMP_ZERO_ZEROLL2,
+
+ THR_COMP_NEAR_NEARLL3,
+ THR_COMP_NEW_NEARESTLL3,
+ THR_COMP_NEAREST_NEWLL3,
+ THR_COMP_NEW_NEARLL3,
+ THR_COMP_NEAR_NEWLL3,
+ THR_COMP_NEW_NEWLL3,
+ THR_COMP_ZERO_ZEROLL3,
+
+ THR_COMP_NEAR_NEARLG,
+ THR_COMP_NEW_NEARESTLG,
+ THR_COMP_NEAREST_NEWLG,
+ THR_COMP_NEW_NEARLG,
+ THR_COMP_NEAR_NEWLG,
+ THR_COMP_NEW_NEWLG,
+ THR_COMP_ZERO_ZEROLG,
+
+ THR_COMP_NEAR_NEARBA,
+ THR_COMP_NEW_NEARESTBA,
+ THR_COMP_NEAREST_NEWBA,
+ THR_COMP_NEW_NEARBA,
+ THR_COMP_NEAR_NEWBA,
+ THR_COMP_NEW_NEWBA,
+ THR_COMP_ZERO_ZEROBA,
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
#else // CONFIG_EXT_INTER
@@ -244,6 +323,17 @@ typedef enum {
THR_COMP_NEWL3B,
THR_COMP_NEARGB,
THR_COMP_NEWGB,
+
+#if CONFIG_EXT_COMP_REFS
+ THR_COMP_NEARLL2,
+ THR_COMP_NEWLL2,
+ THR_COMP_NEARLL3,
+ THR_COMP_NEWLL3,
+ THR_COMP_NEARLG,
+ THR_COMP_NEWLG,
+ THR_COMP_NEARBA,
+ THR_COMP_NEWBA,
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
THR_COMP_ZEROLA,
@@ -258,6 +348,13 @@ typedef enum {
THR_COMP_ZEROL2B,
THR_COMP_ZEROL3B,
THR_COMP_ZEROGB,
+
+#if CONFIG_EXT_COMP_REFS
+ THR_COMP_ZEROLL2,
+ THR_COMP_ZEROLL3,
+ THR_COMP_ZEROLG,
+ THR_COMP_ZEROBA,
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
#endif // CONFIG_EXT_INTER
@@ -344,12 +441,11 @@ typedef struct RD_OPT {
int thresh_mult[MAX_MODES];
int thresh_mult_sub8x8[MAX_REFS];
- int threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES];
+ int threshes[MAX_SEGMENTS][BLOCK_SIZES_ALL][MAX_MODES];
int64_t prediction_type_threshes[TOTAL_REFS_PER_FRAME][REFERENCE_MODES];
int RDMULT;
- int RDDIV;
} RD_OPT;
static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) {
@@ -361,7 +457,9 @@ static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) {
rd_stats->rdcost = 0;
rd_stats->sse = 0;
rd_stats->skip = 1;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ rd_stats->zero_rate = 0;
+ rd_stats->ref_rdcost = INT64_MAX;
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
rd_stats->dist_y = 0;
#endif
#if CONFIG_RD_DEBUG
@@ -388,7 +486,9 @@ static INLINE void av1_invalid_rd_stats(RD_STATS *rd_stats) {
rd_stats->rdcost = INT64_MAX;
rd_stats->sse = INT64_MAX;
rd_stats->skip = 0;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ rd_stats->zero_rate = 0;
+ rd_stats->ref_rdcost = INT64_MAX;
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
rd_stats->dist_y = INT64_MAX;
#endif
#if CONFIG_RD_DEBUG
@@ -415,7 +515,7 @@ static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst,
rd_stats_dst->dist += rd_stats_src->dist;
rd_stats_dst->sse += rd_stats_src->sse;
rd_stats_dst->skip &= rd_stats_src->skip;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
rd_stats_dst->dist_y += rd_stats_src->dist_y;
#endif
#if CONFIG_RD_DEBUG
diff --git a/third_party/aom/av1/encoder/rdopt.c b/third_party/aom/av1/encoder/rdopt.c
index 2a537a06a..43b00b83b 100644
--- a/third_party/aom/av1/encoder/rdopt.c
+++ b/third_party/aom/av1/encoder/rdopt.c
@@ -63,7 +63,7 @@
#endif // CONFIG_PVQ
#if CONFIG_PVQ || CONFIG_DAALA_DIST
#include "av1/common/pvq.h"
-#endif // CONFIG_PVQ || CONFIG_DAALA_DIST
+#endif // CONFIG_PVQ || CONFIG_DIST_8X8
#if CONFIG_DUAL_FILTER
#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
#if USE_EXTRA_FILTER
@@ -113,8 +113,14 @@ static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
#endif // CONFIG_EXT_REFS
#if CONFIG_EXT_REFS
+#if CONFIG_EXT_COMP_REFS
+#define SECOND_REF_FRAME_MASK \
+ ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | (1 << GOLDEN_FRAME) | \
+ (1 << LAST2_FRAME) | 0x01) // NOLINT
+#else // !CONFIG_EXT_COMP_REFS
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)
-#else
+#endif // CONFIG_EXT_COMP_REFS
+#else // !CONFIG_EXT_REFS
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
#endif // CONFIG_EXT_REFS
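
SECOND_REF_FRAME_MASK is a bitmask indexed by reference-frame enum value, built from (1 << FRAME) terms as above; the CONFIG_EXT_COMP_REFS variant additionally admits GOLDEN_FRAME and LAST2_FRAME so the same-sided compound pairs added further down can be reached. A hedged membership sketch, consistent only with how the mask is constructed here (the helper name is hypothetical, not part of the patch):

  static INLINE int is_allowed_second_ref(int frame) {
    // Bit `frame` of the mask is set iff `frame` may serve as the second reference.
    return (SECOND_REF_FRAME_MASK >> frame) & 1;
  }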
@@ -126,6 +132,11 @@ static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
#define FILTER_FAST_SEARCH 1
#endif // CONFIG_EXT_INTRA
+// Setting this to 1 will disable trellis optimization within the
+// transform search. Trellis optimization will still be applied
+// in the final encode.
+#define DISABLE_TRELLISQ_SEARCH 0
+
const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671, // vert
-7.7051, -3.2234, -3.6193, 3.4533 }; // horz
@@ -191,6 +202,56 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
// TODO(zoeliu): May need to reconsider the order on the modes to check
#if CONFIG_EXT_INTER
+
+#if CONFIG_COMPOUND_SINGLEREF
+ // Single ref comp mode
+ { SR_NEAREST_NEARMV, { LAST_FRAME, NONE_FRAME } },
+#if CONFIG_EXT_REFS
+ { SR_NEAREST_NEARMV, { LAST2_FRAME, NONE_FRAME } },
+ { SR_NEAREST_NEARMV, { LAST3_FRAME, NONE_FRAME } },
+ { SR_NEAREST_NEARMV, { BWDREF_FRAME, NONE_FRAME } },
+#endif // CONFIG_EXT_REFS
+ { SR_NEAREST_NEARMV, { GOLDEN_FRAME, NONE_FRAME } },
+ { SR_NEAREST_NEARMV, { ALTREF_FRAME, NONE_FRAME } },
+
+ /*
+ { SR_NEAREST_NEWMV, { LAST_FRAME, NONE_FRAME } },
+#if CONFIG_EXT_REFS
+ { SR_NEAREST_NEWMV, { LAST2_FRAME, NONE_FRAME } },
+ { SR_NEAREST_NEWMV, { LAST3_FRAME, NONE_FRAME } },
+ { SR_NEAREST_NEWMV, { BWDREF_FRAME, NONE_FRAME } },
+#endif // CONFIG_EXT_REFS
+ { SR_NEAREST_NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
+ { SR_NEAREST_NEWMV, { ALTREF_FRAME, NONE_FRAME } },*/
+
+ { SR_NEAR_NEWMV, { LAST_FRAME, NONE_FRAME } },
+#if CONFIG_EXT_REFS
+ { SR_NEAR_NEWMV, { LAST2_FRAME, NONE_FRAME } },
+ { SR_NEAR_NEWMV, { LAST3_FRAME, NONE_FRAME } },
+ { SR_NEAR_NEWMV, { BWDREF_FRAME, NONE_FRAME } },
+#endif // CONFIG_EXT_REFS
+ { SR_NEAR_NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
+ { SR_NEAR_NEWMV, { ALTREF_FRAME, NONE_FRAME } },
+
+ { SR_ZERO_NEWMV, { LAST_FRAME, NONE_FRAME } },
+#if CONFIG_EXT_REFS
+ { SR_ZERO_NEWMV, { LAST2_FRAME, NONE_FRAME } },
+ { SR_ZERO_NEWMV, { LAST3_FRAME, NONE_FRAME } },
+ { SR_ZERO_NEWMV, { BWDREF_FRAME, NONE_FRAME } },
+#endif // CONFIG_EXT_REFS
+ { SR_ZERO_NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
+ { SR_ZERO_NEWMV, { ALTREF_FRAME, NONE_FRAME } },
+
+ { SR_NEW_NEWMV, { LAST_FRAME, NONE_FRAME } },
+#if CONFIG_EXT_REFS
+ { SR_NEW_NEWMV, { LAST2_FRAME, NONE_FRAME } },
+ { SR_NEW_NEWMV, { LAST3_FRAME, NONE_FRAME } },
+ { SR_NEW_NEWMV, { BWDREF_FRAME, NONE_FRAME } },
+#endif // CONFIG_EXT_REFS
+ { SR_NEW_NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
+ { SR_NEW_NEWMV, { ALTREF_FRAME, NONE_FRAME } },
+#endif // CONFIG_COMPOUND_SINGLEREF
+
{ NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
{ NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
@@ -202,6 +263,13 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+
+#if CONFIG_EXT_COMP_REFS
+ { NEAREST_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEAREST_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEAREST_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEAREST_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
#else // CONFIG_EXT_INTER
@@ -217,6 +285,13 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+
+#if CONFIG_EXT_COMP_REFS
+ { NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
#endif // CONFIG_EXT_INTER
@@ -297,9 +372,43 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+
+#if CONFIG_EXT_COMP_REFS
+ { NEAR_NEARMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEW_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEAREST_NEWMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEW_NEARMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEAR_NEWMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEW_NEWMV, { LAST_FRAME, LAST2_FRAME } },
+ { ZERO_ZEROMV, { LAST_FRAME, LAST2_FRAME } },
+
+ { NEAR_NEARMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEW_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEAREST_NEWMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEW_NEARMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEAR_NEWMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEW_NEWMV, { LAST_FRAME, LAST3_FRAME } },
+ { ZERO_ZEROMV, { LAST_FRAME, LAST3_FRAME } },
+
+ { NEAR_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEW_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEAREST_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEW_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEAR_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEW_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { ZERO_ZEROMV, { LAST_FRAME, GOLDEN_FRAME } },
+
+ { NEAR_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
+ { NEW_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
+ { NEAREST_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
+ { NEW_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
+ { NEAR_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
+ { NEW_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
+ { ZERO_ZEROMV, { BWDREF_FRAME, ALTREF_FRAME } },
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
-#else // CONFIG_EXT_INTER
+#else // !CONFIG_EXT_INTER
{ NEARMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEWMV, { LAST_FRAME, ALTREF_FRAME } },
@@ -321,6 +430,17 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+
+#if CONFIG_EXT_COMP_REFS
+ { NEARMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEWMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEARMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEWMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
+ { NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
{ ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
@@ -335,6 +455,13 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
{ ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
{ ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+
+#if CONFIG_EXT_COMP_REFS
+ { ZEROMV, { LAST_FRAME, LAST2_FRAME } },
+ { ZEROMV, { LAST_FRAME, LAST3_FRAME } },
+ { ZEROMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { ZEROMV, { BWDREF_FRAME, ALTREF_FRAME } },
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
#endif // CONFIG_EXT_INTER
@@ -385,6 +512,35 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
#endif // CONFIG_EXT_INTER
};
+static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = {
+ DC_PRED, H_PRED, V_PRED,
+#if CONFIG_ALT_INTRA
+ SMOOTH_PRED,
+#endif // CONFIG_ALT_INTRA
+ TM_PRED,
+#if CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV
+ SMOOTH_V_PRED, SMOOTH_H_PRED,
+#endif // CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV
+ D135_PRED, D207_PRED, D153_PRED, D63_PRED, D117_PRED, D45_PRED,
+};
+
+#if CONFIG_CFL
+static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = {
+ UV_DC_PRED, UV_H_PRED, UV_V_PRED,
+#if CONFIG_ALT_INTRA
+ UV_SMOOTH_PRED,
+#endif // CONFIG_ALT_INTRA
+ UV_TM_PRED,
+#if CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV
+ UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED,
+#endif // CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV
+ UV_D135_PRED, UV_D207_PRED, UV_D153_PRED,
+ UV_D63_PRED, UV_D117_PRED, UV_D45_PRED,
+};
+#else
+#define uv_rd_search_mode_order intra_rd_search_mode_order
+#endif // CONFIG_CFL
+
#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
static INLINE int write_uniform_cost(int n, int v) {
const int l = get_unsigned_bits(n);
@@ -404,7 +560,7 @@ static INLINE int write_uniform_cost(int n, int v) {
#define FAST_EXT_TX_EDST_MARGIN 0.3
#if CONFIG_DAALA_DIST
-static int od_compute_var_4x4(od_coeff *x, int stride) {
+static int od_compute_var_4x4(uint16_t *x, int stride) {
int sum;
int s2;
int i;
@@ -420,7 +576,7 @@ static int od_compute_var_4x4(od_coeff *x, int stride) {
s2 += t * t;
}
}
- // TODO(yushin) : Check wheter any changes are required for high bit depth.
+
return (s2 - (sum * sum >> 4)) >> 4;
}
@@ -431,8 +587,8 @@ static int od_compute_var_4x4(od_coeff *x, int stride) {
#define OD_DIST_LP_MID (5)
#define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)
-static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x,
- od_coeff *y, od_coeff *e_lp, int stride) {
+static double od_compute_dist_8x8(int use_activity_masking, uint16_t *x,
+ uint16_t *y, od_coeff *e_lp, int stride) {
double sum;
int min_var;
double mean_var;
@@ -444,8 +600,7 @@ static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x,
double vardist;
vardist = 0;
- OD_ASSERT(qm != OD_FLAT_QM);
- (void)qm;
+
#if 1
min_var = INT_MAX;
mean_var = 0;
@@ -490,22 +645,61 @@ static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x,
}
// Note : Inputs x and y are in a pixel domain
-static double od_compute_dist(int qm, int activity_masking, od_coeff *x,
- od_coeff *y, int bsize_w, int bsize_h,
- int qindex) {
+static double od_compute_dist_common(int activity_masking, uint16_t *x,
+ uint16_t *y, int bsize_w, int bsize_h,
+ int qindex, od_coeff *tmp,
+ od_coeff *e_lp) {
+ int i, j;
+ double sum = 0;
+ const int mid = OD_DIST_LP_MID;
+
+ for (j = 0; j < bsize_w; j++) {
+ e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
+ e_lp[(bsize_h - 1) * bsize_w + j] = mid * tmp[(bsize_h - 1) * bsize_w + j] +
+ 2 * tmp[(bsize_h - 2) * bsize_w + j];
+ }
+ for (i = 1; i < bsize_h - 1; i++) {
+ for (j = 0; j < bsize_w; j++) {
+ e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
+ tmp[(i - 1) * bsize_w + j] +
+ tmp[(i + 1) * bsize_w + j];
+ }
+ }
+ for (i = 0; i < bsize_h; i += 8) {
+ for (j = 0; j < bsize_w; j += 8) {
+ sum += od_compute_dist_8x8(activity_masking, &x[i * bsize_w + j],
+ &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
+ bsize_w);
+ }
+ }
+ /* Scale according to linear regression against SSE, for 8x8 blocks. */
+ if (activity_masking) {
+ sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
+ (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
+ } else {
+ sum *= qindex >= 128
+ ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
+ : qindex <= 43 ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
+ : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
+ }
+
+ return sum;
+}
+
+static double od_compute_dist(uint16_t *x, uint16_t *y, int bsize_w,
+ int bsize_h, int qindex) {
int i;
double sum;
sum = 0;
assert(bsize_w >= 8 && bsize_h >= 8);
- if (qm == OD_FLAT_QM) {
- for (i = 0; i < bsize_w * bsize_h; i++) {
- double tmp;
- tmp = x[i] - y[i];
- sum += tmp * tmp;
- }
- } else {
+#if CONFIG_PVQ
+ int activity_masking = 1;
+#else
+ int activity_masking = 0;
+#endif
+ {
int j;
DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]);
DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
@@ -525,63 +719,242 @@ static double od_compute_dist(int qm, int activity_masking, od_coeff *x,
e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1];
}
}
- for (j = 0; j < bsize_w; j++) {
- e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
- e_lp[(bsize_h - 1) * bsize_w + j] =
- mid * tmp[(bsize_h - 1) * bsize_w + j] +
- 2 * tmp[(bsize_h - 2) * bsize_w + j];
- }
- for (i = 1; i < bsize_h - 1; i++) {
+ sum = od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
+ qindex, tmp, e_lp);
+ }
+ return sum;
+}
+
+static double od_compute_dist_diff(uint16_t *x, int16_t *e, int bsize_w,
+ int bsize_h, int qindex) {
+ int i;
+ double sum;
+ sum = 0;
+
+ assert(bsize_w >= 8 && bsize_h >= 8);
+
+#if CONFIG_PVQ
+ int activity_masking = 1;
+#else
+ int activity_masking = 0;
+#endif
+ {
+ int j;
+ DECLARE_ALIGNED(16, uint16_t, y[MAX_TX_SQUARE]);
+ DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
+ DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
+ int mid = OD_DIST_LP_MID;
+ for (i = 0; i < bsize_h; i++) {
for (j = 0; j < bsize_w; j++) {
- e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
- tmp[(i - 1) * bsize_w + j] +
- tmp[(i + 1) * bsize_w + j];
+ y[i * bsize_w + j] = x[i * bsize_w + j] - e[i * bsize_w + j];
}
}
- for (i = 0; i < bsize_h; i += 8) {
- for (j = 0; j < bsize_w; j += 8) {
- sum += od_compute_dist_8x8(qm, activity_masking, &x[i * bsize_w + j],
- &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
- bsize_w);
+ for (i = 0; i < bsize_h; i++) {
+ tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
+ tmp[i * bsize_w + bsize_w - 1] =
+ mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
+ for (j = 1; j < bsize_w - 1; j++) {
+ tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] +
+ e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1];
}
}
- /* Scale according to linear regression against SSE, for 8x8 blocks. */
- if (activity_masking) {
- sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
- (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
- } else {
- sum *= qindex >= 128
- ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
- : qindex <= 43
- ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
- : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
- }
+ sum = od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
+ qindex, tmp, e_lp);
}
return sum;
}
+#endif // CONFIG_DAALA_DIST
+
+#if CONFIG_DIST_8X8
+#define NEW_FUTURE_DIST 0
+int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
+ const uint8_t *src, int src_stride, const uint8_t *dst,
+ int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
+ int bsh, int visible_w, int visible_h, int qindex) {
+ int64_t d = 0;
-int64_t av1_daala_dist(const uint8_t *src, int src_stride, const uint8_t *dst,
- int dst_stride, int bsw, int bsh, int qm,
- int use_activity_masking, int qindex) {
+#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST
int i, j;
- int64_t d;
- DECLARE_ALIGNED(16, od_coeff, orig[MAX_TX_SQUARE]);
- DECLARE_ALIGNED(16, od_coeff, rec[MAX_TX_SQUARE]);
- assert(qm == OD_HVS_QM);
+ DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]);
+ DECLARE_ALIGNED(16, uint16_t, rec[MAX_TX_SQUARE]);
+ (void)cpi;
+ (void)tx_bsize;
+#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+
+#if !CONFIG_HIGHBITDEPTH
+ (void)xd;
+#endif
+
+#if !CONFIG_DAALA_DIST
+ (void)qindex;
+#endif
+
+#if !CONFIG_DAALA_DIST || !NEW_FUTURE_DIST
+ (void)xd;
+ (void)bsw, (void)bsh;
+ (void)visible_w, (void)visible_h;
+#endif
+
+#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++)
+ orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
+
+ if ((bsw == visible_w) && (bsh == visible_h)) {
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++)
+ rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
+ } else {
+ for (j = 0; j < visible_h; j++)
+ for (i = 0; i < visible_w; i++)
+ rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
+
+ if (visible_w < bsw) {
+ for (j = 0; j < bsh; j++)
+ for (i = visible_w; i < bsw; i++)
+ rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
+ }
+
+ if (visible_h < bsh) {
+ for (j = visible_h; j < bsh; j++)
+ for (i = 0; i < bsw; i++)
+ rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
+ }
+ }
+ } else {
+#endif
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
+
+ if ((bsw == visible_w) && (bsh == visible_h)) {
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
+ } else {
+ for (j = 0; j < visible_h; j++)
+ for (i = 0; i < visible_w; i++)
+ rec[j * bsw + i] = dst[j * dst_stride + i];
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
+ if (visible_w < bsw) {
+ for (j = 0; j < bsh; j++)
+ for (i = visible_w; i < bsw; i++)
+ rec[j * bsw + i] = src[j * src_stride + i];
+ }
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
+ if (visible_h < bsh) {
+ for (j = visible_h; j < bsh; j++)
+ for (i = 0; i < bsw; i++) rec[j * bsw + i] = src[j * src_stride + i];
+ }
+ }
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+
+#if CONFIG_DAALA_DIST
+ d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex);
+#elif NEW_FUTURE_DIST
+ // Call new 8x8-wise distortion function here, for example
+ for (i = 0; i < bsh; i += 8) {
+ for (j = 0; j < bsw; j += 8) {
+ d +=
+ av1_compute_dist_8x8(&orig[i * bsw + j], &rec[i * bsw + j], bsw, bsh);
+ }
+ }
+#else
+ // Otherwise, MSE by default
+ unsigned sse;
+ // TODO(Any): Use even faster function which does not calculate variance
+ cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
+ d = sse;
+#endif // CONFIG_DAALA_DIST
- d = (int64_t)od_compute_dist(qm, use_activity_masking, orig, rec, bsw, bsh,
- qindex);
return d;
}
+
+static int64_t av1_dist_8x8_diff(const MACROBLOCKD *xd, const uint8_t *src,
+ int src_stride, const int16_t *diff,
+ int diff_stride, int bsw, int bsh,
+ int visible_w, int visible_h, int qindex) {
+ int64_t d = 0;
+
+#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+ int i, j;
+
+ DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]);
+ DECLARE_ALIGNED(16, int16_t, diff16[MAX_TX_SQUARE]);
+#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+
+#if !CONFIG_HIGHBITDEPTH
+ (void)xd;
+#endif
+
+#if !CONFIG_DAALA_DIST
+ (void)qindex;
+#endif
+
+#if !CONFIG_DAALA_DIST || !NEW_FUTURE_DIST
+ (void)xd;
+ (void)src, (void)src_stride;
+ (void)bsw, (void)bsh;
+ (void)visible_w, (void)visible_h;
+#endif
+
+#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++)
+ orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
+ } else {
+#endif
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+
+ if ((bsw == visible_w) && (bsh == visible_h)) {
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++) diff16[j * bsw + i] = diff[j * diff_stride + i];
+ } else {
+ for (j = 0; j < visible_h; j++)
+ for (i = 0; i < visible_w; i++)
+ diff16[j * bsw + i] = diff[j * diff_stride + i];
+
+ if (visible_w < bsw) {
+ for (j = 0; j < bsh; j++)
+ for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0;
+ }
+
+ if (visible_h < bsh) {
+ for (j = visible_h; j < bsh; j++)
+ for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0;
+ }
+ }
+#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+
+#if CONFIG_DAALA_DIST
+ d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex);
+#elif NEW_FUTURE_DIST
+  // Call new 8x8-wise distortion function (with diff input) here, for example
+ for (i = 0; i < bsh; i += 8) {
+ for (j = 0; j < bsw; j += 8) {
+ d += av1_compute_dist_8x8_diff(&orig[i * bsw + j], &diff16[i * bsw + j],
+ bsw, bsh);
+ }
+ }
+#else
+ // Otherwise, MSE by default
+ d = aom_sum_squares_2d_i16(diff, diff_stride, bsw, bsh);
#endif // CONFIG_DAALA_DIST
+ return d;
+}
+#endif // CONFIG_DIST_8X8
+
static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
@@ -892,11 +1265,11 @@ static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
for (plane = plane_from; plane <= plane_to; ++plane) {
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
-#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
+#if CONFIG_CHROMA_SUB8X8
const BLOCK_SIZE bs = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
#else
const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
-#endif // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
+#endif // CONFIG_CHROMA_SUB8X8
unsigned int sse;
int rate;
@@ -1068,7 +1441,7 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
#if !CONFIG_VAR_TX && !CONFIG_SUPERTX
// Check for consistency of tx_size with mode info
- assert(tx_size == get_tx_size(plane, xd));
+ assert(tx_size == av1_get_tx_size(plane, xd));
#endif // !CONFIG_VAR_TX && !CONFIG_SUPERTX
(void)cm;
@@ -1144,10 +1517,12 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
#endif // !CONFIG_LV_MAP
int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
- int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
- const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
- int use_fast_coef_costing) {
+ int blk_row, int blk_col, int block, TX_SIZE tx_size,
+ const SCAN_ORDER *scan_order, const ENTROPY_CONTEXT *a,
+ const ENTROPY_CONTEXT *l, int use_fast_coef_costing) {
#if !CONFIG_LV_MAP
+ (void)blk_row;
+ (void)blk_col;
const AV1_COMMON *const cm = &cpi->common;
return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l,
use_fast_coef_costing);
@@ -1158,13 +1533,11 @@ int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const struct macroblockd_plane *pd = &xd->plane[plane];
const BLOCK_SIZE bsize = mbmi->sb_type;
-#if CONFIG_CB4X4
-#if CONFIG_CHROMA_2X2
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
-#else
+#if CONFIG_CHROMA_SUB8X8
const BLOCK_SIZE plane_bsize =
AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
-#endif // CONFIG_CHROMA_2X2
+#elif CONFIG_CB4X4
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
#else // CONFIG_CB4X4
const BLOCK_SIZE plane_bsize =
get_plane_block_size(AOMMAX(BLOCK_8X8, bsize), pd);
@@ -1172,7 +1545,8 @@ int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
TXB_CTX txb_ctx;
get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
- return av1_cost_coeffs_txb(cpi, x, plane, block, &txb_ctx);
+ return av1_cost_coeffs_txb(cpi, x, plane, blk_row, blk_col, block, tx_size,
+ &txb_ctx);
#endif // !CONFIG_LV_MAP
}
#endif // !CONFIG_PVQ || CONFIG_VAR_TX
@@ -1182,9 +1556,9 @@ static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
BLOCK_SIZE tx_bsize, int *width, int *height,
int *visible_width, int *visible_height) {
-#if !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT)
+#if !(CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX))
assert(tx_bsize <= plane_bsize);
-#endif // !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT)
+#endif
int txb_height = block_size_high[tx_bsize];
int txb_width = block_size_wide[tx_bsize];
const int block_height = block_size_high[plane_bsize];
@@ -1208,19 +1582,31 @@ static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height);
}
-// Compute the pixel domain sum square error on all visible 4x4s in the
+// Compute the pixel domain distortion from src and dst on all visible 4x4s in
+// the
// transform block.
-static unsigned pixel_sse(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
- int plane, const uint8_t *src, const int src_stride,
- const uint8_t *dst, const int dst_stride, int blk_row,
- int blk_col, const BLOCK_SIZE plane_bsize,
- const BLOCK_SIZE tx_bsize) {
+static unsigned pixel_dist(const AV1_COMP *const cpi, const MACROBLOCK *x,
+ int plane, const uint8_t *src, const int src_stride,
+ const uint8_t *dst, const int dst_stride,
+ int blk_row, int blk_col,
+ const BLOCK_SIZE plane_bsize,
+ const BLOCK_SIZE tx_bsize) {
int txb_rows, txb_cols, visible_rows, visible_cols;
+ const MACROBLOCKD *xd = &x->e_mbd;
+
get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize,
&txb_cols, &txb_rows, &visible_cols, &visible_rows);
assert(visible_rows > 0);
assert(visible_cols > 0);
-#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+
+#if CONFIG_DIST_8X8
+ if (plane == 0 && txb_cols >= 8 && txb_rows >= 8)
+ return av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride, tx_bsize,
+ txb_cols, txb_rows, visible_cols, visible_rows,
+ x->qindex);
+#endif // CONFIG_DIST_8X8
+
+#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
if ((txb_rows == visible_rows && txb_cols == visible_cols) &&
tx_bsize < BLOCK_SIZES) {
#else
@@ -1242,36 +1628,86 @@ static unsigned pixel_sse(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
return sse;
}
-// Compute the squares sum squares on all visible 4x4s in the transform block.
-static int64_t sum_squares_visible(const MACROBLOCKD *xd, int plane,
- const int16_t *diff, const int diff_stride,
- int blk_row, int blk_col,
- const BLOCK_SIZE plane_bsize,
- const BLOCK_SIZE tx_bsize) {
+// Compute the pixel domain distortion from diff on all visible 4x4s in the
+// transform block.
+static int64_t pixel_diff_dist(const MACROBLOCK *x, int plane,
+ const int16_t *diff, const int diff_stride,
+ int blk_row, int blk_col,
+ const BLOCK_SIZE plane_bsize,
+ const BLOCK_SIZE tx_bsize) {
int visible_rows, visible_cols;
+ const MACROBLOCKD *xd = &x->e_mbd;
+#if CONFIG_DIST_8X8
+ int txb_height = block_size_high[tx_bsize];
+ int txb_width = block_size_wide[tx_bsize];
+ const int src_stride = x->plane[plane].src.stride;
+ const int src_idx = (blk_row * src_stride + blk_col) << tx_size_wide_log2[0];
+ const uint8_t *src = &x->plane[plane].src.buf[src_idx];
+#endif
+
get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL,
NULL, &visible_cols, &visible_rows);
- return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, visible_rows);
+
+#if CONFIG_DIST_8X8
+ if (plane == 0 && txb_width >= 8 && txb_height >= 8)
+ return av1_dist_8x8_diff(xd, src, src_stride, diff, diff_stride, txb_width,
+ txb_height, visible_cols, visible_rows, x->qindex);
+ else
+#endif
+ return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols,
+ visible_rows);
+}
+
+#if CONFIG_PALETTE || CONFIG_INTRABC
+int av1_count_colors(const uint8_t *src, int stride, int rows, int cols) {
+ int val_count[256];
+ memset(val_count, 0, sizeof(val_count));
+ for (int r = 0; r < rows; ++r) {
+ for (int c = 0; c < cols; ++c) {
+ ++val_count[src[r * stride + c]];
+ }
+ }
+ int n = 0;
+ for (int i = 0; i < 256; ++i) {
+ if (val_count[i]) ++n;
+ }
+ return n;
}
+#if CONFIG_HIGHBITDEPTH
+int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
+ int bit_depth) {
+ assert(bit_depth <= 12);
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ int val_count[1 << 12];
+ memset(val_count, 0, (1 << 12) * sizeof(val_count[0]));
+ for (int r = 0; r < rows; ++r) {
+ for (int c = 0; c < cols; ++c) {
+ ++val_count[src[r * stride + c]];
+ }
+ }
+ int n = 0;
+ for (int i = 0; i < (1 << bit_depth); ++i) {
+ if (val_count[i]) ++n;
+ }
+ return n;
+}
+#endif // CONFIG_HIGHBITDEPTH
+#endif // CONFIG_PALETTE || CONFIG_INTRABC
+
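+
The new av1_count_colors() helpers simply histogram the sample values and count the non-empty bins, presumably feeding the palette / intra block copy decisions guarded by CONFIG_PALETTE || CONFIG_INTRABC. A small self-contained usage sketch (buffer contents and dimensions are made up for illustration):

  uint8_t blk[16 * 16];
  for (int i = 0; i < 16 * 16; ++i) blk[i] = (uint8_t)(i & 7);  // 8 distinct values
  const int n_colors = av1_count_colors(blk, /*stride=*/16, /*rows=*/16, /*cols=*/16);
  // n_colors == 8 for this synthetic block.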
void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col,
TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse,
OUTPUT_STATUS output_status) {
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
-#if CONFIG_DAALA_DIST
- int qm = OD_HVS_QM;
- int use_activity_masking = 0;
-#if CONFIG_PVQ
- use_activity_masking = x->daala_enc.use_activity_masking;
-#endif // CONFIG_PVQ
+#if CONFIG_DIST_8X8
struct macroblockd_plane *const pd = &xd->plane[plane];
-#else // CONFIG_DAALA_DIST
+#else // CONFIG_DIST_8X8
const struct macroblockd_plane *const pd = &xd->plane[plane];
-#endif // CONFIG_DAALA_DIST
+#endif // CONFIG_DIST_8X8
- if (cpi->sf.use_transform_domain_distortion && !CONFIG_DAALA_DIST) {
+ if (cpi->sf.use_transform_domain_distortion && !CONFIG_DIST_8X8) {
// Transform domain distortion computation is more efficient as it does
// not involve an inverse transform, but it is less accurate.
const int buffer_length = tx_size_2d[tx_size];
@@ -1292,19 +1728,21 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
&this_sse) >>
shift;
#endif // CONFIG_HIGHBITDEPTH
-#elif CONFIG_HIGHBITDEPTH
- const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
- *out_dist =
- av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse, bd) >>
- shift;
-#else
- *out_dist =
- av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift;
+#else // !CONFIG_PVQ
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ *out_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length,
+ &this_sse, xd->bd) >>
+ shift;
+ else
+#endif
+ *out_dist =
+ av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift;
#endif // CONFIG_PVQ
*out_sse = this_sse >> shift;
} else {
const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
-#if !CONFIG_PVQ || CONFIG_DAALA_DIST
+#if !CONFIG_PVQ || CONFIG_DIST_8X8
const int bsw = block_size_wide[tx_bsize];
const int bsh = block_size_high[tx_bsize];
#endif
@@ -1323,34 +1761,13 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
assert(cpi != NULL);
assert(tx_size_wide_log2[0] == tx_size_high_log2[0]);
-#if CONFIG_DAALA_DIST
- if (plane == 0 && bsw >= 8 && bsh >= 8) {
- if (output_status == OUTPUT_HAS_DECODED_PIXELS) {
- const int pred_stride = block_size_wide[plane_bsize];
- const int pred_idx = (blk_row * pred_stride + blk_col)
- << tx_size_wide_log2[0];
- const int16_t *pred = &pd->pred[pred_idx];
- int i, j;
- DECLARE_ALIGNED(16, uint8_t, pred8[MAX_TX_SQUARE]);
-
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++)
- pred8[j * bsw + i] = pred[j * pred_stride + i];
- *out_sse = av1_daala_dist(src, src_stride, pred8, bsw, bsw, bsh, qm,
- use_activity_masking, x->qindex);
- } else {
- *out_sse = av1_daala_dist(src, src_stride, dst, dst_stride, bsw, bsh,
- qm, use_activity_masking, x->qindex);
- }
- } else
-#endif // CONFIG_DAALA_DIST
{
const int diff_stride = block_size_wide[plane_bsize];
const int diff_idx = (blk_row * diff_stride + blk_col)
<< tx_size_wide_log2[0];
const int16_t *diff = &p->src_diff[diff_idx];
- *out_sse = sum_squares_visible(xd, plane, diff, diff_stride, blk_row,
- blk_col, plane_bsize, tx_bsize);
+ *out_sse = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
+ plane_bsize, tx_bsize);
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
*out_sse = ROUND_POWER_OF_TWO(*out_sse, (xd->bd - 8) * 2);
@@ -1360,15 +1777,8 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
if (eob) {
if (output_status == OUTPUT_HAS_DECODED_PIXELS) {
-#if CONFIG_DAALA_DIST
- if (plane == 0 && bsw >= 8 && bsh >= 8)
- *out_dist = av1_daala_dist(src, src_stride, dst, dst_stride, bsw, bsh,
- qm, use_activity_masking, x->qindex);
- else
-#endif // CONFIG_DAALA_DIST
- *out_dist =
- pixel_sse(cpi, xd, plane, src, src_stride, dst, dst_stride,
- blk_row, blk_col, plane_bsize, tx_bsize);
+ *out_dist = pixel_dist(cpi, x, plane, src, src_stride, dst, dst_stride,
+ blk_row, blk_col, plane_bsize, tx_bsize);
} else {
#if CONFIG_HIGHBITDEPTH
uint8_t *recon;
@@ -1399,37 +1809,44 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
#endif // !CONFIG_PVQ
const PLANE_TYPE plane_type = get_plane_type(plane);
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
-
- av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, recon,
- MAX_TX_SIZE, eob);
-
-#if CONFIG_DAALA_DIST
- if (plane == 0 && bsw >= 8 && bsh >= 8) {
- *out_dist = av1_daala_dist(src, src_stride, recon, MAX_TX_SIZE, bsw,
- bsh, qm, use_activity_masking, x->qindex);
- } else {
- if (plane == 0) {
- // Save decoded pixels for inter block in pd->pred to avoid
- // block_8x8_rd_txfm_daala_dist() need to produce them
- // by calling av1_inverse_transform_block() again.
- const int pred_stride = block_size_wide[plane_bsize];
- const int pred_idx = (blk_row * pred_stride + blk_col)
- << tx_size_wide_log2[0];
- int16_t *pred = &pd->pred[pred_idx];
- int i, j;
+ TX_TYPE tx_type =
+ av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
+ av1_inverse_transform_block(xd, dqcoeff,
+#if CONFIG_LGT
+ xd->mi[0]->mbmi.mode,
+#endif
+ tx_type, tx_size, recon, MAX_TX_SIZE, eob);
+
+#if CONFIG_DIST_8X8
+ if (plane == 0 && (bsw < 8 || bsh < 8)) {
+ // Save decoded pixels for inter block in pd->pred to avoid
+ // block_8x8_rd_txfm_daala_dist() need to produce them
+ // by calling av1_inverse_transform_block() again.
+ const int pred_stride = block_size_wide[plane_bsize];
+ const int pred_idx = (blk_row * pred_stride + blk_col)
+ << tx_size_wide_log2[0];
+ int16_t *pred = &pd->pred[pred_idx];
+ int i, j;
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++)
+ pred[j * pred_stride + i] =
+ CONVERT_TO_SHORTPTR(recon)[j * MAX_TX_SIZE + i];
+ } else {
+#endif
for (j = 0; j < bsh; j++)
for (i = 0; i < bsw; i++)
pred[j * pred_stride + i] = recon[j * MAX_TX_SIZE + i];
+#if CONFIG_HIGHBITDEPTH
}
-#endif // CONFIG_DAALA_DIST
- *out_dist =
- pixel_sse(cpi, xd, plane, src, src_stride, recon, MAX_TX_SIZE,
- blk_row, blk_col, plane_bsize, tx_bsize);
-#if CONFIG_DAALA_DIST
+#endif // CONFIG_HIGHBITDEPTH
}
-#endif // CONFIG_DAALA_DIST
+#endif // CONFIG_DIST_8X8
+ *out_dist =
+ pixel_dist(cpi, x, plane, src, src_stride, recon, MAX_TX_SIZE,
+ blk_row, blk_col, plane_bsize, tx_bsize);
}
*out_dist *= 16;
} else {
@@ -1453,33 +1870,25 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
int64_t rd1, rd2, rd;
RD_STATS this_rd_stats;
- assert(tx_size == get_tx_size(plane, xd));
+#if !CONFIG_SUPERTX && !CONFIG_VAR_TX
+ assert(tx_size == av1_get_tx_size(plane, xd));
+#endif // !CONFIG_SUPERTX
av1_init_rd_stats(&this_rd_stats);
if (args->exit_early) return;
if (!is_inter_block(mbmi)) {
-#if CONFIG_CFL
-
-#if CONFIG_EC_ADAPT
- FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
-#else
- FRAME_CONTEXT *const ec_ctx = cm->fc;
-#endif // CONFIG_EC_ADAPT
-
- av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col,
- blk_row, tx_size, plane_bsize);
-#else
av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
-#endif
#if CONFIG_DPCM_INTRA
const int block_raster_idx =
av1_block_index_to_raster_order(tx_size, block);
- const PREDICTION_MODE mode =
- (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode;
- TX_TYPE tx_type = get_tx_type((plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV,
- xd, block, tx_size);
+ const PREDICTION_MODE mode = (plane == AOM_PLANE_Y)
+ ? get_y_mode(xd->mi[0], block_raster_idx)
+ : get_uv_mode(mbmi->uv_mode);
+ TX_TYPE tx_type =
+ av1_get_tx_type((plane == AOM_PLANE_Y) ? PLANE_TYPE_Y : PLANE_TYPE_UV,
+ xd, blk_row, blk_col, block, tx_size);
if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) {
int8_t skip;
av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col,
@@ -1496,9 +1905,36 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
#if !CONFIG_TXK_SEL
// full forward transform and quantization
const int coeff_ctx = combine_entropy_contexts(*a, *l);
+#if DISABLE_TRELLISQ_SEARCH
+ av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ coeff_ctx, AV1_XFORM_QUANT_B);
+#else
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
coeff_ctx, AV1_XFORM_QUANT_FP);
- av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
+
+ const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
+ tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
+ tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
+ const int buffer_length = tx_size_2d[tx_size];
+ int64_t tmp_dist;
+ int64_t tmp;
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ tmp_dist =
+ av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd) >>
+ shift;
+ else
+#endif
+ tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp) >> shift;
+
+ if (RDCOST(x->rdmult, 0, tmp_dist) + args->this_rd < args->best_rd) {
+ av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
+ a, l);
+ } else {
+ args->exit_early = 1;
+ return;
+ }
+#endif // DISABLE_TRELLISQ_SEARCH
if (!is_inter_block(mbmi)) {
struct macroblock_plane *const p = &x->plane[plane];
@@ -1518,25 +1954,27 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
const int dst_stride = pd->dst.stride;
uint8_t *dst =
&pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
- cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size);
+  // TODO(ltrudeau) Store sub-8x8 inter blocks when bottom right block is
+ // intra predicted.
+ cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size, plane_bsize);
}
#endif
#if CONFIG_DPCM_INTRA
CALCULATE_RD : {}
#endif // CONFIG_DPCM_INTRA
- rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
+ rd = RDCOST(x->rdmult, 0, this_rd_stats.dist);
if (args->this_rd + rd > args->best_rd) {
args->exit_early = 1;
return;
}
#if !CONFIG_PVQ
const PLANE_TYPE plane_type = get_plane_type(plane);
- const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
- const SCAN_ORDER *scan_order =
- get_scan(cm, tx_size, tx_type, is_inter_block(mbmi));
+ const TX_TYPE tx_type =
+ av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
+ const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, mbmi);
this_rd_stats.rate =
- av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order, a, l,
- args->use_fast_coef_costing);
+ av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size,
+ scan_order, a, l, args->use_fast_coef_costing);
#else // !CONFIG_PVQ
this_rd_stats.rate = x->rate;
#endif // !CONFIG_PVQ
@@ -1554,22 +1992,12 @@ CALCULATE_RD : {}
av1_set_txb_context(x, plane, block, tx_size, a, l);
#endif // !CONFIG_PVQ
- rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
- rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse);
+ rd1 = RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist);
+ rd2 = RDCOST(x->rdmult, 0, this_rd_stats.sse);
// TODO(jingning): temporarily enabled only for luma component
rd = AOMMIN(rd1, rd2);
-#if CONFIG_DAALA_DIST
- if (plane == 0 && plane_bsize >= BLOCK_8X8 &&
- (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4)) {
- this_rd_stats.dist = 0;
- this_rd_stats.sse = 0;
- rd = 0;
- x->rate_4x4[block] = this_rd_stats.rate;
- }
-#endif // CONFIG_DAALA_DIST
-
#if !CONFIG_PVQ
this_rd_stats.skip &= !x->plane[plane].eobs[block];
#else
@@ -1579,113 +2007,93 @@ CALCULATE_RD : {}
args->this_rd += rd;
- if (args->this_rd > args->best_rd) {
- args->exit_early = 1;
- return;
+#if CONFIG_DIST_8X8
+ if (!(plane == 0 && plane_bsize >= BLOCK_8X8 &&
+ (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))) {
+#endif
+ if (args->this_rd > args->best_rd) {
+ args->exit_early = 1;
+ return;
+ }
+#if CONFIG_DIST_8X8
}
+#endif
}
-#if CONFIG_DAALA_DIST
-static void block_8x8_rd_txfm_daala_dist(int plane, int block, int blk_row,
- int blk_col, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, void *arg) {
- struct rdcost_block_args *args = arg;
- MACROBLOCK *const x = args->x;
+#if CONFIG_DIST_8X8
+static void dist_8x8_sub8x8_txfm_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize,
+ struct rdcost_block_args *args) {
MACROBLOCKD *const xd = &x->e_mbd;
+ const struct macroblockd_plane *const pd = &xd->plane[0];
+ const struct macroblock_plane *const p = &x->plane[0];
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- int64_t rd, rd1, rd2;
- RD_STATS this_rd_stats;
- int qm = OD_HVS_QM;
- int use_activity_masking = 0;
-
- (void)tx_size;
-
- assert(plane == 0);
- assert(plane_bsize >= BLOCK_8X8);
-#if CONFIG_PVQ
- use_activity_masking = x->daala_enc.use_activity_masking;
-#endif // CONFIG_PVQ
- av1_init_rd_stats(&this_rd_stats);
+ const int src_stride = p->src.stride;
+ const int dst_stride = pd->dst.stride;
+ const uint8_t *src = &p->src.buf[0];
+ const uint8_t *dst = &pd->dst.buf[0];
+ const int16_t *pred = &pd->pred[0];
+ const int bw = block_size_wide[bsize];
+ const int bh = block_size_high[bsize];
- if (args->exit_early) return;
+ int i, j;
+ int64_t rd, rd1, rd2;
+ unsigned int tmp1, tmp2;
+ int qindex = x->qindex;
- {
- const struct macroblock_plane *const p = &x->plane[plane];
- struct macroblockd_plane *const pd = &xd->plane[plane];
+ assert((bw & 0x07) == 0);
+ assert((bh & 0x07) == 0);
- const int src_stride = p->src.stride;
- const int dst_stride = pd->dst.stride;
- const int diff_stride = block_size_wide[plane_bsize];
+#if CONFIG_HIGHBITDEPTH
+ uint8_t *pred8;
+ DECLARE_ALIGNED(16, uint16_t, pred16[MAX_TX_SQUARE]);
- const uint8_t *src =
- &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
- const uint8_t *dst =
- &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ pred8 = CONVERT_TO_BYTEPTR(pred16);
+ else
+ pred8 = (uint8_t *)pred16;
+#else
+ DECLARE_ALIGNED(16, uint8_t, pred8[MAX_TX_SQUARE]);
+#endif // CONFIG_HIGHBITDEPTH
- unsigned int tmp1, tmp2;
- int qindex = x->qindex;
- const int pred_stride = block_size_wide[plane_bsize];
- const int pred_idx = (blk_row * pred_stride + blk_col)
- << tx_size_wide_log2[0];
- int16_t *pred = &pd->pred[pred_idx];
- int i, j;
- const int tx_blk_size = 8;
-
- DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]);
-
- for (j = 0; j < tx_blk_size; j++)
- for (i = 0; i < tx_blk_size; i++)
- pred8[j * tx_blk_size + i] = pred[j * diff_stride + i];
-
- tmp1 = av1_daala_dist(src, src_stride, pred8, tx_blk_size, 8, 8, qm,
- use_activity_masking, qindex);
- tmp2 = av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8, qm,
- use_activity_masking, qindex);
-
- if (!is_inter_block(mbmi)) {
- this_rd_stats.sse = (int64_t)tmp1 * 16;
- this_rd_stats.dist = (int64_t)tmp2 * 16;
- } else {
- // For inter mode, the decoded pixels are provided in pd->pred,
- // while the predicted pixels are in dst.
- this_rd_stats.sse = (int64_t)tmp2 * 16;
- this_rd_stats.dist = (int64_t)tmp1 * 16;
- }
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (j = 0; j < bh; j++)
+ for (i = 0; i < bw; i++)
+ CONVERT_TO_SHORTPTR(pred8)[j * bw + i] = pred[j * bw + i];
+ } else {
+#endif
+ for (j = 0; j < bh; j++)
+ for (i = 0; i < bw; i++) pred8[j * bw + i] = pred[j * bw + i];
+#if CONFIG_HIGHBITDEPTH
}
+#endif // CONFIG_HIGHBITDEPTH
- rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
- if (args->this_rd + rd > args->best_rd) {
- args->exit_early = 1;
- return;
+ tmp1 = av1_dist_8x8(cpi, xd, src, src_stride, pred8, bw, bsize, bw, bh, bw,
+ bh, qindex);
+ tmp2 = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride, bsize, bw, bh,
+ bw, bh, qindex);
+
+ if (!is_inter_block(mbmi)) {
+ args->rd_stats.sse = (int64_t)tmp1 * 16;
+ args->rd_stats.dist = (int64_t)tmp2 * 16;
+ } else {
+ // For inter mode, the decoded pixels are provided in pd->pred,
+ // while the predicted pixels are in dst.
+ args->rd_stats.sse = (int64_t)tmp2 * 16;
+ args->rd_stats.dist = (int64_t)tmp1 * 16;
}
- {
- const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
- const uint8_t txw_unit = tx_size_wide_unit[tx_size];
- const uint8_t txh_unit = tx_size_high_unit[tx_size];
- const int step = txw_unit * txh_unit;
- int offset_h = tx_size_high_unit[TX_4X4];
- // The rate of the current 8x8 block is the sum of four 4x4 blocks in it.
- this_rd_stats.rate =
- x->rate_4x4[block - max_blocks_wide * offset_h - step] +
- x->rate_4x4[block - max_blocks_wide * offset_h] +
- x->rate_4x4[block - step] + x->rate_4x4[block];
- }
- rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
- rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse);
+ rd1 = RDCOST(x->rdmult, args->rd_stats.rate, args->rd_stats.dist);
+ rd2 = RDCOST(x->rdmult, 0, args->rd_stats.sse);
rd = AOMMIN(rd1, rd2);
- args->rd_stats.dist += this_rd_stats.dist;
- args->rd_stats.sse += this_rd_stats.sse;
-
- args->this_rd += rd;
+ args->rd_stats.rdcost = rd;
+ args->this_rd = rd;
- if (args->this_rd > args->best_rd) {
- args->exit_early = 1;
- return;
- }
+ if (args->this_rd > args->best_rd) args->exit_early = 1;
}
-#endif // CONFIG_DAALA_DIST
+#endif // CONFIG_DIST_8X8
static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
RD_STATS *rd_stats, int64_t ref_best_rd, int plane,
@@ -1705,15 +2113,13 @@ static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
-#if CONFIG_DAALA_DIST
- if (plane == 0 && bsize >= BLOCK_8X8 &&
+ av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
+ &args);
+#if CONFIG_DIST_8X8
+ if (!args.exit_early && plane == 0 && bsize >= BLOCK_8X8 &&
(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))
- av1_foreach_8x8_transformed_block_in_yplane(
- xd, bsize, block_rd_txfm, block_8x8_rd_txfm_daala_dist, &args);
- else
-#endif // CONFIG_DAALA_DIST
- av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
- &args);
+ dist_8x8_sub8x8_txfm_rd(cpi, x, bsize, &args);
+#endif
if (args.exit_early) {
av1_invalid_rd_stats(rd_stats);
@@ -1768,8 +2174,14 @@ static int tx_size_cost(const AV1_COMP *const cpi, const MACROBLOCK *const x,
const MACROBLOCKD *const xd = &x->e_mbd;
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- const int tx_select =
- cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8;
+ const int tx_select = cm->tx_mode == TX_MODE_SELECT &&
+#if CONFIG_EXT_PARTITION_TYPES
+ // Currently these block shapes can only use 4x4
+ // transforms
+ mbmi->sb_type != BLOCK_4X16 &&
+ mbmi->sb_type != BLOCK_16X4 &&
+#endif
+ mbmi->sb_type >= BLOCK_8X8;
if (tx_select) {
const int is_inter = is_inter_block(mbmi);
@@ -1779,11 +2191,11 @@ static int tx_size_cost(const AV1_COMP *const cpi, const MACROBLOCK *const x,
const int depth = tx_size_to_depth(coded_tx_size);
const int tx_size_ctx = get_tx_size_context(xd);
int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
-#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size)
r_tx_size += av1_cost_bit(cm->fc->quarter_tx_size_prob,
tx_size == quarter_txsize_lookup[bsize]);
-#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#endif
return r_tx_size;
} else {
return 0;
@@ -1796,6 +2208,10 @@ int av1_tx_type_cost(const AV1_COMP *cpi, const MACROBLOCKD *xd,
TX_TYPE tx_type) {
if (plane > 0) return 0;
+#if CONFIG_VAR_TX
+ tx_size = get_min_tx_size(tx_size);
+#endif
+
const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const int is_inter = is_inter_block(mbmi);
#if CONFIG_EXT_TX
@@ -1844,6 +2260,9 @@ static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
const int r_tx_size = tx_size_cost(cpi, x, bs, tx_size);
+#if CONFIG_PVQ
+ assert(tx_size >= TX_4X4);
+#endif // CONFIG_PVQ
assert(skip_prob > 0);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs)));
@@ -1864,21 +2283,20 @@ static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
if (rd_stats->skip) {
if (is_inter) {
- rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse);
+ rd = RDCOST(x->rdmult, s1, rd_stats->sse);
} else {
- rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select,
- rd_stats->sse);
+ rd = RDCOST(x->rdmult, s1 + r_tx_size * tx_select, rd_stats->sse);
}
} else {
- rd = RDCOST(x->rdmult, x->rddiv,
- rd_stats->rate + s0 + r_tx_size * tx_select, rd_stats->dist);
+ rd = RDCOST(x->rdmult, rd_stats->rate + s0 + r_tx_size * tx_select,
+ rd_stats->dist);
}
if (tx_select) rd_stats->rate += r_tx_size;
if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
!(rd_stats->skip))
- rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse));
+ rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse));
return rd;
}
@@ -1895,6 +2313,11 @@ static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
// transforms should be considered for pruning
prune = prune_tx_types(cpi, bs, x, xd, -1);
+#if CONFIG_MRC_TX
+ // MRC_DCT only implemented for TX_32X32 so only include this tx in
+ // the search for TX_32X32
+ if (tx_type == MRC_DCT && tx_size != TX_32X32) return 1;
+#endif // CONFIG_MRC_TX
if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) return 1;
if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, tx_size))
return 1;
@@ -1929,7 +2352,8 @@ static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
return 0;
}
-#if CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
+#if CONFIG_EXT_INTER && \
+ (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT || CONFIG_INTERINTRA)
static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
MACROBLOCK *x, int *r, int64_t *d, int *s,
int64_t *sse, int64_t ref_best_rd) {
@@ -2020,14 +2444,13 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
av1_tx_type_cost(cpi, xd, bs, plane, mbmi->tx_size, tx_type);
if (this_rd_stats.skip)
- this_rd = RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse);
+ this_rd = RDCOST(x->rdmult, s1, this_rd_stats.sse);
else
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + s0,
- this_rd_stats.dist);
+ this_rd =
+ RDCOST(x->rdmult, this_rd_stats.rate + s0, this_rd_stats.dist);
if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] &&
!this_rd_stats.skip)
- this_rd =
- AOMMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse));
+ this_rd = AOMMIN(this_rd, RDCOST(x->rdmult, s1, this_rd_stats.sse));
if (this_rd < best_rd) {
best_rd = this_rd;
@@ -2068,13 +2491,12 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
continue;
}
if (this_rd_stats.skip)
- this_rd = RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse);
+ this_rd = RDCOST(x->rdmult, s1, this_rd_stats.sse);
else
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + s0,
- this_rd_stats.dist);
- if (is_inter && !xd->lossless[mbmi->segment_id] && !this_rd_stats.skip)
this_rd =
- AOMMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse));
+ RDCOST(x->rdmult, this_rd_stats.rate + s0, this_rd_stats.dist);
+ if (is_inter && !xd->lossless[mbmi->segment_id] && !this_rd_stats.skip)
+ this_rd = AOMMIN(this_rd, RDCOST(x->rdmult, s1, this_rd_stats.sse));
if (this_rd < best_rd) {
best_rd = this_rd;
@@ -2129,7 +2551,6 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
TX_TYPE best_tx_type = DCT_DCT;
#if CONFIG_TXK_SEL
TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
- const int num_blk = bsize_to_num_blk(bs);
#endif // CONFIG_TXK_SEL
const int tx_select = cm->tx_mode == TX_MODE_SELECT;
const int is_inter = is_inter_block(mbmi);
@@ -2171,8 +2592,7 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
rect_tx_size);
if (rd < best_rd) {
#if CONFIG_TXK_SEL
- memcpy(best_txk_type, mbmi->txk_type,
- sizeof(best_txk_type[0]) * num_blk);
+ memcpy(best_txk_type, mbmi->txk_type, sizeof(best_txk_type[0]) * 256);
#endif
best_tx_type = tx_type;
best_tx_size = rect_tx_size;
@@ -2278,8 +2698,7 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
last_rd = rd;
if (rd < best_rd) {
#if CONFIG_TXK_SEL
- memcpy(best_txk_type, mbmi->txk_type,
- sizeof(best_txk_type[0]) * num_blk);
+ memcpy(best_txk_type, mbmi->txk_type, sizeof(best_txk_type[0]) * 256);
#endif
best_tx_type = tx_type;
best_tx_size = n;
@@ -2295,7 +2714,7 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
mbmi->tx_size = best_tx_size;
mbmi->tx_type = best_tx_type;
#if CONFIG_TXK_SEL
- memcpy(mbmi->txk_type, best_txk_type, sizeof(best_txk_type[0]) * num_blk);
+ memcpy(mbmi->txk_type, best_txk_type, sizeof(best_txk_type[0]) * 256);
#endif
#if CONFIG_VAR_TX
@@ -2366,21 +2785,7 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
int block = 0;
for (row = 0; row < max_blocks_high; row += stepr) {
for (col = 0; col < max_blocks_wide; col += stepc) {
-#if CONFIG_CFL
- const struct macroblockd_plane *const pd = &xd->plane[0];
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
-
-#if CONFIG_EC_ADAPT
- FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
-#else
- FRAME_CONTEXT *const ec_ctx = cpi->common.fc;
-#endif // CONFIG_EC_ADAPT
-
- av1_predict_intra_block_encoder_facade(x, ec_ctx, 0, block, col, row,
- tx_size, plane_bsize);
-#else
av1_predict_intra_block_facade(xd, 0, block, col, row, tx_size);
-#endif
block += step;
}
}
@@ -2388,7 +2793,8 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &this_rd_stats.rate,
&this_rd_stats.dist, &this_rd_stats.skip, &temp_sse);
#if CONFIG_EXT_INTRA
- if (av1_is_directional_mode(mbmi->mode, bsize)) {
+ if (av1_is_directional_mode(mbmi->mode, bsize) &&
+ av1_use_angle_delta(bsize)) {
mode_cost += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
}
@@ -2405,8 +2811,8 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
}
}
#endif // CONFIG_FILTER_INTRA
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + mode_cost,
- this_rd_stats.dist);
+ this_rd =
+ RDCOST(x->rdmult, this_rd_stats.rate + mode_cost, this_rd_stats.dist);
return this_rd;
}
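
In intra_model_yrd() the angle-delta rate is now added only when av1_use_angle_delta(bsize) holds. write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, v) prices one of the 2 * MAX_ANGLE_DELTA + 1 possible deltas; the standalone example below works that out in whole bits, assuming MAX_ANGLE_DELTA is 3 and that the helper uses a truncated binary code (neither is shown in this hunk, and the real helper most likely returns costs in fixed-point units rather than whole bits).

#include <stdio.h>

/* Truncated binary code length in bits for value v out of n symbols; a
 * sketch of the kind of cost write_uniform_cost() is assumed to return. */
static int uniform_code_bits(int n, int v) {
  int l = 0;
  while ((1 << l) < n) ++l; /* l = ceil(log2(n)) */
  if (l == 0) return 0;
  const int m = (1 << l) - n; /* the first m values get the short code */
  return (v < m) ? l - 1 : l;
}

int main(void) {
  const int max_angle_delta = 3; /* assumed value of MAX_ANGLE_DELTA */
  const int n = 2 * max_angle_delta + 1;
  for (int delta = -max_angle_delta; delta <= max_angle_delta; ++delta)
    printf("angle_delta %+d -> %d bits\n", delta,
           uniform_code_bits(n, delta + max_angle_delta));
  return 0; /* prints 2 bits for delta -3 and 3 bits for the other six */
}
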
@@ -2620,7 +3026,7 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
if (tokenonly_rd_stats.rate == INT_MAX) continue;
this_rate = tokenonly_rd_stats.rate + palette_mode_cost;
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
+ this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
if (!xd->lossless[mbmi->segment_id] && mbmi->sb_type >= BLOCK_8X8) {
tokenonly_rd_stats.rate -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
}
@@ -2773,15 +3179,17 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
src_stride, dst, dst_stride, xd->bd);
#endif
if (is_lossless) {
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size);
- const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
+ TX_TYPE tx_type =
+ av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size);
+ const SCAN_ORDER *scan_order =
+ get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
const int coeff_ctx =
combine_entropy_contexts(tempa[idx], templ[idy]);
#if !CONFIG_PVQ
av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
- ratey += av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order,
- tempa + idx, templ + idy,
+ ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size,
+ scan_order, tempa + idx, templ + idy,
cpi->sf.use_fast_coef_costing);
skip = (p->eobs[block] == 0);
can_skip &= skip;
@@ -2806,28 +3214,38 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
templ[idy] = !skip;
can_skip &= skip;
#endif
- if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
+ if (RDCOST(x->rdmult, ratey, distortion) >= best_rd)
goto next_highbd;
#if CONFIG_PVQ
if (!skip)
#endif
av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
+#if CONFIG_LGT
+ mode,
+#endif
DCT_DCT, tx_size, dst, dst_stride,
p->eobs[block]);
} else {
int64_t dist;
unsigned int tmp;
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size);
- const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
+ TX_TYPE tx_type =
+ av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size);
+ const SCAN_ORDER *scan_order =
+ get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
const int coeff_ctx =
combine_entropy_contexts(tempa[idx], templ[idy]);
#if !CONFIG_PVQ
+#if DISABLE_TRELLISQ_SEARCH
+ av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
+ tx_size, coeff_ctx, AV1_XFORM_QUANT_B);
+#else
av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
- av1_optimize_b(cm, x, 0, block, BLOCK_8X8, tx_size, tempa + idx,
- templ + idy);
- ratey += av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order,
- tempa + idx, templ + idy,
+ av1_optimize_b(cm, x, 0, 0, 0, block, BLOCK_8X8, tx_size,
+ tempa + idx, templ + idy);
+#endif // DISABLE_TRELLISQ_SEARCH
+ ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size,
+ scan_order, tempa + idx, templ + idy,
cpi->sf.use_fast_coef_costing);
skip = (p->eobs[block] == 0);
can_skip &= skip;
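
This is the first of several places where the new DISABLE_TRELLISQ_SEARCH switch replaces the AV1_XFORM_QUANT_FP + av1_optimize_b() pair with a single AV1_XFORM_QUANT_B pass. A toy sketch of that control flow, with hypothetical stand-in functions and the compile-time switch modelled as an ordinary flag for brevity:

#include <stdio.h>

#ifndef DISABLE_TRELLISQ_SEARCH
#define DISABLE_TRELLISQ_SEARCH 0
#endif

/* Hypothetical stand-ins for av1_xform_quant(..., AV1_XFORM_QUANT_B),
 * av1_xform_quant(..., AV1_XFORM_QUANT_FP) and av1_optimize_b(). */
static void quantize_b(int block) { printf("quantize (B)  block %d\n", block); }
static void quantize_fp(int block) { printf("quantize (FP) block %d\n", block); }
static void trellis_optimize(int block) { printf("trellis       block %d\n", block); }

static void code_block(int block) {
  if (DISABLE_TRELLISQ_SEARCH) {
    /* One-shot quantization; no coefficient-level RD optimization. */
    quantize_b(block);
  } else {
    /* Fast (FP) quantization followed by trellis optimization of the
     * quantized coefficients against the current entropy contexts. */
    quantize_fp(block);
    trellis_optimize(block);
  }
}

int main(void) {
  code_block(0);
  return 0;
}
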
@@ -2855,19 +3273,22 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
if (!skip)
#endif
av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
+#if CONFIG_LGT
+ mode,
+#endif
tx_type, tx_size, dst, dst_stride,
p->eobs[block]);
cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
dist = (int64_t)tmp << 4;
distortion += dist;
- if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
+ if (RDCOST(x->rdmult, ratey, distortion) >= best_rd)
goto next_highbd;
}
}
}
rate += ratey;
- this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+ this_rd = RDCOST(x->rdmult, rate, distortion);
if (this_rd < best_rd) {
*bestrate = rate;
@@ -2966,14 +3387,24 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
aom_subtract_block(tx_height, tx_width, src_diff, 8, src, src_stride,
dst, dst_stride);
#endif // !CONFIG_PVQ
-
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size);
- const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
+ TX_TYPE tx_type =
+ av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size);
+ const SCAN_ORDER *scan_order =
+ get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
const int coeff_ctx = combine_entropy_contexts(tempa[idx], templ[idy]);
#if CONFIG_CB4X4
block = 4 * block;
#endif // CONFIG_CB4X4
#if !CONFIG_PVQ
+#if DISABLE_TRELLISQ_SEARCH
+ av1_xform_quant(cm, x, 0, block,
+#if CONFIG_CB4X4
+ 2 * (row + idy), 2 * (col + idx),
+#else
+ row + idy, col + idx,
+#endif // CONFIG_CB4X4
+ BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_B);
+#else
const AV1_XFORM_QUANT xform_quant =
is_lossless ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP;
av1_xform_quant(cm, x, 0, block,
@@ -2984,12 +3415,12 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
#endif // CONFIG_CB4X4
BLOCK_8X8, tx_size, coeff_ctx, xform_quant);
- av1_optimize_b(cm, x, 0, block, BLOCK_8X8, tx_size, tempa + idx,
+ av1_optimize_b(cm, x, 0, 0, 0, block, BLOCK_8X8, tx_size, tempa + idx,
templ + idy);
-
- ratey +=
- av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order, tempa + idx,
- templ + idy, cpi->sf.use_fast_coef_costing);
+#endif // DISABLE_TRELLISQ_SEARCH
+ ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size, scan_order,
+ tempa + idx, templ + idy,
+ cpi->sf.use_fast_coef_costing);
skip = (p->eobs[block] == 0);
can_skip &= skip;
tempa[idx] = !skip;
@@ -3028,6 +3459,9 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
if (!skip)
#endif // CONFIG_PVQ
av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
+#if CONFIG_LGT
+ mode,
+#endif
tx_type, tx_size, dst, dst_stride,
p->eobs[block]);
unsigned int tmp;
@@ -3036,14 +3470,16 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
distortion += dist;
}
- if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
- goto next;
+ if (RDCOST(x->rdmult, ratey, distortion) >= best_rd) goto next;
if (is_lossless) { // Calculate inverse txfm *after* RD cost.
#if CONFIG_PVQ
if (!skip)
#endif // CONFIG_PVQ
av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
+#if CONFIG_LGT
+ mode,
+#endif
DCT_DCT, tx_size, dst, dst_stride,
p->eobs[block]);
}
@@ -3051,7 +3487,7 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
}
rate += ratey;
- this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+ this_rd = RDCOST(x->rdmult, rate, distortion);
if (this_rd < best_rd) {
*bestrate = rate;
@@ -3153,9 +3589,9 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
cpi, mb, idy, idx, &best_mode, bmode_costs,
xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r,
&ry, &d, bsize, tx_size, y_skip, best_rd - total_rd);
-#if !CONFIG_DAALA_DIST
+#if !CONFIG_DIST_8X8
if (this_rd >= best_rd - total_rd) return INT64_MAX;
-#endif // !CONFIG_DAALA_DIST
+#endif // !CONFIG_DIST_8X8
total_rd += this_rd;
cost += r;
total_distortion += d;
@@ -3172,7 +3608,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
}
mbmi->mode = mic->bmi[3].as_mode;
-#if CONFIG_DAALA_DIST
+#if CONFIG_DIST_8X8
{
const struct macroblock_plane *p = &mb->plane[0];
const struct macroblockd_plane *pd = &xd->plane[0];
@@ -3180,18 +3616,16 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
const int dst_stride = pd->dst.stride;
uint8_t *src = p->src.buf;
uint8_t *dst = pd->dst.buf;
- int use_activity_masking = 0;
- int qm = OD_HVS_QM;
#if CONFIG_PVQ
use_activity_masking = mb->daala_enc.use_activity_masking;
#endif // CONFIG_PVQ
// Daala-defined distortion computed for the block of 8x8 pixels
- total_distortion = av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8,
- qm, use_activity_masking, mb->qindex)
+ total_distortion = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride,
+ BLOCK_8X8, 8, 8, 8, 8, mb->qindex)
<< 4;
}
-#endif // CONFIG_DAALA_DIST
+#endif // CONFIG_DIST_8X8
// Add in the cost of the transform type
if (!is_lossless) {
int rate_tx_type = 0;
@@ -3218,7 +3652,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
*rate_y = tot_rate_y;
*distortion = total_distortion;
- return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
+ return RDCOST(mb->rdmult, cost, total_distortion);
}
#if CONFIG_FILTER_INTRA
@@ -3261,7 +3695,7 @@ static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
this_rate = tokenonly_rd_stats.rate +
av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 1) +
write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost;
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
+ this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
if (this_rd < *best_rd) {
*best_rd = this_rd;
@@ -3321,7 +3755,7 @@ static int64_t calc_rd_given_intra_angle(
this_rate = tokenonly_rd_stats.rate + mode_cost +
write_uniform_cost(2 * max_angle_delta + 1,
mbmi->angle_delta[0] + max_angle_delta);
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
+ this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
if (this_rd < *best_rd) {
*best_rd = this_rd;
@@ -3496,8 +3930,8 @@ static void angle_estimation(const uint8_t *src, int src_stride, int rows,
uint8_t *directional_mode_skip_mask) {
memset(directional_mode_skip_mask, 0,
INTRA_MODES * sizeof(*directional_mode_skip_mask));
- // Sub-8x8 blocks do not use extra directions.
- if (bsize < BLOCK_8X8) return;
+ // Check if angle_delta is used
+ if (!av1_use_angle_delta(bsize)) return;
uint64_t hist[DIRECTIONAL_MODES];
memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
src += src_stride;
@@ -3551,8 +3985,8 @@ static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
uint8_t *directional_mode_skip_mask) {
memset(directional_mode_skip_mask, 0,
INTRA_MODES * sizeof(*directional_mode_skip_mask));
- // Sub-8x8 blocks do not use extra directions.
- if (bsize < BLOCK_8X8) return;
+ // Check if angle_delta is used
+ if (!av1_use_angle_delta(bsize)) return;
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint64_t hist[DIRECTIONAL_MODES];
memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
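
Both angle-estimation helpers above now defer to av1_use_angle_delta(bsize) instead of an inline bsize < BLOCK_8X8 test, so the signalling condition lives in one place. The helper's definition is not part of this hunk; given the comment it replaces, a plausible one-liner would be the following, with the enum values as hypothetical stand-ins:

/* Assumed shape of the predicate: extra directional refinements are only
 * searched and signalled for 8x8 and larger blocks. */
enum { BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, BLOCK_8X8 /* ... */ };

static int use_angle_delta(int bsize) { return bsize >= BLOCK_8X8; }
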
@@ -3608,7 +4042,6 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize, int64_t best_rd) {
- uint8_t mode_idx;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mic = xd->mi[0];
MB_MODE_INFO *const mbmi = &mic->mbmi;
@@ -3683,7 +4116,7 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
x->use_default_intra_tx_type = 0;
/* Y Search for intra prediction mode */
- for (mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) {
+ for (int mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) {
RD_STATS this_rd_stats;
int this_rate, this_rate_tokenonly, s;
int64_t this_distortion, this_rd, this_model_rd;
@@ -3692,7 +4125,8 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
mbmi->mode = best_mbmi.mode;
x->use_default_intra_tx_type = 0;
} else {
- mbmi->mode = mode_idx;
+ assert(mode_idx < INTRA_MODES);
+ mbmi->mode = intra_rd_search_mode_order[mode_idx];
}
#if CONFIG_PVQ
od_encode_rollback(&x->daala_enc, &pre_buf);
@@ -3708,7 +4142,7 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
#if CONFIG_EXT_INTRA
is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
- if (is_directional_mode) {
+ if (is_directional_mode && av1_use_angle_delta(bsize)) {
this_rd_stats.rate = INT_MAX;
rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rd_stats, bsize,
bmode_costs[mbmi->mode], best_rd, &best_model_rd);
@@ -3754,11 +4188,13 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
this_rate +=
cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
#endif // CONFIG_INTRA_INTERP
- this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
- MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
+ if (av1_use_angle_delta(bsize)) {
+ this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
+ MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
+ }
}
#endif // CONFIG_EXT_INTRA
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ this_rd = RDCOST(x->rdmult, this_rate, this_distortion);
#if CONFIG_FILTER_INTRA
if (best_rd == INT64_MAX || this_rd - best_rd < (best_rd >> 4)) {
filter_intra_mode_skip_mask ^= (1 << mbmi->mode);
@@ -3785,16 +4221,6 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
od_encode_rollback(&x->daala_enc, &post_buf);
#endif // CONFIG_PVQ
-#if CONFIG_CFL
- // Perform one extra txfm_rd_in_plane() call, this time with the best value so
- // we can store reconstructed luma values
- RD_STATS this_rd_stats;
- x->cfl_store_y = 1;
- txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, 0, bsize,
- mic->mbmi.tx_size, cpi->sf.use_fast_coef_costing);
- x->cfl_store_y = 0;
-#endif
-
#if CONFIG_PALETTE
if (try_palette) {
rd_pick_palette_intra_sby(cpi, x, bsize, palette_y_mode_ctx,
@@ -3826,7 +4252,7 @@ static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x,
int64_t ref_best_rd) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
+ const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]);
int plane;
int is_cost_valid = 1;
av1_init_rd_stats(rd_stats);
@@ -3857,9 +4283,8 @@ static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x,
break;
}
av1_merge_rd_stats(rd_stats, &pn_rd_stats);
- if (RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist) >
- ref_best_rd &&
- RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse) > ref_best_rd) {
+ if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) > ref_best_rd &&
+ RDCOST(x->rdmult, 0, rd_stats->sse) > ref_best_rd) {
is_cost_valid = 0;
break;
}
@@ -3875,13 +4300,6 @@ static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x,
}
#if CONFIG_VAR_TX
-// FIXME crop these calls
-static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride,
- TX_SIZE tx_size) {
- return aom_sum_squares_2d_i16(diff, diff_stride, tx_size_wide[tx_size],
- tx_size_high[tx_size]);
-}
-
void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
int blk_row, int blk_col, int plane, int block,
int plane_bsize, const ENTROPY_CONTEXT *a,
@@ -3890,18 +4308,23 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
MACROBLOCKD *xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
+
+#if CONFIG_TXK_SEL
+ av1_search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize,
+ tx_size, a, l, 0, rd_stats);
+ return;
+#endif
+
int64_t tmp;
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
PLANE_TYPE plane_type = get_plane_type(plane);
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ TX_TYPE tx_type =
+ av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
const SCAN_ORDER *const scan_order =
- get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+ get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
int bh = block_size_high[txm_bsize];
int bw = block_size_wide[txm_bsize];
- int txb_h = tx_size_high_unit[tx_size];
- int txb_w = tx_size_wide_unit[tx_size];
-
int src_stride = p->src.stride;
uint8_t *src =
&p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
@@ -3914,30 +4337,15 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
#else
DECLARE_ALIGNED(16, uint8_t, rec_buffer[MAX_TX_SQUARE]);
#endif // CONFIG_HIGHBITDEPTH
- int max_blocks_high = block_size_high[plane_bsize];
- int max_blocks_wide = block_size_wide[plane_bsize];
- const int diff_stride = max_blocks_wide;
+ const int diff_stride = block_size_wide[plane_bsize];
const int16_t *diff =
&p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
int txb_coeff_cost;
assert(tx_size < TX_SIZES_ALL);
- if (xd->mb_to_bottom_edge < 0)
- max_blocks_high += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y);
- if (xd->mb_to_right_edge < 0)
- max_blocks_wide += xd->mb_to_right_edge >> (3 + pd->subsampling_x);
-
- max_blocks_high >>= tx_size_wide_log2[0];
- max_blocks_wide >>= tx_size_wide_log2[0];
-
int coeff_ctx = get_entropy_context(tx_size, a, l);
- av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
- coeff_ctx, AV1_XFORM_QUANT_FP);
-
- av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
-
// TODO(any): Use av1_dist_block to compute distortion
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -3954,21 +4362,35 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
0, bw, bh);
#endif // CONFIG_HIGHBITDEPTH
- if (blk_row + txb_h > max_blocks_high || blk_col + txb_w > max_blocks_wide) {
- int idx, idy;
- int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row);
- int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col);
- tmp = 0;
- for (idy = 0; idy < blocks_height; ++idy) {
- for (idx = 0; idx < blocks_width; ++idx) {
- const int16_t *d =
- diff + ((idy * diff_stride + idx) << tx_size_wide_log2[0]);
- tmp += sum_squares_2d(d, diff_stride, 0);
- }
- }
- } else {
- tmp = sum_squares_2d(diff, diff_stride, tx_size);
+#if DISABLE_TRELLISQ_SEARCH
+ av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ coeff_ctx, AV1_XFORM_QUANT_B);
+
+#else
+ av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ coeff_ctx, AV1_XFORM_QUANT_FP);
+
+ const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
+ tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+ const int buffer_length = tx_size_2d[tx_size];
+ int64_t tmp_dist;
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ tmp_dist =
+ av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd) >>
+ shift;
+ else
+#endif
+ tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp) >> shift;
+
+ if (RDCOST(x->rdmult, 0, tmp_dist) < rd_stats->ref_rdcost) {
+ av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
+ a, l);
}
+#endif // DISABLE_TRELLISQ_SEARCH
+
+ tmp = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
+ plane_bsize, txm_bsize);
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
@@ -3977,36 +4399,48 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
rd_stats->sse += tmp * 16;
const int eob = p->eobs[block];
+#if CONFIG_LGT
+ PREDICTION_MODE mode = get_prediction_mode(xd->mi[0], plane, tx_size, block);
+ av1_inverse_transform_block(xd, dqcoeff, mode, tx_type, tx_size, rec_buffer,
+ MAX_TX_SIZE, eob);
+#else
av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, rec_buffer,
MAX_TX_SIZE, eob);
+#endif
if (eob > 0) {
- if (txb_w + blk_col > max_blocks_wide ||
- txb_h + blk_row > max_blocks_high) {
- int idx, idy;
- unsigned int this_dist;
- int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row);
- int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col);
- tmp = 0;
- for (idy = 0; idy < blocks_height; ++idy) {
- for (idx = 0; idx < blocks_width; ++idx) {
- uint8_t *const s =
- src + ((idy * src_stride + idx) << tx_size_wide_log2[0]);
- uint8_t *const r =
- rec_buffer + ((idy * MAX_TX_SIZE + idx) << tx_size_wide_log2[0]);
- cpi->fn_ptr[0].vf(s, src_stride, r, MAX_TX_SIZE, &this_dist);
- tmp += this_dist;
- }
+#if CONFIG_DIST_8X8
+ if (plane == 0 && (bw < 8 && bh < 8)) {
+ // Save sub8x8 luma decoded pixels
+ // since 8x8 luma decoded pixels are not available for daala-dist
+ // after recursive split of BLOCK_8x8 is done.
+ const int pred_stride = block_size_wide[plane_bsize];
+ const int pred_idx = (blk_row * pred_stride + blk_col)
+ << tx_size_wide_log2[0];
+ int16_t *decoded = &pd->pred[pred_idx];
+ int i, j;
+
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (j = 0; j < bh; j++)
+ for (i = 0; i < bw; i++)
+ decoded[j * pred_stride + i] =
+ CONVERT_TO_SHORTPTR(rec_buffer)[j * MAX_TX_SIZE + i];
+ } else {
+#endif
+ for (j = 0; j < bh; j++)
+ for (i = 0; i < bw; i++)
+ decoded[j * pred_stride + i] = rec_buffer[j * MAX_TX_SIZE + i];
+#if CONFIG_HIGHBITDEPTH
}
- } else {
- uint32_t this_dist;
- cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, MAX_TX_SIZE,
- &this_dist);
- tmp = this_dist;
+#endif // CONFIG_HIGHBITDEPTH
}
+#endif // CONFIG_DIST_8X8
+ tmp = pixel_dist(cpi, x, plane, src, src_stride, rec_buffer, MAX_TX_SIZE,
+ blk_row, blk_col, plane_bsize, txm_bsize);
}
rd_stats->dist += tmp * 16;
- txb_coeff_cost =
- av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order, a, l, 0);
+ txb_coeff_cost = av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block,
+ tx_size, scan_order, a, l, 0);
rd_stats->rate += txb_coeff_cost;
rd_stats->skip &= (eob == 0);
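
The rewritten av1_tx_block_rd_b() drops the hand-rolled cropped SSE loops in favour of pixel_diff_dist()/pixel_dist(), and, when trellis search is enabled, it first estimates the coding distortion directly in the transform domain with av1_block_error(), scaled down by the transform shift. av1_optimize_b() is then attempted only if a zero-rate coding of that distortion would still beat the reference cost carried in rd_stats->ref_rdcost. A self-contained sketch of that gate, with a simplified error function and the same assumed cost constants as in the earlier RDCOST sketch:

#include <stddef.h>
#include <stdint.h>

#define AV1_PROB_COST_SHIFT 9 /* assumed */
#define RDDIV_BITS 7          /* assumed */

static int64_t rd_cost(int rdmult, int rate, int64_t dist) {
  return (((int64_t)rate * rdmult + (1 << (AV1_PROB_COST_SHIFT - 1))) >>
          AV1_PROB_COST_SHIFT) +
         (dist << RDDIV_BITS);
}

/* Transform-domain SSE between original and dequantized coefficients, a
 * simplified stand-in for av1_block_error() (which also reports a second
 * sum through an out-parameter). */
static int64_t coeff_sse(const int32_t *coeff, const int32_t *dqcoeff,
                         size_t n) {
  int64_t sse = 0;
  for (size_t i = 0; i < n; ++i) {
    const int64_t d = (int64_t)coeff[i] - dqcoeff[i];
    sse += d * d;
  }
  return sse;
}

/* Run the expensive trellis pass only when a zero-rate coding of the
 * current transform-domain distortion still beats the reference RD; shift
 * folds in (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2 in the patch. */
static int should_run_trellis(int rdmult, const int32_t *coeff,
                              const int32_t *dqcoeff, size_t n, int shift,
                              int64_t ref_rdcost) {
  const int64_t tmp_dist = coeff_sse(coeff, dqcoeff, n) >> shift;
  return rd_cost(rdmult, 0, tmp_dist) < ref_rdcost;
}
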
@@ -4038,14 +4472,35 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
int64_t this_rd = INT64_MAX;
ENTROPY_CONTEXT *pta = ta + blk_col;
ENTROPY_CONTEXT *ptl = tl + blk_row;
- int coeff_ctx, i;
+ int i;
int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
mbmi->sb_type, tx_size);
int64_t sum_rd = INT64_MAX;
int tmp_eob = 0;
int zero_blk_rate;
RD_STATS sum_rd_stats;
- const int tx_size_ctx = txsize_sqr_map[tx_size];
+#if CONFIG_TXK_SEL
+ TX_TYPE best_tx_type = TX_TYPES;
+ int txk_idx = (blk_row << 4) + blk_col;
+#endif
+#if CONFIG_RECT_TX_EXT
+ TX_SIZE quarter_txsize = quarter_txsize_lookup[mbmi->sb_type];
+ int check_qttx = is_quarter_tx_allowed(xd, mbmi, is_inter_block(mbmi)) &&
+ tx_size == max_txsize_rect_lookup[mbmi->sb_type] &&
+ quarter_txsize != tx_size;
+ int is_qttx_picked = 0;
+ int eobs_qttx[2] = { 0, 0 };
+ int skip_qttx[2] = { 0, 0 };
+ int block_offset_qttx = check_qttx
+ ? tx_size_wide_unit[quarter_txsize] *
+ tx_size_high_unit[quarter_txsize]
+ : 0;
+ int blk_row_offset, blk_col_offset;
+ int is_wide_qttx =
+ tx_size_wide_unit[quarter_txsize] > tx_size_high_unit[quarter_txsize];
+ blk_row_offset = is_wide_qttx ? tx_size_high_unit[quarter_txsize] : 0;
+ blk_col_offset = is_wide_qttx ? 0 : tx_size_wide_unit[quarter_txsize];
+#endif
av1_init_rd_stats(&sum_rd_stats);
@@ -4056,15 +4511,25 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
return;
}
- coeff_ctx = get_entropy_context(tx_size, pta, ptl);
-
av1_init_rd_stats(rd_stats);
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
+#if CONFIG_LV_MAP
+ TX_SIZE txs_ctx = get_txsize_context(tx_size);
+ TXB_CTX txb_ctx;
+ get_txb_ctx(plane_bsize, tx_size, plane, pta, ptl, &txb_ctx);
+ zero_blk_rate =
+ av1_cost_bit(xd->fc->txb_skip[txs_ctx][txb_ctx.txb_skip_ctx], 1);
+#else
+ int tx_size_ctx = txsize_sqr_map[tx_size];
+ int coeff_ctx = get_entropy_context(tx_size, pta, ptl);
zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0]
[coeff_ctx][EOB_TOKEN];
+#endif
+ rd_stats->ref_rdcost = ref_best_rd;
+ rd_stats->zero_rate = zero_blk_rate;
if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
inter_tx_size[0][0] = tx_size;
@@ -4081,8 +4546,8 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
}
}
- if ((RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist) >=
- RDCOST(x->rdmult, x->rddiv, zero_blk_rate, rd_stats->sse) ||
+ if ((RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
+ RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
rd_stats->skip == 1) &&
!xd->lossless[mbmi->segment_id]) {
#if CONFIG_RD_DEBUG
@@ -4094,6 +4559,9 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
rd_stats->skip = 1;
x->blk_skip[plane][blk_row * bw + blk_col] = 1;
p->eobs[block] = 0;
+#if CONFIG_TXK_SEL
+ mbmi->txk_type[txk_idx] = DCT_DCT;
+#endif
} else {
x->blk_skip[plane][blk_row * bw + blk_col] = 0;
rd_stats->skip = 0;
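
Two details worth noting in the block above: the zero-block rate now comes from the LV_MAP txb_skip cost when CONFIG_LV_MAP is on (the token-domain EOB_TOKEN cost otherwise), and ref_best_rd plus that rate are stashed in rd_stats->ref_rdcost / rd_stats->zero_rate so the per-block routine can reuse them. The forced-skip decision itself reduces to the comparison sketched here, with the two RDCOST() values computed in the patch passed in:

/* A transform block is forced to all-zero (its distortion replaced by its
 * SSE and its eob cleared) when signalling "no coefficients" at the
 * zero-block rate is no worse in RD than the coded result, or when the
 * block already quantized to nothing; lossless segments are never forced. */
static int force_zero_block(long long coded_rdcost, long long zero_rdcost,
                            int already_skipped, int lossless) {
  if (lossless) return 0;
  return already_skipped || coded_rdcost >= zero_rdcost;
}
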
@@ -4102,23 +4570,143 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
rd_stats->rate +=
av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
- this_rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist);
+#if CONFIG_RECT_TX_EXT
+ if (check_qttx) {
+ assert(blk_row == 0 && blk_col == 0);
+ rd_stats->rate += av1_cost_bit(cpi->common.fc->quarter_tx_size_prob, 0);
+ }
+#endif
+ this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
+#if CONFIG_LV_MAP
+ tmp_eob = p->txb_entropy_ctx[block];
+#else
tmp_eob = p->eobs[block];
+#endif
+
+#if CONFIG_TXK_SEL
+ best_tx_type = mbmi->txk_type[txk_idx];
+#endif
+
+#if CONFIG_RECT_TX_EXT
+ if (check_qttx) {
+ assert(blk_row == 0 && blk_col == 0 && block == 0 && plane == 0);
+
+ RD_STATS rd_stats_tmp, rd_stats_qttx;
+ int64_t rd_qttx;
+
+ av1_init_rd_stats(&rd_stats_qttx);
+ av1_init_rd_stats(&rd_stats_tmp);
+
+ av1_tx_block_rd_b(cpi, x, quarter_txsize, 0, 0, plane, 0, plane_bsize,
+ pta, ptl, &rd_stats_qttx);
+
+ tx_size_ctx = txsize_sqr_map[quarter_txsize];
+ coeff_ctx = get_entropy_context(quarter_txsize, pta, ptl);
+ zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0]
+ [coeff_ctx][EOB_TOKEN];
+ if ((RDCOST(x->rdmult, rd_stats_qttx.rate, rd_stats_qttx.dist) >=
+ RDCOST(x->rdmult, zero_blk_rate, rd_stats_qttx.sse) ||
+ rd_stats_qttx.skip == 1) &&
+ !xd->lossless[mbmi->segment_id]) {
+#if CONFIG_RD_DEBUG
+ av1_update_txb_coeff_cost(&rd_stats_qttx, plane, quarter_txsize, 0, 0,
+ zero_blk_rate - rd_stats_qttx.rate);
+#endif // CONFIG_RD_DEBUG
+ rd_stats_qttx.rate = zero_blk_rate;
+ rd_stats_qttx.dist = rd_stats_qttx.sse;
+ rd_stats_qttx.skip = 1;
+ x->blk_skip[plane][blk_row * bw + blk_col] = 1;
+ skip_qttx[0] = 1;
+ p->eobs[block] = 0;
+ } else {
+ x->blk_skip[plane][blk_row * bw + blk_col] = 0;
+ skip_qttx[0] = 0;
+ rd_stats->skip = 0;
+ }
+
+ // Second tx block
+ av1_tx_block_rd_b(cpi, x, quarter_txsize, blk_row_offset, blk_col_offset,
+ plane, block_offset_qttx, plane_bsize, pta, ptl,
+ &rd_stats_tmp);
+
+ av1_set_txb_context(x, plane, 0, quarter_txsize, pta, ptl);
+ coeff_ctx = get_entropy_context(quarter_txsize, pta + blk_col_offset,
+ ptl + blk_row_offset);
+ zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0]
+ [coeff_ctx][EOB_TOKEN];
+ if ((RDCOST(x->rdmult, rd_stats_tmp.rate, rd_stats_tmp.dist) >=
+ RDCOST(x->rdmult, zero_blk_rate, rd_stats_tmp.sse) ||
+ rd_stats_tmp.skip == 1) &&
+ !xd->lossless[mbmi->segment_id]) {
+#if CONFIG_RD_DEBUG
+ av1_update_txb_coeff_cost(&rd_stats_tmp, plane, quarter_txsize, 0, 0,
+ zero_blk_rate - rd_stats_tmp.rate);
+#endif // CONFIG_RD_DEBUG
+ rd_stats_tmp.rate = zero_blk_rate;
+ rd_stats_tmp.dist = rd_stats_tmp.sse;
+ rd_stats_tmp.skip = 1;
+ x->blk_skip[plane][blk_row_offset * bw + blk_col_offset] = 1;
+ skip_qttx[1] = 1;
+ p->eobs[block_offset_qttx] = 0;
+ } else {
+ x->blk_skip[plane][blk_row_offset * bw + blk_col_offset] = 0;
+ skip_qttx[1] = 0;
+ rd_stats_tmp.skip = 0;
+ }
+
+ av1_merge_rd_stats(&rd_stats_qttx, &rd_stats_tmp);
+
+ if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) {
+ rd_stats_qttx.rate +=
+ av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
+ }
+ rd_stats_qttx.rate +=
+ av1_cost_bit(cpi->common.fc->quarter_tx_size_prob, 1);
+ rd_qttx = RDCOST(x->rdmult, rd_stats_qttx.rate, rd_stats_qttx.dist);
+#if CONFIG_LV_MAP
+ eobs_qttx[0] = p->txb_entropy_ctx[0];
+ eobs_qttx[1] = p->txb_entropy_ctx[block_offset_qttx];
+#else
+ eobs_qttx[0] = p->eobs[0];
+ eobs_qttx[1] = p->eobs[block_offset_qttx];
+#endif
+ if (rd_qttx < this_rd) {
+ is_qttx_picked = 1;
+ this_rd = rd_qttx;
+ rd_stats->rate = rd_stats_qttx.rate;
+ rd_stats->dist = rd_stats_qttx.dist;
+ rd_stats->sse = rd_stats_qttx.sse;
+ rd_stats->skip = rd_stats_qttx.skip;
+ rd_stats->rdcost = rd_stats_qttx.rdcost;
+ }
+ av1_get_entropy_contexts(plane_bsize, 0, pd, ta, tl);
+ }
+#endif
}
+#if CONFIG_MRC_TX
+ // If the tx type we are trying is MRC_DCT, we cannot partition the transform
+ // into anything smaller than TX_32X32
+ if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH && mbmi->tx_type != MRC_DCT) {
+#else
if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) {
+#endif
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
const int bsl = tx_size_wide_unit[sub_txs];
int sub_step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
RD_STATS this_rd_stats;
int this_cost_valid = 1;
int64_t tmp_rd = 0;
-
+#if CONFIG_DIST_8X8
+ int sub8x8_eob[4];
+#endif
sum_rd_stats.rate =
av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1);
assert(tx_size < TX_SIZES_ALL);
+ ref_best_rd = AOMMIN(this_rd, ref_best_rd);
+
for (i = 0; i < 4 && this_cost_valid; ++i) {
int offsetr = blk_row + (i >> 1) * bsl;
int offsetc = blk_col + (i & 0x01) * bsl;
@@ -4129,30 +4717,170 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
depth + 1, plane_bsize, ta, tl, tx_above, tx_left,
&this_rd_stats, ref_best_rd - tmp_rd, &this_cost_valid,
rd_stats_stack);
-
+#if CONFIG_DIST_8X8
+ if (plane == 0 && tx_size == TX_8X8) {
+ sub8x8_eob[i] = p->eobs[block];
+ }
+#endif // CONFIG_DIST_8X8
av1_merge_rd_stats(&sum_rd_stats, &this_rd_stats);
- tmp_rd =
- RDCOST(x->rdmult, x->rddiv, sum_rd_stats.rate, sum_rd_stats.dist);
+ tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist);
+#if !CONFIG_DIST_8X8
if (this_rd < tmp_rd) break;
+#endif
block += sub_step;
}
+#if CONFIG_DIST_8X8
+ if (this_cost_valid && plane == 0 && tx_size == TX_8X8) {
+ const int src_stride = p->src.stride;
+ const int dst_stride = pd->dst.stride;
+
+ const uint8_t *src =
+ &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
+ const uint8_t *dst =
+ &pd->dst
+ .buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
+
+ int64_t dist_8x8;
+ int qindex = x->qindex;
+ const int pred_stride = block_size_wide[plane_bsize];
+ const int pred_idx = (blk_row * pred_stride + blk_col)
+ << tx_size_wide_log2[0];
+ int16_t *pred = &pd->pred[pred_idx];
+ int j;
+ int row, col;
+
+#if CONFIG_HIGHBITDEPTH
+ uint8_t *pred8;
+ DECLARE_ALIGNED(16, uint16_t, pred8_16[8 * 8]);
+#else
+ DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]);
+#endif // CONFIG_HIGHBITDEPTH
+
+ dist_8x8 = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride,
+ BLOCK_8X8, 8, 8, 8, 8, qindex) *
+ 16;
+ sum_rd_stats.sse = dist_8x8;
+
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ pred8 = CONVERT_TO_BYTEPTR(pred8_16);
+ else
+ pred8 = (uint8_t *)pred8_16;
+#endif
+
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (row = 0; row < 2; ++row) {
+ for (col = 0; col < 2; ++col) {
+ int idx = row * 2 + col;
+ int eob = sub8x8_eob[idx];
+
+ if (eob > 0) {
+ for (j = 0; j < 4; j++)
+ for (i = 0; i < 4; i++)
+ CONVERT_TO_SHORTPTR(pred8)
+ [(row * 4 + j) * 8 + 4 * col + i] =
+ pred[(row * 4 + j) * pred_stride + 4 * col + i];
+ } else {
+ for (j = 0; j < 4; j++)
+ for (i = 0; i < 4; i++)
+ CONVERT_TO_SHORTPTR(pred8)
+ [(row * 4 + j) * 8 + 4 * col + i] = CONVERT_TO_SHORTPTR(
+ dst)[(row * 4 + j) * dst_stride + 4 * col + i];
+ }
+ }
+ }
+ } else {
+#endif
+ for (row = 0; row < 2; ++row) {
+ for (col = 0; col < 2; ++col) {
+ int idx = row * 2 + col;
+ int eob = sub8x8_eob[idx];
+
+ if (eob > 0) {
+ for (j = 0; j < 4; j++)
+ for (i = 0; i < 4; i++)
+ pred8[(row * 4 + j) * 8 + 4 * col + i] =
+ pred[(row * 4 + j) * pred_stride + 4 * col + i];
+ } else {
+ for (j = 0; j < 4; j++)
+ for (i = 0; i < 4; i++)
+ pred8[(row * 4 + j) * 8 + 4 * col + i] =
+ dst[(row * 4 + j) * dst_stride + 4 * col + i];
+ }
+ }
+ }
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ dist_8x8 = av1_dist_8x8(cpi, xd, src, src_stride, pred8, 8, BLOCK_8X8, 8,
+ 8, 8, 8, qindex) *
+ 16;
+ sum_rd_stats.dist = dist_8x8;
+ tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist);
+ }
+#endif // CONFIG_DIST_8X8
if (this_cost_valid) sum_rd = tmp_rd;
}
if (this_rd < sum_rd) {
int idx, idy;
- for (i = 0; i < tx_size_wide_unit[tx_size]; ++i) pta[i] = !(tmp_eob == 0);
- for (i = 0; i < tx_size_high_unit[tx_size]; ++i) ptl[i] = !(tmp_eob == 0);
+#if CONFIG_RECT_TX_EXT
+ TX_SIZE tx_size_selected = is_qttx_picked ? quarter_txsize : tx_size;
+#else
+ TX_SIZE tx_size_selected = tx_size;
+#endif
+
+#if CONFIG_RECT_TX_EXT
+ if (is_qttx_picked) {
+ assert(blk_row == 0 && blk_col == 0 && plane == 0);
+#if CONFIG_LV_MAP
+ p->txb_entropy_ctx[0] = eobs_qttx[0];
+ p->txb_entropy_ctx[block_offset_qttx] = eobs_qttx[1];
+#else
+ p->eobs[0] = eobs_qttx[0];
+ p->eobs[block_offset_qttx] = eobs_qttx[1];
+#endif
+ } else {
+#endif
+#if CONFIG_LV_MAP
+ p->txb_entropy_ctx[block] = tmp_eob;
+#else
+ p->eobs[block] = tmp_eob;
+#endif
+#if CONFIG_RECT_TX_EXT
+ }
+#endif
+
+ av1_set_txb_context(x, plane, block, tx_size_selected, pta, ptl);
+#if CONFIG_RECT_TX_EXT
+ if (is_qttx_picked)
+ av1_set_txb_context(x, plane, block_offset_qttx, tx_size_selected,
+ pta + blk_col_offset, ptl + blk_row_offset);
+#endif
+
txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
tx_size);
- inter_tx_size[0][0] = tx_size;
+ inter_tx_size[0][0] = tx_size_selected;
for (idy = 0; idy < tx_size_high_unit[tx_size] / 2; ++idy)
for (idx = 0; idx < tx_size_wide_unit[tx_size] / 2; ++idx)
- inter_tx_size[idy][idx] = tx_size;
- mbmi->tx_size = tx_size;
+ inter_tx_size[idy][idx] = tx_size_selected;
+ mbmi->tx_size = tx_size_selected;
+#if CONFIG_TXK_SEL
+ mbmi->txk_type[txk_idx] = best_tx_type;
+#endif
if (this_rd == INT64_MAX) *is_cost_valid = 0;
- x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
+#if CONFIG_RECT_TX_EXT
+ if (is_qttx_picked) {
+ x->blk_skip[plane][0] = skip_qttx[0];
+ x->blk_skip[plane][blk_row_offset * bw + blk_col_offset] = skip_qttx[1];
+ } else {
+#endif
+ x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
+#if CONFIG_RECT_TX_EXT
+ }
+#endif
} else {
*rd_stats = sum_rd_stats;
if (sum_rd == INT64_MAX) *is_cost_valid = 0;
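
Under CONFIG_DIST_8X8 the recursive TX_8X8 split above is re-scored at the 8x8 level: the four children's eobs are recorded, an 8x8 block is stitched together from the decoded sub-blocks saved in pd->pred where eob > 0 and from the prediction in dst otherwise, and av1_dist_8x8() is run on the result. A simplified, low-bit-depth sketch of that stitching step:

#include <stdint.h>

/* Rebuild an 8x8 block from four 4x4 quadrants for the 8x8 distortion
 * metric: quadrants with a nonzero eob use their saved decoded pixels,
 * skipped quadrants fall back to the prediction in dst. */
static void assemble_8x8(uint8_t out[8 * 8], const int16_t *decoded,
                         int decoded_stride, const uint8_t *dst,
                         int dst_stride, const int eob[4]) {
  for (int row = 0; row < 2; ++row) {
    for (int col = 0; col < 2; ++col) {
      const int use_decoded = eob[row * 2 + col] > 0;
      for (int j = 0; j < 4; ++j) {
        for (int i = 0; i < 4; ++i) {
          const int y = row * 4 + j, x = col * 4 + i;
          out[y * 8 + x] = use_decoded
                               ? (uint8_t)decoded[y * decoded_stride + x]
                               : dst[y * dst_stride + x];
        }
      }
    }
  }
}
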
@@ -4201,17 +4929,16 @@ static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
tx_above, tx_left, &pn_rd_stats, ref_best_rd - this_rd,
&is_cost_valid, rd_stats_stack);
av1_merge_rd_stats(rd_stats, &pn_rd_stats);
- this_rd += AOMMIN(
- RDCOST(x->rdmult, x->rddiv, pn_rd_stats.rate, pn_rd_stats.dist),
- RDCOST(x->rdmult, x->rddiv, 0, pn_rd_stats.sse));
+ this_rd += AOMMIN(RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist),
+ RDCOST(x->rdmult, 0, pn_rd_stats.sse));
block += step;
++block32;
}
}
}
- this_rd = AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist),
- RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
+ this_rd = AOMMIN(RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist),
+ RDCOST(x->rdmult, 0, rd_stats->sse));
if (this_rd > ref_best_rd) is_cost_valid = 0;
if (!is_cost_valid) {
@@ -4247,6 +4974,7 @@ static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
mbmi->min_tx_size = AOMMIN(
mbmi->min_tx_size, get_min_tx_size(mbmi->inter_tx_size[row][col]));
+#if !CONFIG_TXK_SEL
#if CONFIG_EXT_TX
if (get_ext_tx_types(mbmi->min_tx_size, bsize, is_inter,
cm->reduced_tx_set_used) > 1 &&
@@ -4266,20 +4994,21 @@ static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
[mbmi->tx_type];
}
}
-#else // CONFIG_EXT_TX
+#else
if (mbmi->min_tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id])
rd_stats->rate +=
cpi->inter_tx_type_costs[mbmi->min_tx_size][mbmi->tx_type];
#endif // CONFIG_EXT_TX
+#endif // CONFIG_TXK_SEL
if (rd_stats->skip)
- rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse);
+ rd = RDCOST(x->rdmult, s1, rd_stats->sse);
else
- rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate + s0, rd_stats->dist);
+ rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist);
if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
!(rd_stats->skip))
- rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse));
+ rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse));
return rd;
}
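
The tail of select_tx_size_fix_type() folds the skip flag into the block cost: s0 and s1 are taken to be the rates of coding the skip flag as 0 and 1 (their definition is above this excerpt), and for inter non-lossless blocks the skip-everything alternative is always kept as a lower bound, since the encoder may still decide to drop all coefficients later. Reduced to a helper:

/* rd_coded already includes s0 (skip flag = 0) and the coded distortion;
 * rd_skip is RDCOST(rdmult, s1, sse), i.e. drop every coefficient. */
static long long block_rd(long long rd_coded, long long rd_skip, int skip,
                          int is_inter, int lossless) {
  long long rd = skip ? rd_skip : rd_coded;
  if (is_inter && !lossless && !skip && rd_skip < rd) rd = rd_skip;
  return rd;
}
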
@@ -4299,6 +5028,12 @@ static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
TX_SIZE best_tx = max_txsize_lookup[bsize];
TX_SIZE best_min_tx_size = TX_SIZES_ALL;
uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
+ TX_TYPE txk_start = DCT_DCT;
+#if CONFIG_TXK_SEL
+ TX_TYPE txk_end = DCT_DCT + 1;
+#else
+ TX_TYPE txk_end = TX_TYPES;
+#endif
const int n4 = bsize_to_num_blk(bsize);
int idx, idy;
int prune = 0;
@@ -4326,9 +5061,14 @@ static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
for (idx = 0; idx < count32; ++idx)
av1_invalid_rd_stats(&rd_stats_stack[idx]);
- for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+ for (tx_type = txk_start; tx_type < txk_end; ++tx_type) {
RD_STATS this_rd_stats;
av1_init_rd_stats(&this_rd_stats);
+#if CONFIG_MRC_TX
+  // MRC_DCT is only implemented for TX_32X32, so only include this tx type
+  // in the search for TX_32X32.
+ if (tx_type == MRC_DCT && max_tx_size != TX_32X32) continue;
+#endif // CONFIG_MRC_TX
#if CONFIG_EXT_TX
if (is_inter) {
if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
@@ -4384,7 +5124,6 @@ static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
ENTROPY_CONTEXT *left_ctx, RD_STATS *rd_stats) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
const int tx_row = blk_row >> (1 - pd->subsampling_y);
@@ -4402,16 +5141,11 @@ static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
: mbmi->inter_tx_size[tx_row][tx_col];
if (tx_size == plane_tx_size) {
- int i;
ENTROPY_CONTEXT *ta = above_ctx + blk_col;
ENTROPY_CONTEXT *tl = left_ctx + blk_row;
av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
plane_bsize, ta, tl, rd_stats);
-
- for (i = 0; i < tx_size_wide_unit[tx_size]; ++i)
- ta[i] = !(p->eobs[block] == 0);
- for (i = 0; i < tx_size_high_unit[tx_size]; ++i)
- tl[i] = !(p->eobs[block] == 0);
+ av1_set_txb_context(x, plane, block, tx_size, ta, tl);
} else {
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
const int bsl = tx_size_wide_unit[sub_txs];
@@ -4498,9 +5232,8 @@ static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x,
av1_merge_rd_stats(rd_stats, &pn_rd_stats);
- this_rd =
- AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist),
- RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
+ this_rd = AOMMIN(RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist),
+ RDCOST(x->rdmult, 0, rd_stats->sse));
if (this_rd > ref_best_rd) {
is_cost_valid = 0;
@@ -4543,7 +5276,7 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
&plane_block_height, &rows, &cols);
if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return;
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
#if CONFIG_FILTER_INTRA
mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
#endif // CONFIG_FILTER_INTRA
@@ -4689,7 +5422,7 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
}
}
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
+ this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
if (this_rd < *best_rd) {
*best_rd = this_rd;
*best_mbmi = *mbmi;
@@ -4727,7 +5460,7 @@ static int rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
av1_zero(filter_intra_mode_info);
mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 1;
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
#if CONFIG_PALETTE
mbmi->palette_mode_info.palette_size[1] = 0;
#endif // CONFIG_PALETTE
@@ -4741,7 +5474,7 @@ static int rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
av1_cost_bit(cpi->common.fc->filter_intra_probs[1], 1) +
cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] +
write_uniform_cost(FILTER_INTRA_MODES, mode);
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
+ this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
if (this_rd < *best_rd) {
*best_rd = this_rd;
*rate = this_rate;
@@ -4754,7 +5487,7 @@ static int rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
}
if (filter_intra_selected_flag) {
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
filter_intra_mode_info.use_filter_intra_mode[1];
mbmi->filter_intra_mode_info.filter_intra_mode[1] =
@@ -4782,7 +5515,7 @@ static int64_t pick_intra_angle_routine_sbuv(
if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in))
return INT64_MAX;
this_rate = tokenonly_rd_stats.rate + rate_overhead;
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
+ this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
if (this_rd < *best_rd) {
*best_rd = this_rd;
*best_angle_delta = mbmi->angle_delta[1];
@@ -4852,8 +5585,172 @@ static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
}
#endif // CONFIG_EXT_INTRA
+#if CONFIG_CFL
+static int64_t cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
+ const int y_averages_q3[MAX_NUM_TXB],
+ const uint8_t *src, int src_stride, int width,
+ int height, TX_SIZE tx_size, int dc_pred,
+ int alpha_q3, int64_t *dist_neg_out) {
+ int64_t dist = 0;
+ int diff;
+
+ if (alpha_q3 == 0) {
+ for (int j = 0; j < height; j++) {
+ for (int i = 0; i < width; i++) {
+ diff = src[i] - dc_pred;
+ dist += diff * diff;
+ }
+ src += src_stride;
+ }
+
+ if (dist_neg_out) *dist_neg_out = dist;
+
+ return dist;
+ }
+
+ int64_t dist_neg = 0;
+ const int tx_height = tx_size_high[tx_size];
+ const int tx_width = tx_size_wide[tx_size];
+ const int y_block_row_off = y_stride * tx_height;
+ const int src_block_row_off = src_stride * tx_height;
+ const uint8_t *t_y_pix;
+ const uint8_t *t_src;
+ int a = 0;
+ for (int b_j = 0; b_j < height; b_j += tx_height) {
+ const int h = b_j + tx_height;
+ for (int b_i = 0; b_i < width; b_i += tx_width) {
+ const int w = b_i + tx_width;
+ const int tx_avg_q3 = y_averages_q3[a++];
+ t_y_pix = y_pix;
+ t_src = src;
+ for (int t_j = b_j; t_j < h; t_j++) {
+ for (int t_i = b_i; t_i < w; t_i++) {
+ const int uv = t_src[t_i];
+
+ const int scaled_luma =
+ get_scaled_luma_q0(alpha_q3, t_y_pix[t_i], tx_avg_q3);
+
+ // TODO(ltrudeau) add support for HBD.
+ diff = uv - clamp(scaled_luma + dc_pred, 0, 255);
+ dist += diff * diff;
+
+ // TODO(ltrudeau) add support for HBD.
+ diff = uv - clamp(-scaled_luma + dc_pred, 0, 255);
+ dist_neg += diff * diff;
+ }
+ t_y_pix += y_stride;
+ t_src += src_stride;
+ }
+ }
+ y_pix += y_block_row_off;
+ src += src_block_row_off;
+ }
+
+ if (dist_neg_out) *dist_neg_out = dist_neg;
+
+ return dist;
+}
+
+static inline void cfl_update_costs(CFL_CTX *cfl, FRAME_CONTEXT *ec_ctx) {
+ assert(ec_ctx->cfl_alpha_cdf[CFL_ALPHABET_SIZE - 1] ==
+ AOM_ICDF(CDF_PROB_TOP));
+
+ aom_cdf_prob prev_cdf = 0;
+
+ for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
+ const int sign_bit_cost = (cfl_alpha_codes[c][CFL_PRED_U] != 0) +
+ (cfl_alpha_codes[c][CFL_PRED_V] != 0);
+
+ aom_cdf_prob prob = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]) - prev_cdf;
+ prev_cdf = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]);
+
+ cfl->costs[c] = av1_cost_symbol(prob) + av1_cost_literal(sign_bit_cost);
+ }
+}
+
+static int cfl_rd_pick_alpha(MACROBLOCK *const x, TX_SIZE tx_size) {
+ const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U];
+ const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V];
+ const uint8_t *const src_u = p_u->src.buf;
+ const uint8_t *const src_v = p_v->src.buf;
+ const int src_stride_u = p_u->src.stride;
+ const int src_stride_v = p_v->src.stride;
+
+ MACROBLOCKD *const xd = &x->e_mbd;
+ FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+
+ CFL_CTX *const cfl = xd->cfl;
+ cfl_compute_parameters(xd, tx_size);
+ const int width = cfl->uv_width;
+ const int height = cfl->uv_height;
+ const int dc_pred_u = cfl->dc_pred[CFL_PRED_U];
+ const int dc_pred_v = cfl->dc_pred[CFL_PRED_V];
+ const int *y_averages_q3 = cfl->y_averages_q3;
+ const uint8_t *y_pix = cfl->y_down_pix;
+
+ CFL_SIGN_TYPE *signs = mbmi->cfl_alpha_signs;
+
+ cfl_update_costs(cfl, ec_ctx);
+
+ int64_t sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
+ sse[CFL_PRED_U][0] =
+ cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u,
+ width, height, tx_size, dc_pred_u, 0, NULL);
+ sse[CFL_PRED_V][0] =
+ cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v,
+ width, height, tx_size, dc_pred_v, 0, NULL);
+
+ for (int m = 1; m < CFL_MAGS_SIZE; m += 2) {
+ assert(cfl_alpha_mags_q3[m + 1] == -cfl_alpha_mags_q3[m]);
+ sse[CFL_PRED_U][m] = cfl_alpha_dist(
+ y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u, width, height,
+ tx_size, dc_pred_u, cfl_alpha_mags_q3[m], &sse[CFL_PRED_U][m + 1]);
+ sse[CFL_PRED_V][m] = cfl_alpha_dist(
+ y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v, width, height,
+ tx_size, dc_pred_v, cfl_alpha_mags_q3[m], &sse[CFL_PRED_V][m + 1]);
+ }
+
+ int64_t dist;
+ int64_t cost;
+ int64_t best_cost;
+
+ // Compute least squares parameter of the entire block
+ // IMPORTANT: We assume that the first code is 0,0
+ int ind = 0;
+ signs[CFL_PRED_U] = CFL_SIGN_POS;
+ signs[CFL_PRED_V] = CFL_SIGN_POS;
+
+ dist = sse[CFL_PRED_U][0] + sse[CFL_PRED_V][0];
+ dist *= 16;
+ best_cost = RDCOST(x->rdmult, cfl->costs[0], dist);
+
+ for (int c = 1; c < CFL_ALPHABET_SIZE; c++) {
+ const int idx_u = cfl_alpha_codes[c][CFL_PRED_U];
+ const int idx_v = cfl_alpha_codes[c][CFL_PRED_V];
+ for (CFL_SIGN_TYPE sign_u = idx_u == 0; sign_u < CFL_SIGNS; sign_u++) {
+ for (CFL_SIGN_TYPE sign_v = idx_v == 0; sign_v < CFL_SIGNS; sign_v++) {
+ dist = sse[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] +
+ sse[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)];
+ dist *= 16;
+ cost = RDCOST(x->rdmult, cfl->costs[c], dist);
+ if (cost < best_cost) {
+ best_cost = cost;
+ ind = c;
+ signs[CFL_PRED_U] = sign_u;
+ signs[CFL_PRED_V] = sign_v;
+ }
+ }
+ }
+ }
+
+ mbmi->cfl_alpha_idx = ind;
+ return cfl->costs[ind];
+}
+#endif // CONFIG_CFL
+
static void init_sbuv_mode(MB_MODE_INFO *const mbmi) {
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
#if CONFIG_PALETTE
mbmi->palette_mode_info.palette_size[1] = 0;
#endif // CONFIG_PALETTE
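
cfl_rd_pick_alpha() above first tabulates, per chroma plane, the SSE for a zero alpha and for every signed magnitude (cfl_alpha_dist() returns the positive-sign SSE and writes the negative-sign one through dist_neg_out), then walks the joint (U,V) codebook choosing the entry and sign pair that minimizes RDCOST(x->rdmult, cfl->costs[c], 16 * dist). The sketch below isolates that selection loop; the codebook size, the magnitude-table layout and the 0/1 sign encoding are assumptions made only to keep it self-contained.

#include <stdint.h>

#define ALPHABET_SIZE 16 /* assumed size of the joint (U,V) alpha codebook */
#define MAGS_SIZE 7      /* assumed: entry 0 = zero alpha, then +/- pairs  */

#define PROB_COST_SHIFT 9 /* assumed fixed-point shifts, as in the */
#define DIST_SHIFT 7      /* RDCOST sketch near the top of the file */

static int64_t rd_cost(int rdmult, int rate, int64_t dist) {
  return (((int64_t)rate * rdmult + (1 << (PROB_COST_SHIFT - 1))) >>
          PROB_COST_SHIFT) +
         (dist << DIST_SHIFT);
}

/* Joint (U,V) alpha selection over precomputed SSE tables: sse[plane][0] is
 * the zero-alpha SSE, and for each magnitude the positive-sign SSE sits at
 * an odd index m (with m + 1 < MAGS_SIZE) and the matching negative-sign
 * SSE at m + 1, as in the cfl_alpha_dist() calls above. codes[c] holds the
 * per-plane magnitude indices and costs[c] the signalling cost of entry c. */
static int pick_alpha(int rdmult, const int64_t sse[2][MAGS_SIZE],
                      const int codes[ALPHABET_SIZE][2],
                      const int costs[ALPHABET_SIZE], int sign_out[2]) {
  int best = 0;
  sign_out[0] = sign_out[1] = 0; /* entry 0 is assumed to be (0, 0) */
  int64_t best_rd = rd_cost(rdmult, costs[0], 16 * (sse[0][0] + sse[1][0]));

  for (int c = 1; c < ALPHABET_SIZE; ++c) {
    const int idx_u = codes[c][0], idx_v = codes[c][1];
    for (int su = 0; su < 2; ++su) {       /* 0 = positive, 1 = negative */
      if (idx_u == 0 && su == 1) continue; /* zero magnitude has one sign */
      for (int sv = 0; sv < 2; ++sv) {
        if (idx_v == 0 && sv == 1) continue;
        const int64_t dist = 16 * (sse[0][idx_u + su] + sse[1][idx_v + sv]);
        const int64_t rd = rd_cost(rdmult, costs[c], dist);
        if (rd < best_rd) {
          best_rd = rd;
          best = c;
          sign_out[0] = su;
          sign_out[1] = sv;
        }
      }
    }
  }
  return best; /* index written to mbmi->cfl_alpha_idx in the patch */
}
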
@@ -4870,20 +5767,19 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
assert(!is_inter_block(mbmi));
MB_MODE_INFO best_mbmi = *mbmi;
- PREDICTION_MODE mode;
int64_t best_rd = INT64_MAX, this_rd;
- int this_rate;
- RD_STATS tokenonly_rd_stats;
#if CONFIG_PVQ
od_rollback_buffer buf;
od_encode_checkpoint(&x->daala_enc, &buf);
#endif // CONFIG_PVQ
#if CONFIG_PALETTE
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
- uint8_t *best_palette_color_map = NULL;
#endif // CONFIG_PALETTE
- for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
+ for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) {
+ int this_rate;
+ RD_STATS tokenonly_rd_stats;
+ UV_PREDICTION_MODE mode = uv_rd_search_mode_order[mode_idx];
#if CONFIG_EXT_INTRA
const int is_directional_mode =
av1_is_directional_mode(mode, mbmi->sb_type);
@@ -4893,9 +5789,16 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
continue;
mbmi->uv_mode = mode;
+#if CONFIG_CFL
+ int cfl_alpha_rate = 0;
+ if (mode == UV_DC_PRED) {
+ const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]);
+ cfl_alpha_rate = cfl_rd_pick_alpha(x, uv_tx_size);
+ }
+#endif
#if CONFIG_EXT_INTRA
mbmi->angle_delta[1] = 0;
- if (is_directional_mode) {
+ if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) {
const int rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode] +
write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, 0);
if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
@@ -4915,8 +5818,13 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
this_rate =
tokenonly_rd_stats.rate + cpi->intra_uv_mode_cost[mbmi->mode][mode];
+#if CONFIG_CFL
+ if (mode == UV_DC_PRED) {
+ this_rate += cfl_alpha_rate;
+ }
+#endif
#if CONFIG_EXT_INTRA
- if (is_directional_mode) {
+ if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) {
this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
}
@@ -4927,7 +5835,7 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
#endif // CONFIG_FILTER_INTRA
#if CONFIG_PALETTE
if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8 &&
- mode == DC_PRED)
+ mode == UV_DC_PRED)
this_rate += av1_cost_bit(
av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 0);
#endif // CONFIG_PALETTE
@@ -4935,7 +5843,7 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
#if CONFIG_PVQ
od_encode_rollback(&x->daala_enc, &buf);
#endif // CONFIG_PVQ
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
+ this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
if (this_rd < best_rd) {
best_mbmi = *mbmi;
@@ -4949,9 +5857,9 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
#if CONFIG_PALETTE
if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8) {
- best_palette_color_map = x->palette_buffer->best_palette_color_map;
+ uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map;
rd_pick_palette_intra_sbuv(cpi, x,
- cpi->intra_uv_mode_cost[mbmi->mode][DC_PRED],
+ cpi->intra_uv_mode_cost[mbmi->mode][UV_DC_PRED],
best_palette_color_map, &best_mbmi, &best_rd,
rate, rate_tokenonly, distortion, skippable);
}
@@ -4975,7 +5883,7 @@ static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
TX_SIZE max_tx_size, int *rate_uv,
int *rate_uv_tokenonly, int64_t *dist_uv,
- int *skip_uv, PREDICTION_MODE *mode_uv) {
+ int *skip_uv, UV_PREDICTION_MODE *mode_uv) {
// Use an estimated rd for uv_intra based on DC_PRED if the
// appropriate speed flag is set.
(void)ctx;
@@ -4990,7 +5898,7 @@ static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
*rate_uv_tokenonly = 0;
*dist_uv = 0;
*skip_uv = 1;
- *mode_uv = DC_PRED;
+ *mode_uv = UV_DC_PRED;
return;
}
BLOCK_SIZE bs = scale_chroma_bsize(bsize, x->e_mbd.plane[1].subsampling_x,
@@ -5011,6 +5919,12 @@ static int cost_mv_ref(const AV1_COMP *const cpi, PREDICTION_MODE mode,
if (is_inter_compound_mode(mode)) {
return cpi
->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
+#if CONFIG_COMPOUND_SINGLEREF
+ } else if (is_inter_singleref_comp_mode(mode)) {
+ return cpi
+ ->inter_singleref_comp_mode_cost[mode_context]
+ [INTER_SINGLEREF_COMP_OFFSET(mode)];
+#endif // CONFIG_COMPOUND_SINGLEREF
}
#endif
@@ -5096,8 +6010,13 @@ typedef struct {
int segment_yrate;
PREDICTION_MODE modes[4];
#if CONFIG_EXT_INTER
+#if CONFIG_COMPOUND_SINGLEREF
+ SEG_RDSTAT rdstat[4][INTER_MODES + INTER_SINGLEREF_COMP_MODES +
+ INTER_COMPOUND_MODES];
+#else // !CONFIG_COMPOUND_SINGLEREF
SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES];
-#else
+#endif // CONFIG_COMPOUND_SINGLEREF
+#else // !CONFIG_EXT_INTER
SEG_RDSTAT rdstat[4][INTER_MODES];
#endif // CONFIG_EXT_INTER
int mvthresh;
@@ -5120,27 +6039,28 @@ static int check_best_zero_mv(
int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME], int this_mode,
const MV_REFERENCE_FRAME ref_frames[2], const BLOCK_SIZE bsize, int block,
int mi_row, int mi_col) {
- int_mv zeromv[2];
+ int_mv zeromv[2] = { {.as_int = 0 } };
+#if CONFIG_GLOBAL_MOTION
int comp_pred_mode = ref_frames[1] > INTRA_FRAME;
- int cur_frm;
+#endif
(void)mi_row;
(void)mi_col;
- for (cur_frm = 0; cur_frm < 1 + comp_pred_mode; cur_frm++) {
#if CONFIG_GLOBAL_MOTION
- if (this_mode == ZEROMV
+ if (this_mode == ZEROMV
#if CONFIG_EXT_INTER
- || this_mode == ZERO_ZEROMV
+ || this_mode == ZERO_ZEROMV
#endif // CONFIG_EXT_INTER
- )
+ ) {
+ for (int cur_frm = 0; cur_frm < 1 + comp_pred_mode; cur_frm++) {
zeromv[cur_frm].as_int =
gm_get_motion_vector(&cpi->common.global_motion[ref_frames[cur_frm]],
cpi->common.allow_high_precision_mv, bsize,
mi_col, mi_row, block)
.as_int;
- else
-#endif // CONFIG_GLOBAL_MOTION
- zeromv[cur_frm].as_int = 0;
+ }
}
+#endif // CONFIG_GLOBAL_MOTION
+
#if !CONFIG_EXT_INTER
assert(ref_frames[1] != INTRA_FRAME); // Just sanity check
#endif // !CONFIG_EXT_INTER
@@ -5201,8 +6121,11 @@ static int check_best_zero_mv(
}
static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row,
- int mi_col,
+ BLOCK_SIZE bsize, int_mv *frame_mv,
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ int_mv *frame_comp_mv,
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ int mi_row, int mi_col,
#if CONFIG_EXT_INTER
int_mv *ref_mv_sub8x8[2], const uint8_t *mask,
int mask_stride,
@@ -5213,35 +6136,47 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
const int ph = block_size_high[bsize];
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- // This function should only ever be called for compound modes
+// This function should only ever be called for compound modes
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi)) {
+ assert(is_inter_singleref_comp_mode(mbmi->mode));
+ assert(frame_comp_mv);
+ }
+ assert(has_second_ref(mbmi) || is_inter_singleref_comp_mode(mbmi->mode));
+ const int refs[2] = { mbmi->ref_frame[0], has_second_ref(mbmi)
+ ? mbmi->ref_frame[1]
+ : mbmi->ref_frame[0] };
+#else
assert(has_second_ref(mbmi));
const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
int_mv ref_mv[2];
int ite, ref;
-#if CONFIG_DUAL_FILTER
- InterpFilter interp_filter[4] = {
- mbmi->interp_filter[0], mbmi->interp_filter[1], mbmi->interp_filter[2],
- mbmi->interp_filter[3],
- };
-#else
- const InterpFilter interp_filter = mbmi->interp_filter;
-#endif // CONFIG_DUAL_FILTER
struct scale_factors sf;
- struct macroblockd_plane *const pd = &xd->plane[0];
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
// ic and ir are the 4x4 coordiantes of the sub8x8 at index "block"
const int ic = block & 1;
const int ir = (block - ic) >> 1;
+ struct macroblockd_plane *const pd = &xd->plane[0];
const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
#if CONFIG_GLOBAL_MOTION
int is_global[2];
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
+#else
for (ref = 0; ref < 2; ++ref) {
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
WarpedMotionParams *const wm =
&xd->global_motion[xd->mi[0]->mbmi.ref_frame[ref]];
is_global[ref] = is_global_mv_block(xd->mi[0], block, wm->wmtype);
}
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi)) is_global[1] = is_global[0];
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
#endif // CONFIG_GLOBAL_MOTION
+#else // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+ (void)block;
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
// Do joint motion search in compound mode to get more accurate mv.
@@ -5264,7 +6199,11 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
(void)ref_mv_sub8x8;
#endif // CONFIG_EXT_INTER && CONFIG_CB4X4
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
+#else
for (ref = 0; ref < 2; ++ref) {
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
#if CONFIG_EXT_INTER && !CONFIG_CB4X4
if (bsize < BLOCK_8X8 && ref_mv_sub8x8 != NULL)
ref_mv[ref].as_int = ref_mv_sub8x8[ref]->as_int;
@@ -5284,6 +6223,24 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
}
}
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi)) {
+ assert(is_inter_singleref_comp_mode(mbmi->mode));
+ // NOTE: For single ref comp mode, set up the 2nd set of ref_mv/pre_planes
+ // all from the 1st reference frame, i.e. refs[0].
+ ref_mv[1] = x->mbmi_ext->ref_mvs[refs[0]][0];
+ if (scaled_ref_frame[0]) {
+ int i;
+ // Swap out the reference frame for a version that's been scaled to
+ // match the resolution of the current frame, allowing the existing
+ // motion search code to be used without additional modifications.
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ backup_yv12[1][i] = xd->plane[i].pre[1];
+ av1_setup_pre_planes(xd, 1, scaled_ref_frame[0], mi_row, mi_col, NULL);
+ }
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+
// Since we have scaled the reference frames to match the size of the current
// frame we must use a unit scaling factor during mode selection.
#if CONFIG_HIGHBITDEPTH
@@ -5294,9 +6251,16 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
cm->height);
#endif // CONFIG_HIGHBITDEPTH
- // Allow joint search multiple times iteratively for each reference frame
- // and break out of the search loop if it couldn't find a better mv.
+// Allow joint search multiple times iteratively for each reference frame
+// and break out of the search loop if it couldn't find a better mv.
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ const int num_ites =
+ (has_second_ref(mbmi) || mbmi->mode == SR_NEW_NEWMV) ? 4 : 1;
+ const int start_ite = has_second_ref(mbmi) ? 0 : 1;
+ for (ite = start_ite; ite < (start_ite + num_ites); ite++) {
+#else
for (ite = 0; ite < 4; ite++) {
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
struct buf_2d ref_yv12[2];
int bestsme = INT_MAX;
int sadpb = x->sadperbit16;
@@ -5308,7 +6272,7 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
// odd iterations search in the second. The predictor
// found for the 'other' reference frame is factored in.
const int plane = 0;
- ConvolveParams conv_params = get_conv_params(0, plane);
+ ConvolveParams conv_params = get_conv_params(!id, 0, plane);
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
WarpTypesAllowed warp_types;
#if CONFIG_GLOBAL_MOTION
@@ -5323,21 +6287,24 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
ref_yv12[0] = xd->plane[plane].pre[0];
ref_yv12[1] = xd->plane[plane].pre[1];
-#if CONFIG_DUAL_FILTER
- // reload the filter types
- interp_filter[0] =
- (id == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0];
- interp_filter[1] =
- (id == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1];
-#endif // CONFIG_DUAL_FILTER
-
// Get the prediction block from the 'other' reference frame.
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ MV *const the_other_mv = (has_second_ref(mbmi) || id)
+ ? &frame_mv[refs[!id]].as_mv
+ : &frame_comp_mv[refs[0]].as_mv;
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
av1_highbd_build_inter_predictor(
ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
- &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0, interp_filter,
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ the_other_mv,
+#else // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF)
+ &frame_mv[refs[!id]].as_mv,
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ &sf, pw, ph, 0, mbmi->interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, p_col, p_row,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -5347,7 +6314,12 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
#endif // CONFIG_HIGHBITDEPTH
av1_build_inter_predictor(
ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
- &frame_mv[refs[!id]].as_mv, &sf, pw, ph, &conv_params, interp_filter,
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ the_other_mv,
+#else // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF)
+ &frame_mv[refs[!id]].as_mv,
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ &sf, pw, ph, &conv_params, mbmi->interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, p_col, p_row, plane, !id,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -5360,13 +6332,24 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
if (id) xd->plane[plane].pre[0] = ref_yv12[id];
av1_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv);
- // Use the mv result from the single mode as mv predictor.
- *best_mv = frame_mv[refs[id]].as_mv;
+// Use the mv result from the single mode as mv predictor.
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi) && id)
+ *best_mv = frame_comp_mv[refs[0]].as_mv;
+ else
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ *best_mv = frame_mv[refs[id]].as_mv;
best_mv->col >>= 3;
best_mv->row >>= 3;
- av1_set_mvcost(x, refs[id], id, mbmi->ref_mv_idx);
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi))
+ av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
+ else
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ av1_set_mvcost(x, refs[id], id, mbmi->ref_mv_idx);
// Small-range full-pixel motion search.
bestsme =
@@ -5392,60 +6375,33 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
if (bestsme < INT_MAX) {
int dis; /* TODO: use dis in distortion calculation later. */
unsigned int sse;
- if (cpi->sf.use_upsampled_references) {
- // Use up-sampled reference frames.
- struct buf_2d backup_pred = pd->pre[0];
- const YV12_BUFFER_CONFIG *upsampled_ref =
- get_upsampled_ref(cpi, refs[id]);
-
- // Set pred for Y plane
- setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer,
- upsampled_ref->y_crop_width,
- upsampled_ref->y_crop_height, upsampled_ref->y_stride,
- (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
- pd->subsampling_y);
-
-// If bsize < BLOCK_8X8, adjust pred pointer for this block
-#if !CONFIG_CB4X4
- if (bsize < BLOCK_8X8)
- pd->pre[0].buf =
- &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block,
- pd->pre[0].stride))
- << 3];
-#endif // !CONFIG_CB4X4
-
- bestsme = cpi->find_fractional_mv_step(
- x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
- x->errorperbit, &cpi->fn_ptr[bsize], 0,
- cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
- &dis, &sse, second_pred,
-#if CONFIG_EXT_INTER
- mask, mask_stride, id,
-#endif
- pw, ph, 1);
-
- // Restore the reference frames.
- pd->pre[0] = backup_pred;
- } else {
- (void)block;
- bestsme = cpi->find_fractional_mv_step(
- x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
- x->errorperbit, &cpi->fn_ptr[bsize], 0,
- cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
- &dis, &sse, second_pred,
+ bestsme = cpi->find_fractional_mv_step(
+ x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
+ x->errorperbit, &cpi->fn_ptr[bsize], 0,
+ cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
+ &dis, &sse, second_pred,
#if CONFIG_EXT_INTER
- mask, mask_stride, id,
+ mask, mask_stride, id,
#endif
- pw, ph, 0);
- }
+ pw, ph, cpi->sf.use_upsampled_references);
}
// Restore the pointer to the first (possibly scaled) prediction buffer.
if (id) xd->plane[plane].pre[0] = ref_yv12[0];
if (bestsme < last_besterr[id]) {
- frame_mv[refs[id]].as_mv = *best_mv;
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ // NOTE: For single ref comp mode, frame_mv stores the first mv and
+ // frame_comp_mv stores the second mv.
+ if (!has_second_ref(mbmi) && id)
+ frame_comp_mv[refs[0]].as_mv = *best_mv;
+ else
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ frame_mv[refs[id]].as_mv = *best_mv;
last_besterr[id] = bestsme;
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi)) last_besterr[!id] = last_besterr[id];
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
} else {
break;
}
@@ -5453,40 +6409,92 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
*rate_mv = 0;
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
+#else
for (ref = 0; ref < 2; ++ref) {
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
if (scaled_ref_frame[ref]) {
// Restore the prediction frame pointers to their unscaled versions.
int i;
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[ref] = backup_yv12[ref][i];
}
- av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx);
+
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi))
+ av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
+ else
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx);
+
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi)) {
+ // NOTE: For single ref comp mode, i.e. !has_second_ref(mbmi) is true, the
+ // first mv is stored in frame_mv[] and the second mv is stored in
+ // frame_comp_mv[].
+ if (compound_ref0_mode(mbmi->mode) == NEWMV) // SR_NEW_NEWMV
+ *rate_mv += av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
+ &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ assert(compound_ref1_mode(mbmi->mode) == NEWMV);
+ *rate_mv += av1_mv_bit_cost(&frame_comp_mv[refs[0]].as_mv,
+ &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ } else {
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
#if CONFIG_EXT_INTER && !CONFIG_CB4X4
- if (bsize >= BLOCK_8X8)
+ if (bsize >= BLOCK_8X8)
#endif // CONFIG_EXT_INTER && !CONFIG_CB4X4
- *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
- &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
- x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
+ &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
#if CONFIG_EXT_INTER && !CONFIG_CB4X4
- else
- *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
- &ref_mv_sub8x8[ref]->as_mv, x->nmvjointcost,
- x->mvcost, MV_COST_WEIGHT);
+ else
+ *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
+ &ref_mv_sub8x8[ref]->as_mv, x->nmvjointcost,
+ x->mvcost, MV_COST_WEIGHT);
#endif // CONFIG_EXT_INTER && !CONFIG_CB4X4
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
}
+
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi)) {
+ if (scaled_ref_frame[0]) {
+ // Restore the prediction frame pointers to their unscaled versions.
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[1] = backup_yv12[1][i];
+ }
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
}
-static void estimate_ref_frame_costs(const AV1_COMMON *cm,
- const MACROBLOCKD *xd, int segment_id,
- unsigned int *ref_costs_single,
- unsigned int *ref_costs_comp,
- aom_prob *comp_mode_p) {
+static void estimate_ref_frame_costs(
+ const AV1_COMMON *cm, const MACROBLOCKD *xd, int segment_id,
+ unsigned int *ref_costs_single,
+#if CONFIG_EXT_COMP_REFS
+ unsigned int (*ref_costs_comp)[TOTAL_REFS_PER_FRAME],
+#else
+ unsigned int *ref_costs_comp,
+#endif // CONFIG_EXT_COMP_REFS
+ aom_prob *comp_mode_p) {
int seg_ref_active =
segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
if (seg_ref_active) {
memset(ref_costs_single, 0,
TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_single));
+#if CONFIG_EXT_COMP_REFS
+ int ref_frame;
+ for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
+ memset(ref_costs_comp[ref_frame], 0,
+ TOTAL_REFS_PER_FRAME * sizeof((*ref_costs_comp)[0]));
+#else
memset(ref_costs_comp, 0, TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_comp));
+#endif // CONFIG_EXT_COMP_REFS
+
*comp_mode_p = 128;
} else {
aom_prob intra_inter_p = av1_get_intra_inter_prob(cm, xd);
@@ -5541,7 +6549,7 @@ static void estimate_ref_frame_costs(const AV1_COMMON *cm,
ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p5, 0);
ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p5, 1);
-#else
+#else // !CONFIG_EXT_REFS
ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 1);
ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
@@ -5570,6 +6578,63 @@ static void estimate_ref_frame_costs(const AV1_COMMON *cm,
unsigned int base_cost = av1_cost_bit(intra_inter_p, 1);
+#if CONFIG_EXT_COMP_REFS
+ aom_prob comp_ref_type_p = av1_get_comp_reference_type_prob(cm, xd);
+ unsigned int ref_bicomp_costs[TOTAL_REFS_PER_FRAME] = { 0 };
+
+ ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
+ ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
+#if USE_UNI_COMP_REFS
+ base_cost + av1_cost_bit(comp_ref_type_p, 1);
+#else
+ base_cost;
+#endif // USE_UNI_COMP_REFS
+ ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF_FRAME] = 0;
+
+ ref_bicomp_costs[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
+ ref_bicomp_costs[LAST2_FRAME] += av1_cost_bit(ref_comp_p, 0);
+ ref_bicomp_costs[LAST3_FRAME] += av1_cost_bit(ref_comp_p, 1);
+ ref_bicomp_costs[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);
+
+ ref_bicomp_costs[LAST_FRAME] += av1_cost_bit(ref_comp_p1, 1);
+ ref_bicomp_costs[LAST2_FRAME] += av1_cost_bit(ref_comp_p1, 0);
+
+ ref_bicomp_costs[LAST3_FRAME] += av1_cost_bit(ref_comp_p2, 0);
+ ref_bicomp_costs[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p2, 1);
+
+ ref_bicomp_costs[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
+ ref_bicomp_costs[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);
+
+ int ref0;
+ for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
+ ref_costs_comp[ref0][BWDREF_FRAME] =
+ ref_bicomp_costs[ref0] + ref_bicomp_costs[BWDREF_FRAME];
+ ref_costs_comp[ref0][ALTREF_FRAME] =
+ ref_bicomp_costs[ref0] + ref_bicomp_costs[ALTREF_FRAME];
+ }
+
+ aom_prob uni_comp_ref_p = av1_get_pred_prob_uni_comp_ref_p(cm, xd);
+ aom_prob uni_comp_ref_p1 = av1_get_pred_prob_uni_comp_ref_p1(cm, xd);
+ aom_prob uni_comp_ref_p2 = av1_get_pred_prob_uni_comp_ref_p2(cm, xd);
+
+ ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
+ base_cost + av1_cost_bit(comp_ref_type_p, 0) +
+ av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 0);
+ ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
+ base_cost + av1_cost_bit(comp_ref_type_p, 0) +
+ av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 1) +
+ av1_cost_bit(uni_comp_ref_p2, 0);
+ ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
+ base_cost + av1_cost_bit(comp_ref_type_p, 0) +
+ av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 1) +
+ av1_cost_bit(uni_comp_ref_p2, 1);
+
+ ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
+ base_cost + av1_cost_bit(comp_ref_type_p, 0) +
+ av1_cost_bit(uni_comp_ref_p, 1);
+
+#else // !CONFIG_EXT_COMP_REFS
+
ref_costs_comp[LAST_FRAME] =
#if CONFIG_EXT_REFS
ref_costs_comp[LAST2_FRAME] = ref_costs_comp[LAST3_FRAME] =
@@ -5596,11 +6661,23 @@ static void estimate_ref_frame_costs(const AV1_COMMON *cm,
// more bit.
ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
ref_costs_comp[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);
-#else
+#else // !CONFIG_EXT_REFS
ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);
#endif // CONFIG_EXT_REFS
+#endif // CONFIG_EXT_COMP_REFS
} else {
+#if CONFIG_EXT_COMP_REFS
+ int ref0;
+ for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
+ ref_costs_comp[ref0][BWDREF_FRAME] = 512;
+ ref_costs_comp[ref0][ALTREF_FRAME] = 512;
+ }
+ ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
+ ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
+ ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
+ ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
+#else // !CONFIG_EXT_COMP_REFS
ref_costs_comp[LAST_FRAME] = 512;
#if CONFIG_EXT_REFS
ref_costs_comp[LAST2_FRAME] = 512;
@@ -5609,6 +6686,7 @@ static void estimate_ref_frame_costs(const AV1_COMMON *cm,
ref_costs_comp[ALTREF_FRAME] = 512;
#endif // CONFIG_EXT_REFS
ref_costs_comp[GOLDEN_FRAME] = 512;
+#endif // CONFIG_EXT_COMP_REFS
}
}
}
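With CONFIG_EXT_COMP_REFS, the compound reference costs built in estimate_ref_frame_costs() above become a two-dimensional table indexed by both references, ref_costs_comp[ref0][ref1]; each entry accumulates the bit costs of the reference-type decisions (or a flat 512 when segment-level reference features disable the choice). A small stand-alone sketch of that table layout, with placeholder costs rather than av1_cost_bit() values:

    /* Illustrative sketch only: placeholder costs, not libaom's entropy costs. */
    #include <stdio.h>

    enum { LAST, LAST2, LAST3, GOLDEN, BWDREF, ALTREF, NUM_REFS };

    int main(void) {
      unsigned ref_costs_comp[NUM_REFS][NUM_REFS] = { { 0 } };
      /* Per-reference contributions (placeholders standing in for coded bits). */
      const unsigned fwd_cost[NUM_REFS] = { 10, 12, 14, 16, 0, 0 };
      const unsigned bwd_cost[NUM_REFS] = { 0, 0, 0, 0, 20, 22 };

      /* Bidirectional pairs: the pair cost is the sum of the two sides,
       * mirroring the ref0 x {BWDREF, ALTREF} loop in the hunks above. */
      for (int ref0 = LAST; ref0 <= GOLDEN; ++ref0) {
        ref_costs_comp[ref0][BWDREF] = fwd_cost[ref0] + bwd_cost[BWDREF];
        ref_costs_comp[ref0][ALTREF] = fwd_cost[ref0] + bwd_cost[ALTREF];
      }
      printf("cost(LAST, ALTREF) = %u\n", ref_costs_comp[LAST][ALTREF]);
      return 0;
    }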
@@ -5693,8 +6771,13 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
int sadpb = x->sadperbit16;
MV mvp_full;
#if CONFIG_EXT_INTER
+#if CONFIG_COMPOUND_SINGLEREF
+ int ref =
+ has_second_ref(mbmi) ? mbmi->ref_frame[ref_idx] : mbmi->ref_frame[0];
+#else // !CONFIG_COMPOUND_SINGLEREF
int ref = mbmi->ref_frame[ref_idx];
-#else
+#endif // CONFIG_COMPOUND_SINGLEREF
+#else // !CONFIG_EXT_INTER
int ref = mbmi->ref_frame[0];
int ref_idx = 0;
#endif // CONFIG_EXT_INTER
@@ -5802,7 +6885,7 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv,
&(x->best_mv.as_mv), 0);
break;
- default: assert("Invalid motion mode!\n");
+ default: assert(0 && "Invalid motion mode!\n");
}
#endif // CONFIG_MOTION_VAR
@@ -5820,17 +6903,6 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
x->second_best_mv.as_int != x->best_mv.as_int;
const int pw = block_size_wide[bsize];
const int ph = block_size_high[bsize];
- // Use up-sampled reference frames.
- struct macroblockd_plane *const pd = &xd->plane[0];
- struct buf_2d backup_pred = pd->pre[ref_idx];
- const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
-
- // Set pred for Y plane
- setup_pred_plane(
- &pd->pre[ref_idx], bsize, upsampled_ref->y_buffer,
- upsampled_ref->y_crop_width, upsampled_ref->y_crop_height,
- upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), NULL,
- pd->subsampling_x, pd->subsampling_y);
best_mv_var = cpi->find_fractional_mv_step(
x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
@@ -5873,9 +6945,6 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
x->best_mv.as_mv = best_mv;
}
}
-
- // Restore the reference frames.
- pd->pre[ref_idx] = backup_pred;
} else {
cpi->find_fractional_mv_step(
x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
@@ -5891,13 +6960,12 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
break;
case OBMC_CAUSAL:
av1_find_best_obmc_sub_pixel_tree_up(
- cpi, x, mi_row, mi_col, &x->best_mv.as_mv, &ref_mv,
- cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize],
- cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step,
- x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], 0,
- cpi->sf.use_upsampled_references);
+ x, &x->best_mv.as_mv, &ref_mv, cm->allow_high_precision_mv,
+ x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
+ cpi->sf.mv.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &dis,
+ &x->pred_sse[ref], 0, cpi->sf.use_upsampled_references);
break;
- default: assert("Invalid motion mode!\n");
+ default: assert(0 && "Invalid motion mode!\n");
}
#endif // CONFIG_MOTION_VAR
}
@@ -5936,15 +7004,12 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
const int ph = block_size_high[bsize];
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+#if CONFIG_COMPOUND_SINGLEREF
+ const int other_ref =
+ has_second_ref(mbmi) ? mbmi->ref_frame[!ref_idx] : mbmi->ref_frame[0];
+#else // !CONFIG_COMPOUND_SINGLEREF
const int other_ref = mbmi->ref_frame[!ref_idx];
-#if CONFIG_DUAL_FILTER
- InterpFilter interp_filter[2] = {
- (ref_idx == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0],
- (ref_idx == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1]
- };
-#else
- const InterpFilter interp_filter = mbmi->interp_filter;
-#endif // CONFIG_DUAL_FILTER
+#endif // CONFIG_COMPOUND_SINGLEREF
struct scale_factors sf;
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
struct macroblockd_plane *const pd = &xd->plane[0];
@@ -5961,8 +7026,12 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
(void)block;
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- // This function should only ever be called for compound modes
+// This function should only ever be called for compound modes
+#if CONFIG_COMPOUND_SINGLEREF
+ assert(has_second_ref(mbmi) || is_inter_singleref_comp_mode(mbmi->mode));
+#else // !CONFIG_COMPOUND_SINGLEREF
assert(has_second_ref(mbmi));
+#endif // CONFIG_COMPOUND_SINGLEREF
struct buf_2d backup_yv12[MAX_MB_PLANE];
const YV12_BUFFER_CONFIG *const scaled_ref_frame =
@@ -5991,7 +7060,7 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
struct buf_2d ref_yv12;
const int plane = 0;
- ConvolveParams conv_params = get_conv_params(0, plane);
+ ConvolveParams conv_params = get_conv_params(!ref_idx, 0, plane);
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
WarpTypesAllowed warp_types;
#if CONFIG_GLOBAL_MOTION
@@ -6010,7 +7079,7 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
av1_highbd_build_inter_predictor(
ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
- 0, interp_filter,
+ 0, mbmi->interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, p_col, p_row,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -6019,7 +7088,7 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
#endif // CONFIG_HIGHBITDEPTH
av1_build_inter_predictor(
ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
- &conv_params, interp_filter,
+ &conv_params, mbmi->interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, p_col, p_row, plane, !ref_idx,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -6038,15 +7107,22 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
// Search for the best mv for one component of a compound,
// given that the other component is fixed.
-static void compound_single_motion_search(
- const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *this_mv,
- int mi_row, int mi_col, const uint8_t *second_pred, const uint8_t *mask,
- int mask_stride, int *rate_mv, const int block, int ref_idx) {
+static void compound_single_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, MV *this_mv,
+ int mi_row, int mi_col,
+ const uint8_t *second_pred,
+ const uint8_t *mask, int mask_stride,
+ int *rate_mv, int ref_idx) {
const int pw = block_size_wide[bsize];
const int ph = block_size_high[bsize];
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+#if CONFIG_COMPOUND_SINGLEREF
+ const int ref =
+ has_second_ref(mbmi) ? mbmi->ref_frame[ref_idx] : mbmi->ref_frame[0];
+#else
const int ref = mbmi->ref_frame[ref_idx];
+#endif // CONFIG_COMPOUND_SINGLEREF
int_mv ref_mv = x->mbmi_ext->ref_mvs[ref][0];
struct macroblockd_plane *const pd = &xd->plane[0];
@@ -6054,9 +7130,16 @@ static void compound_single_motion_search(
const YV12_BUFFER_CONFIG *const scaled_ref_frame =
av1_get_scaled_ref_frame(cpi, ref);
- // Check that this is either an interinter or an interintra block
+// Check that this is either an interinter or an interintra block
+#if CONFIG_COMPOUND_SINGLEREF
assert(has_second_ref(mbmi) ||
+ // or a single ref comp pred mode
+ is_inter_singleref_comp_mode(mbmi->mode) ||
(ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME));
+#else
+ assert(has_second_ref(mbmi) ||
+ (ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME));
+#endif // CONFIG_COMPOUND_SINGLEREF
if (scaled_ref_frame) {
int i;
@@ -6091,7 +7174,12 @@ static void compound_single_motion_search(
best_mv->col >>= 3;
best_mv->row >>= 3;
- av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
+#if CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi))
+ av1_set_mvcost(x, ref, 0, mbmi->ref_mv_idx);
+ else
+#endif // CONFIG_COMPOUND_SINGLEREF
+ av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
// Small-range full-pixel motion search.
bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
@@ -6112,43 +7200,11 @@ static void compound_single_motion_search(
if (bestsme < INT_MAX) {
int dis; /* TODO: use dis in distortion calculation later. */
unsigned int sse;
- if (cpi->sf.use_upsampled_references) {
- // Use up-sampled reference frames.
- struct buf_2d backup_pred = pd->pre[0];
- const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
-
- // Set pred for Y plane
- setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer,
- upsampled_ref->y_crop_width,
- upsampled_ref->y_crop_height, upsampled_ref->y_stride,
- (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
- pd->subsampling_y);
-
-// If bsize < BLOCK_8X8, adjust pred pointer for this block
-#if !CONFIG_CB4X4
- if (bsize < BLOCK_8X8)
- pd->pre[0].buf =
- &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block,
- pd->pre[0].stride))
- << 3];
-#endif // !CONFIG_CB4X4
-
- bestsme = cpi->find_fractional_mv_step(
- x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
- &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
- x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
- mask_stride, ref_idx, pw, ph, 1);
-
- // Restore the reference frames.
- pd->pre[0] = backup_pred;
- } else {
- (void)block;
- bestsme = cpi->find_fractional_mv_step(
- x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
- &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
- x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
- mask_stride, ref_idx, pw, ph, 0);
- }
+ bestsme = cpi->find_fractional_mv_step(
+ x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
+ &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
+ x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, mask_stride,
+ ref_idx, pw, ph, cpi->sf.use_upsampled_references);
}
// Restore the pointer to the first (possibly scaled) prediction buffer.
@@ -6165,7 +7221,12 @@ static void compound_single_motion_search(
xd->plane[i].pre[ref_idx] = backup_yv12[i];
}
- av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
+#if CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi))
+ av1_set_mvcost(x, ref, 0, mbmi->ref_mv_idx);
+ else
+#endif // CONFIG_COMPOUND_SINGLEREF
+ av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
*rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost,
x->mvcost, MV_COST_WEIGHT);
}
@@ -6174,13 +7235,23 @@ static void compound_single_motion_search(
// where the second prediction is also an inter mode.
static void compound_single_motion_search_interinter(
const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv,
+#if CONFIG_COMPOUND_SINGLEREF
+ int_mv *frame_comp_mv,
+#endif // CONFIG_COMPOUND_SINGLEREF
int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv,
const int block, int ref_idx) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- // This function should only ever be called for compound modes
+// This function should only ever be called for compound modes
+#if CONFIG_COMPOUND_SINGLEREF
+ int is_singleref_comp_mode =
+ !has_second_ref(mbmi) && is_inter_singleref_comp_mode(mbmi->mode);
+ assert(has_second_ref(mbmi) || is_singleref_comp_mode);
+ if (is_singleref_comp_mode && ref_idx) assert(frame_comp_mv);
+#else // !CONFIG_COMPOUND_SINGLEREF
assert(has_second_ref(mbmi));
+#endif // CONFIG_COMPOUND_SINGLEREF
// Prediction buffer from second frame.
#if CONFIG_HIGHBITDEPTH
@@ -6194,14 +7265,26 @@ static void compound_single_motion_search_interinter(
DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
#endif // CONFIG_HIGHBITDEPTH
+#if CONFIG_COMPOUND_SINGLEREF
+ MV *this_mv = has_second_ref(mbmi)
+ ? &frame_mv[mbmi->ref_frame[ref_idx]].as_mv
+ : (ref_idx ? &frame_comp_mv[mbmi->ref_frame[0]].as_mv
+ : &frame_mv[mbmi->ref_frame[0]].as_mv);
+ const MV *other_mv =
+ has_second_ref(mbmi)
+ ? &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv
+ : (ref_idx ? &frame_mv[mbmi->ref_frame[0]].as_mv
+ : &frame_comp_mv[mbmi->ref_frame[0]].as_mv);
+#else // !CONFIG_COMPOUND_SINGLEREF
MV *this_mv = &frame_mv[mbmi->ref_frame[ref_idx]].as_mv;
const MV *other_mv = &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv;
+#endif // CONFIG_COMPOUND_SINGLEREF
build_second_inter_pred(cpi, x, bsize, other_mv, mi_row, mi_col, block,
ref_idx, second_pred);
compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col,
- second_pred, mask, mask_stride, rate_mv, block,
+ second_pred, mask, mask_stride, rate_mv,
ref_idx);
}
@@ -6220,21 +7303,40 @@ static void do_masked_motion_search_indexed(
mask = av1_get_compound_type_mask(comp_data, sb_type);
int_mv frame_mv[TOTAL_REFS_PER_FRAME];
+#if CONFIG_COMPOUND_SINGLEREF
+ int_mv frame_comp_mv[TOTAL_REFS_PER_FRAME];
+#endif // CONFIG_COMPOUND_SINGLEREF
MV_REFERENCE_FRAME rf[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
assert(bsize >= BLOCK_8X8 || CONFIG_CB4X4);
frame_mv[rf[0]].as_int = cur_mv[0].as_int;
- frame_mv[rf[1]].as_int = cur_mv[1].as_int;
+#if CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi))
+ frame_comp_mv[rf[0]].as_int = cur_mv[1].as_int;
+ else
+#endif // CONFIG_COMPOUND_SINGLEREF
+ frame_mv[rf[1]].as_int = cur_mv[1].as_int;
if (which == 0 || which == 1) {
- compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, mi_row,
- mi_col, mask, mask_stride, rate_mv,
- 0, which);
+ compound_single_motion_search_interinter(
+ cpi, x, bsize, frame_mv,
+#if CONFIG_COMPOUND_SINGLEREF
+ has_second_ref(mbmi) ? NULL : frame_comp_mv,
+#endif // CONFIG_COMPOUND_SINGLEREF
+ mi_row, mi_col, mask, mask_stride, rate_mv, 0, which);
} else if (which == 2) {
- joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, mask,
- mask_stride, rate_mv, 0);
+ joint_motion_search(cpi, x, bsize, frame_mv,
+#if CONFIG_COMPOUND_SINGLEREF
+ has_second_ref(mbmi) ? NULL : frame_comp_mv,
+#endif // CONFIG_COMPOUND_SINGLEREF
+ mi_row, mi_col, NULL, mask, mask_stride, rate_mv, 0);
}
tmp_mv[0].as_int = frame_mv[rf[0]].as_int;
- tmp_mv[1].as_int = frame_mv[rf[1]].as_int;
+#if CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi))
+ tmp_mv[1].as_int = frame_comp_mv[rf[0]].as_int;
+ else // comp ref
+#endif // CONFIG_COMPOUND_SINGLEREF
+ tmp_mv[1].as_int = frame_mv[rf[1]].as_int;
}
#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
#endif // CONFIG_EXT_INTER
@@ -6483,7 +7585,7 @@ static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x,
sse = ROUND_POWER_OF_TWO(sse, bd_round);
model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
- rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
+ rd = RDCOST(x->rdmult, rate, dist);
if (rd < best_rd) {
*best_wedge_index = wedge_index;
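The recurring RDCOST(x->rdmult, x->rddiv, ...) to RDCOST(x->rdmult, ...) edits throughout this file drop the separate rddiv argument, but the quantity being compared is still the usual Lagrangian cost J = lambda * rate + distortion. A minimal illustration of that trade-off, with placeholder scaling rather than libaom's exact fixed-point RDCOST() macro:

    /* Illustrative only: generic lambda*rate + distortion comparison, not the
     * exact libaom RDCOST() macro or its fixed-point shifts. */
    #include <stdint.h>
    #include <stdio.h>

    static int64_t rd_cost(int64_t lambda, int64_t rate_bits, int64_t dist) {
      return lambda * rate_bits + dist;
    }

    int main(void) {
      const int64_t lambda = 120;                      /* role of x->rdmult   */
      const int64_t coded = rd_cost(lambda, 40, 5000); /* code the residual   */
      const int64_t skip = rd_cost(lambda, 2, 9000);   /* signal skip instead */
      printf("%s wins\n", coded < skip ? "coding the residual" : "skip");
      return 0;
    }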
@@ -6544,7 +7646,7 @@ static int64_t pick_wedge_fixed_sign(
sse = ROUND_POWER_OF_TWO(sse, bd_round);
model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
- rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
+ rd = RDCOST(x->rdmult, rate, dist);
if (rd < best_rd) {
*best_wedge_index = wedge_index;
@@ -6646,7 +7748,7 @@ static int64_t pick_interinter_seg(const AV1_COMP *const cpi,
sse = ROUND_POWER_OF_TWO(sse, bd_round);
model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
- rd0 = RDCOST(x->rdmult, x->rddiv, rate, dist);
+ rd0 = RDCOST(x->rdmult, rate, dist);
if (rd0 < best_rd) {
best_mask_type = cur_mask_type;
@@ -6729,7 +7831,17 @@ static int interinter_compound_motion_search(
#endif // CONFIG_COMPOUND_SEGMENT
mbmi->interinter_compound_type
};
- if (this_mode == NEW_NEWMV) {
+#if CONFIG_COMPOUND_SINGLEREF
+ // NOTE: Mode is needed to identify the compound mode prediction, regardless
+ // of comp refs or single ref.
+ mbmi->mode = this_mode;
+#endif // CONFIG_COMPOUND_SINGLEREF
+
+ if (this_mode == NEW_NEWMV
+#if CONFIG_COMPOUND_SINGLEREF
+ || this_mode == SR_NEW_NEWMV
+#endif // CONFIG_COMPOUND_SINGLEREF
+ ) {
do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
mi_row, mi_col, tmp_mv, &tmp_rate_mv, 2);
mbmi->mv[0].as_int = tmp_mv[0].as_int;
@@ -6738,7 +7850,12 @@ static int interinter_compound_motion_search(
do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
mi_row, mi_col, tmp_mv, &tmp_rate_mv, 0);
mbmi->mv[0].as_int = tmp_mv[0].as_int;
- } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
+ } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV
+#if CONFIG_COMPOUND_SINGLEREF
+ // || this_mode == SR_NEAREST_NEWMV
+ || this_mode == SR_NEAR_NEWMV || this_mode == SR_ZERO_NEWMV
+#endif // CONFIG_COMPOUND_SINGLEREF
+ ) {
do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
mi_row, mi_col, tmp_mv, &tmp_rate_mv, 1);
mbmi->mv[1].as_int = tmp_mv[1].as_int;
@@ -6763,7 +7880,7 @@ static int64_t build_and_cost_compound_type(
const COMPOUND_TYPE compound_type = mbmi->interinter_compound_type;
best_rd_cur = pick_interinter_mask(cpi, x, bsize, *preds0, *preds1);
- best_rd_cur += RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv, 0);
+ best_rd_cur += RDCOST(x->rdmult, rs2 + rate_mv, 0);
if (have_newmv_in_inter_mode(this_mode) &&
use_masked_motion_search(compound_type)) {
@@ -6772,7 +7889,7 @@ static int64_t build_and_cost_compound_type(
av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb);
- rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum);
+ rd = RDCOST(x->rdmult, rs2 + *out_rate_mv + rate_sum, dist_sum);
if (rd >= best_rd_cur) {
mbmi->mv[0].as_int = cur_mv[0].as_int;
mbmi->mv[1].as_int = cur_mv[1].as_int;
@@ -6788,7 +7905,7 @@ static int64_t build_and_cost_compound_type(
rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum);
+ rd = RDCOST(x->rdmult, rs2 + *out_rate_mv + rate_sum, dist_sum);
best_rd_cur = rd;
} else {
@@ -6801,7 +7918,7 @@ static int64_t build_and_cost_compound_type(
rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum);
+ rd = RDCOST(x->rdmult, rs2 + rate_mv + rate_sum, dist_sum);
best_rd_cur = rd;
}
return best_rd_cur;
@@ -6832,6 +7949,9 @@ typedef struct {
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
const BLOCK_SIZE bsize,
int_mv (*const mode_mv)[TOTAL_REFS_PER_FRAME],
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ int_mv (*const mode_comp_mv)[TOTAL_REFS_PER_FRAME],
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
const int mi_row, const int mi_col,
int *const rate_mv, int_mv *const single_newmv,
HandleInterModeArgs *const args) {
@@ -6844,6 +7964,9 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
#endif // CONFIG_EXT_INTER
int_mv *const frame_mv = mode_mv[this_mode];
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ int_mv *const frame_comp_mv = mode_comp_mv[this_mode];
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
const int refs[2] = { mbmi->ref_frame[0],
mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
int i;
@@ -6861,8 +7984,11 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
- joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, NULL,
- 0, rate_mv, 0);
+ joint_motion_search(cpi, x, bsize, frame_mv,
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ NULL, // int_mv *frame_comp_mv
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ mi_row, mi_col, NULL, NULL, 0, rate_mv, 0);
} else {
*rate_mv = 0;
for (i = 0; i < 2; ++i) {
@@ -6877,8 +8003,12 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
frame_mv[refs[0]].as_int =
mode_mv[compound_ref0_mode(this_mode)][refs[0]].as_int;
- compound_single_motion_search_interinter(
- cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 1);
+ compound_single_motion_search_interinter(cpi, x, bsize, frame_mv,
+#if CONFIG_COMPOUND_SINGLEREF
+ NULL,
+#endif // CONFIG_COMPOUND_SINGLEREF
+ mi_row, mi_col, NULL, 0,
+ rate_mv, 0, 1);
} else {
av1_set_mvcost(x, refs[1], 1, mbmi->ref_mv_idx);
*rate_mv = av1_mv_bit_cost(&frame_mv[refs[1]].as_mv,
@@ -6891,8 +8021,12 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
frame_mv[refs[1]].as_int =
mode_mv[compound_ref1_mode(this_mode)][refs[1]].as_int;
- compound_single_motion_search_interinter(
- cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 0);
+ compound_single_motion_search_interinter(cpi, x, bsize, frame_mv,
+#if CONFIG_COMPOUND_SINGLEREF
+ NULL,
+#endif // CONFIG_COMPOUND_SINGLEREF
+ mi_row, mi_col, NULL, 0,
+ rate_mv, 0, 0);
} else {
av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
*rate_mv = av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
@@ -6900,7 +8034,7 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
}
}
-#else
+#else // !CONFIG_EXT_INTER
// Initialize mv using single prediction mode result.
frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
@@ -6917,6 +8051,41 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
}
}
#endif // CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ } else if (is_inter_singleref_comp_mode(this_mode)) {
+ // Single ref comp mode
+ const int mode0 = compound_ref0_mode(this_mode);
+
+ single_newmv[refs[0]].as_int = args->single_newmv[refs[0]].as_int;
+ frame_mv[refs[0]].as_int = (mode0 == NEWMV)
+ ? single_newmv[refs[0]].as_int
+ : mode_mv[mode0][refs[0]].as_int;
+ assert(compound_ref1_mode(this_mode) == NEWMV);
+ frame_comp_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
+
+ if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
+ if (this_mode == SR_NEW_NEWMV) {
+ joint_motion_search(cpi, x, bsize, frame_mv, frame_comp_mv, mi_row,
+ mi_col, NULL, NULL, 0, rate_mv, 0);
+ } else {
+ assert( // this_mode == SR_NEAREST_NEWMV ||
+ this_mode == SR_NEAR_NEWMV || this_mode == SR_ZERO_NEWMV);
+ compound_single_motion_search_interinter(cpi, x, bsize, frame_mv,
+ frame_comp_mv, mi_row, mi_col,
+ NULL, 0, rate_mv, 0, 1);
+ }
+ } else {
+ *rate_mv = 0;
+ av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
+ if (mode0 == NEWMV)
+ *rate_mv += av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
+ &mbmi_ext->ref_mvs[refs[0]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ *rate_mv += av1_mv_bit_cost(&frame_comp_mv[refs[0]].as_mv,
+ &mbmi_ext->ref_mvs[refs[0]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
} else {
#if CONFIG_EXT_INTER
if (is_comp_interintra_pred) {
@@ -6984,7 +8153,7 @@ int64_t interpolation_filter_search(
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist,
skip_txfm_sb, skip_sse_sb);
- *rd = RDCOST(x->rdmult, x->rddiv, *switchable_rate + tmp_rate, tmp_dist);
+ *rd = RDCOST(x->rdmult, *switchable_rate + tmp_rate, tmp_dist);
if (assign_filter == SWITCHABLE) {
// do interp_filter search
@@ -7020,7 +8189,7 @@ int64_t interpolation_filter_search(
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
&tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
- tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rs + tmp_rate, tmp_dist);
+ tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist);
if (tmp_rd < *rd) {
*rd = tmp_rd;
@@ -7072,12 +8241,10 @@ static int64_t motion_mode_rd(
int mi_col, HandleInterModeArgs *const args, const int64_t ref_best_rd,
const int *refs, int rate_mv,
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+ // only used when WARPED_MOTION is on?
int_mv *const single_newmv,
#if CONFIG_EXT_INTER
- int rate2_bmc_nocoeff, MB_MODE_INFO *best_bmc_mbmi,
-#if CONFIG_MOTION_VAR
- int rate_mv_bmc,
-#endif // CONFIG_MOTION_VAR
+ int rate2_bmc_nocoeff, MB_MODE_INFO *best_bmc_mbmi, int rate_mv_bmc,
#endif // CONFIG_EXT_INTER
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
int rs, int *skip_txfm_sb, int64_t *skip_sse_sb, BUFFER_SET *orig_dst) {
@@ -7108,7 +8275,13 @@ static int64_t motion_mode_rd(
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#if CONFIG_WARPED_MOTION
+#if WARPED_MOTION_SORT_SAMPLES
+ int pts0[SAMPLES_ARRAY_SIZE], pts_inref0[SAMPLES_ARRAY_SIZE];
+ int pts_mv0[SAMPLES_ARRAY_SIZE];
+ int total_samples;
+#else
int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
+#endif // WARPED_MOTION_SORT_SAMPLES
#endif // CONFIG_WARPED_MOTION
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
@@ -7118,18 +8291,39 @@ static int64_t motion_mode_rd(
if (cm->interp_filter == SWITCHABLE) rd_stats->rate += rs;
#if CONFIG_WARPED_MOTION
aom_clear_system_state();
+#if WARPED_MOTION_SORT_SAMPLES
+ mbmi->num_proj_ref[0] =
+ findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0, pts_mv0);
+ total_samples = mbmi->num_proj_ref[0];
+#else
mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
+#endif // WARPED_MOTION_SORT_SAMPLES
#if CONFIG_EXT_INTER
best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
#endif // CONFIG_EXT_INTER
#endif // CONFIG_WARPED_MOTION
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
rate2_nocoeff = rd_stats->rate;
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ // We cannot estimate the rd cost for the motion mode NCOBMC_ADAPT_WEIGHT
+ // right now since it requires mvs from all neighboring blocks. We will
+ // check if this mode is beneficial after all the mv's in the current
+ // superblock are selected.
+ last_motion_mode_allowed = motion_mode_allowed_wrapper(1,
+#if CONFIG_GLOBAL_MOTION
+ 0, xd->global_motion,
+#endif // CONFIG_GLOBAL_MOTION
+ mi);
+#else
last_motion_mode_allowed = motion_mode_allowed(
-#if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
+#if CONFIG_GLOBAL_MOTION
0, xd->global_motion,
-#endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
+#endif // CONFIG_GLOBAL_MOTION
+#if CONFIG_WARPED_MOTION
+ xd,
+#endif
mi);
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
base_mbmi = *mbmi;
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
@@ -7155,7 +8349,11 @@ static int64_t motion_mode_rd(
*mbmi = *best_bmc_mbmi;
mbmi->motion_mode = OBMC_CAUSAL;
#endif // CONFIG_EXT_INTER
- if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
+ if (!is_comp_pred &&
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ !is_inter_singleref_comp_mode(this_mode) &&
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ have_newmv_in_inter_mode(this_mode)) {
int tmp_rate_mv = 0;
single_motion_search(cpi, x, bsize, mi_row, mi_col,
@@ -7195,6 +8393,9 @@ static int64_t motion_mode_rd(
#if CONFIG_WARPED_MOTION
if (mbmi->motion_mode == WARPED_CAUSAL) {
+#if WARPED_MOTION_SORT_SAMPLES
+ int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
+#endif // WARPED_MOTION_SORT_SAMPLES
#if CONFIG_EXT_INTER
*mbmi = *best_bmc_mbmi;
mbmi->motion_mode = WARPED_CAUSAL;
@@ -7210,6 +8411,19 @@ static int64_t motion_mode_rd(
: cm->interp_filter;
#endif // CONFIG_DUAL_FILTER
+#if WARPED_MOTION_SORT_SAMPLES
+ memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
+ memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
+ // Rank the samples by motion vector difference
+ if (mbmi->num_proj_ref[0] > 1) {
+ mbmi->num_proj_ref[0] = sortSamples(pts_mv0, &mbmi->mv[0].as_mv, pts,
+ pts_inref, mbmi->num_proj_ref[0]);
+#if CONFIG_EXT_INTER
+ best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
+#endif // CONFIG_EXT_INTER
+ }
+#endif // WARPED_MOTION_SORT_SAMPLES
+
if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize,
mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
&mbmi->wm_params[0], mi_row, mi_col)) {
@@ -7218,9 +8432,16 @@ static int64_t motion_mode_rd(
int tmp_rate_mv = 0;
const int_mv mv0 = mbmi->mv[0];
WarpedMotionParams wm_params0 = mbmi->wm_params[0];
+#if WARPED_MOTION_SORT_SAMPLES
+ int num_proj_ref0 = mbmi->num_proj_ref[0];
// Refine MV in a small range.
+ av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts0, pts_inref0,
+ pts_mv0, total_samples);
+#else
+ // Refine MV in a small range.
av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts, pts_inref);
+#endif // WARPED_MOTION_SORT_SAMPLES
// Keep the refined MV and WM parameters.
if (mv0.as_int != mbmi->mv[0].as_int) {
@@ -7241,6 +8462,9 @@ static int64_t motion_mode_rd(
tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
}
#if CONFIG_EXT_INTER
+#if WARPED_MOTION_SORT_SAMPLES
+ best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
+#endif // WARPED_MOTION_SORT_SAMPLES
tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
#else
tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
@@ -7255,6 +8479,9 @@ static int64_t motion_mode_rd(
// Restore the old MV and WM parameters.
mbmi->mv[0] = mv0;
mbmi->wm_params[0] = wm_params0;
+#if WARPED_MOTION_SORT_SAMPLES
+ mbmi->num_proj_ref[0] = num_proj_ref0;
+#endif // WARPED_MOTION_SORT_SAMPLES
}
}
@@ -7328,8 +8555,8 @@ static int64_t motion_mode_rd(
av1_merge_rd_stats(rd_stats, rd_stats_y);
- rdcosty = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist);
- rdcosty = AOMMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
+ rdcosty = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
+ rdcosty = AOMMIN(rdcosty, RDCOST(x->rdmult, 0, rd_stats->sse));
/* clang-format off */
#if CONFIG_VAR_TX
is_cost_valid_uv =
@@ -7365,12 +8592,11 @@ static int64_t motion_mode_rd(
mbmi->skip = 0;
// here mbmi->skip temporarily plays a role as what this_skip2 does
} else if (!xd->lossless[mbmi->segment_id] &&
- (RDCOST(x->rdmult, x->rddiv,
+ (RDCOST(x->rdmult,
rd_stats_y->rate + rd_stats_uv->rate +
av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
rd_stats->dist) >=
- RDCOST(x->rdmult, x->rddiv,
- av1_cost_bit(av1_get_skip_prob(cm, xd), 1),
+ RDCOST(x->rdmult, av1_cost_bit(av1_get_skip_prob(cm, xd), 1),
rd_stats->sse))) {
rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
@@ -7427,7 +8653,7 @@ static int64_t motion_mode_rd(
#endif // CONFIG_GLOBAL_MOTION
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- tmp_rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist);
+ tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
if (mbmi->motion_mode == SIMPLE_TRANSLATION || (tmp_rd < best_rd)) {
best_mbmi = *mbmi;
best_rd = tmp_rd;
@@ -7466,11 +8692,17 @@ static int64_t motion_mode_rd(
return 0;
}
-static int64_t handle_inter_mode(
- const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
- RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
- int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
- int mi_col, HandleInterModeArgs *args, const int64_t ref_best_rd) {
+static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, RD_STATS *rd_stats,
+ RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
+ int *disable_skip,
+ int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME],
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ int_mv (*mode_comp_mv)[TOTAL_REFS_PER_FRAME],
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ int mi_row, int mi_col,
+ HandleInterModeArgs *args,
+ const int64_t ref_best_rd) {
const AV1_COMMON *cm = &cpi->common;
(void)cm;
MACROBLOCKD *xd = &x->e_mbd;
@@ -7479,7 +8711,14 @@ static int64_t handle_inter_mode(
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
const int is_comp_pred = has_second_ref(mbmi);
const int this_mode = mbmi->mode;
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ const int is_singleref_comp_mode = is_inter_singleref_comp_mode(this_mode);
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
int_mv *frame_mv = mode_mv[this_mode];
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ // The comp mv for the compound mode in single ref
+ int_mv *frame_comp_mv = mode_comp_mv[this_mode];
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
int i;
int refs[2] = { mbmi->ref_frame[0],
(mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
@@ -7487,7 +8726,7 @@ static int64_t handle_inter_mode(
int rate_mv = 0;
#if CONFIG_EXT_INTER
int pred_exists = 1;
-#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
+#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT || CONFIG_INTERINTRA
const int bw = block_size_wide[bsize];
#endif  // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
int_mv single_newmv[TOTAL_REFS_PER_FRAME];
@@ -7511,9 +8750,7 @@ static int64_t handle_inter_mode(
#if CONFIG_EXT_INTER
int rate2_bmc_nocoeff;
MB_MODE_INFO best_bmc_mbmi;
-#if CONFIG_MOTION_VAR
int rate_mv_bmc;
-#endif // CONFIG_MOTION_VAR
#endif // CONFIG_EXT_INTER
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
int64_t rd = INT64_MAX;
@@ -7523,6 +8760,11 @@ static int64_t handle_inter_mode(
int skip_txfm_sb = 0;
int64_t skip_sse_sb = INT64_MAX;
int16_t mode_ctx;
+#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_MOTION_VAR
+ // dummy fillers
+ mbmi->ncobmc_mode[0] = NO_OVERLAP;
+ mbmi->ncobmc_mode[1] = NO_OVERLAP;
+#endif
#if CONFIG_EXT_INTER
#if CONFIG_INTERINTRA
@@ -7546,7 +8788,11 @@ static int64_t handle_inter_mode(
#endif // CONFIG_EXT_INTER
#if CONFIG_EXT_INTER
+#if CONFIG_COMPOUND_SINGLEREF
+ if (is_comp_pred || is_singleref_comp_mode)
+#else // !CONFIG_COMPOUND_SINGLEREF
if (is_comp_pred)
+#endif // CONFIG_COMPOUND_SINGLEREF
mode_ctx = mbmi_ext->compound_mode_context[refs[0]];
else
#endif // CONFIG_EXT_INTER
@@ -7572,12 +8818,22 @@ static int64_t handle_inter_mode(
if (frame_mv[refs[0]].as_int == INVALID_MV ||
frame_mv[refs[1]].as_int == INVALID_MV)
return INT64_MAX;
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ } else if (is_singleref_comp_mode) {
+ if (frame_mv[refs[0]].as_int == INVALID_MV ||
+ frame_comp_mv[refs[0]].as_int == INVALID_MV)
+ return INT64_MAX;
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
}
mbmi->motion_mode = SIMPLE_TRANSLATION;
if (have_newmv_in_inter_mode(this_mode)) {
- const int64_t ret_val = handle_newmv(cpi, x, bsize, mode_mv, mi_row, mi_col,
- &rate_mv, single_newmv, args);
+ const int64_t ret_val =
+ handle_newmv(cpi, x, bsize, mode_mv,
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ mode_comp_mv,
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ mi_row, mi_col, &rate_mv, single_newmv, args);
if (ret_val != 0)
return ret_val;
else
@@ -7591,6 +8847,16 @@ static int64_t handle_inter_mode(
mbmi->mv[i].as_int = cur_mv[i].as_int;
}
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!is_comp_pred && is_singleref_comp_mode) {
+ cur_mv[1] = frame_comp_mv[refs[0]];
+    // Clip "next_nearest" so that it does not extend too far out of the image
+ if (this_mode != NEWMV) clamp_mv2(&cur_mv[1].as_mv, xd);
+ if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
+ mbmi->mv[1].as_int = cur_mv[1].as_int;
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+
#if CONFIG_EXT_INTER
if (this_mode == NEAREST_NEARESTMV)
#else
@@ -7614,7 +8880,13 @@ static int64_t handle_inter_mode(
#if CONFIG_EXT_INTER
if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
- if (this_mode == NEAREST_NEWMV) {
+#if CONFIG_COMPOUND_SINGLEREF
+ if (this_mode == NEAREST_NEWMV || // this_mode == SR_NEAREST_NEWMV ||
+ this_mode == SR_NEAREST_NEARMV)
+#else // !CONFIG_COMPOUND_SINGLEREF
+ if (this_mode == NEAREST_NEWMV)
+#endif // CONFIG_COMPOUND_SINGLEREF
+ {
cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
@@ -7635,7 +8907,11 @@ static int64_t handle_inter_mode(
if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
int ref_mv_idx = mbmi->ref_mv_idx + 1;
- if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEARMV) {
+ if (this_mode == NEAR_NEWMV ||
+#if CONFIG_COMPOUND_SINGLEREF
+ this_mode == SR_NEAR_NEWMV ||
+#endif // CONFIG_COMPOUND_SINGLEREF
+ this_mode == NEAR_NEARMV) {
cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
@@ -7644,8 +8920,17 @@ static int64_t handle_inter_mode(
mbmi->mv[0].as_int = cur_mv[0].as_int;
}
- if (this_mode == NEW_NEARMV || this_mode == NEAR_NEARMV) {
- cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
+ if (this_mode == NEW_NEARMV ||
+#if CONFIG_COMPOUND_SINGLEREF
+ this_mode == SR_NEAREST_NEARMV ||
+#endif // CONFIG_COMPOUND_SINGLEREF
+ this_mode == NEAR_NEARMV) {
+#if CONFIG_COMPOUND_SINGLEREF
+ if (this_mode == SR_NEAREST_NEARMV)
+ cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
+ else
+#endif // CONFIG_COMPOUND_SINGLEREF
+ cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
clamp_mv2(&cur_mv[1].as_mv, xd);
@@ -7653,7 +8938,7 @@ static int64_t handle_inter_mode(
mbmi->mv[1].as_int = cur_mv[1].as_int;
}
}
-#else
+#else // !CONFIG_EXT_INTER
if (this_mode == NEARMV && is_comp_pred) {
uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
@@ -7706,7 +8991,7 @@ static int64_t handle_inter_mode(
rd_stats->rate += cost_mv_ref(cpi, this_mode, mode_ctx);
}
- if (RDCOST(x->rdmult, x->rddiv, rd_stats->rate, 0) > ref_best_rd &&
+ if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
#if CONFIG_EXT_INTER
mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV
#else
@@ -7725,13 +9010,16 @@ static int64_t handle_inter_mode(
best_bmc_mbmi = *mbmi;
rate2_bmc_nocoeff = rd_stats->rate;
if (cm->interp_filter == SWITCHABLE) rate2_bmc_nocoeff += rs;
-#if CONFIG_MOTION_VAR
rate_mv_bmc = rate_mv;
-#endif // CONFIG_MOTION_VAR
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
- if (is_comp_pred) {
+#if CONFIG_COMPOUND_SINGLEREF
+ if (is_comp_pred || is_singleref_comp_mode)
+#else
+ if (is_comp_pred)
+#endif // CONFIG_COMPOUND_SINGLEREF
+ {
int rate_sum, rs2;
int64_t dist_sum;
int64_t best_rd_compound = INT64_MAX, best_rd_cur = INT64_MAX;
@@ -7741,8 +9029,8 @@ static int64_t handle_inter_mode(
int tmp_skip_txfm_sb;
int64_t tmp_skip_sse_sb;
int compound_type_cost[COMPOUND_TYPES];
- uint8_t pred0[2 * MAX_SB_SQUARE];
- uint8_t pred1[2 * MAX_SB_SQUARE];
+ DECLARE_ALIGNED(16, uint8_t, pred0[2 * MAX_SB_SQUARE]);
+ DECLARE_ALIGNED(16, uint8_t, pred1[2 * MAX_SB_SQUARE]);
uint8_t *preds0[1] = { pred0 };
uint8_t *preds1[1] = { pred1 };
int strides[1] = { bw };
@@ -7761,6 +9049,17 @@ static int64_t handle_inter_mode(
best_compound_data.seg_mask = tmp_mask_buf;
#endif // CONFIG_COMPOUND_SEGMENT
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ // TODO(zoeliu): To further check whether the following setups are needed.
+ // Single ref compound mode: Prepare the 2nd ref frame predictor the same as
+ // the 1st one.
+ if (!is_comp_pred && is_singleref_comp_mode) {
+ xd->block_refs[1] = xd->block_refs[0];
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[1] = xd->plane[i].pre[0];
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+
if (masked_compound_used) {
av1_cost_tokens(compound_type_cost, cm->fc->compound_type_prob[bsize],
av1_compound_type_tree);
@@ -7773,7 +9072,7 @@ static int64_t handle_inter_mode(
for (cur_type = COMPOUND_AVERAGE; cur_type < COMPOUND_TYPES; cur_type++) {
if (cur_type != COMPOUND_AVERAGE && !masked_compound_used) break;
- if (!is_interinter_compound_used(cur_type, bsize)) break;
+ if (!is_interinter_compound_used(cur_type, bsize)) continue;
tmp_rate_mv = rate_mv;
best_rd_cur = INT64_MAX;
mbmi->interinter_compound_type = cur_type;
@@ -7792,8 +9091,7 @@ static int64_t handle_inter_mode(
&tmp_skip_txfm_sb, &tmp_skip_sse_sb,
INT64_MAX);
if (rd != INT64_MAX)
- best_rd_cur =
- RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum);
+ best_rd_cur = RDCOST(x->rdmult, rs2 + rate_mv + rate_sum, dist_sum);
best_rd_compound = best_rd_cur;
break;
#if CONFIG_WEDGE
@@ -7923,8 +9221,7 @@ static int64_t handle_inter_mode(
av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb);
- rd =
- RDCOST(x->rdmult, x->rddiv, tmp_rate_mv + rate_sum + rmode, dist_sum);
+ rd = RDCOST(x->rdmult, tmp_rate_mv + rate_sum + rmode, dist_sum);
if (rd < best_interintra_rd) {
best_interintra_rd = rd;
best_interintra_mode = mbmi->interintra_mode;
@@ -7939,7 +9236,7 @@ static int64_t handle_inter_mode(
rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, x->rddiv, rate_mv + rmode + rate_sum, dist_sum);
+ rd = RDCOST(x->rdmult, rate_mv + rmode + rate_sum, dist_sum);
best_interintra_rd = rd;
if (ref_best_rd < INT64_MAX && best_interintra_rd > 2 * ref_best_rd) {
@@ -7953,8 +9250,7 @@ static int64_t handle_inter_mode(
int_mv tmp_mv;
int rwedge = av1_cost_bit(cm->fc->wedge_interintra_prob[bsize], 0);
if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge + rate_sum,
- dist_sum);
+ rd = RDCOST(x->rdmult, rmode + rate_mv + rwedge + rate_sum, dist_sum);
best_interintra_rd_nowedge = best_interintra_rd;
// Disable wedge search if source variance is small
@@ -7968,7 +9264,7 @@ static int64_t handle_inter_mode(
pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_);
best_interintra_rd_wedge +=
- RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge, 0);
+ RDCOST(x->rdmult, rmode + rate_mv + rwedge, 0);
// Refine motion vector.
if (have_newmv_in_inter_mode(this_mode)) {
// get negative of mask
@@ -7977,14 +9273,14 @@ static int64_t handle_inter_mode(
tmp_mv.as_int = x->mbmi_ext->ref_mvs[refs[0]][0].as_int;
compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row,
mi_col, intrapred, mask, bw,
- &tmp_rate_mv, 0, 0);
+ &tmp_rate_mv, 0);
mbmi->mv[0].as_int = tmp_mv.as_int;
av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb);
- rd = RDCOST(x->rdmult, x->rddiv,
- rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum);
+ rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum,
+ dist_sum);
if (rd >= best_interintra_rd_wedge) {
tmp_mv.as_int = cur_mv[0].as_int;
tmp_rate_mv = rate_mv;
@@ -8000,8 +9296,8 @@ static int64_t handle_inter_mode(
estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, x->rddiv,
- rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum);
+ rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum,
+ dist_sum);
best_interintra_rd_wedge = rd;
if (best_interintra_rd_wedge < best_interintra_rd_nowedge) {
mbmi->use_wedge_interintra = 1;
@@ -8042,7 +9338,7 @@ static int64_t handle_inter_mode(
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, &orig_dst, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
&tmp_dist, &skip_txfm_sb, &skip_sse_sb);
- rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
+ rd = RDCOST(x->rdmult, rs + tmp_rate, tmp_dist);
}
#endif // CONFIG_EXT_INTER
@@ -8097,10 +9393,7 @@ static int64_t handle_inter_mode(
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
single_newmv,
#if CONFIG_EXT_INTER
- rate2_bmc_nocoeff, &best_bmc_mbmi,
-#if CONFIG_MOTION_VAR
- rate_mv_bmc,
-#endif // CONFIG_MOTION_VAR
+ rate2_bmc_nocoeff, &best_bmc_mbmi, rate_mv_bmc,
#endif // CONFIG_EXT_INTER
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
rs, &skip_txfm_sb, &skip_sse_sb, &orig_dst);
@@ -8118,11 +9411,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
MACROBLOCKD *const xd = &x->e_mbd;
const TileInfo *tile = &xd->tile;
-#if CONFIG_EC_ADAPT
FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
-#else
- FRAME_CONTEXT *const ec_ctx = cm->fc;
-#endif // CONFIG_EC_ADAPT
MODE_INFO *const mi = xd->mi[0];
const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
@@ -8222,7 +9511,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
#endif
mbmi->use_intrabc = 1;
mbmi->mode = DC_PRED;
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
mbmi->mv[0].as_mv = dv;
#if CONFIG_DUAL_FILTER
for (int idx = 0; idx < 4; ++idx) mbmi->interp_filter[idx] = BILINEAR;
@@ -8233,12 +9522,12 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
x->skip = 0;
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
+ assert(x->mvcost == x->mv_cost_stack[0]);
+  // TODO(aconverse@google.com): The full-motion-field-defining discount in
+  // MV_COST_WEIGHT is too large. Explore other values.
int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, x->nmvjointcost,
- x->mvcost, MV_COST_WEIGHT);
- const PREDICTION_MODE A = av1_above_block_mode(mi, xd->above_mi, 0);
- const PREDICTION_MODE L = av1_left_block_mode(mi, xd->left_mi, 0);
- const int rate_mode = cpi->y_mode_costs[A][L][DC_PRED] +
- av1_cost_bit(ec_ctx->intrabc_prob, 1);
+ x->mvcost, MV_COST_WEIGHT_SUB);
+ const int rate_mode = av1_cost_bit(ec_ctx->intrabc_prob, 1);
RD_STATS rd_stats, rd_stats_uv;
av1_subtract_plane(x, bsize, 0);
@@ -8267,8 +9556,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
rdc_noskip.rate =
rate_mode + rate_mv + rd_stats.rate + av1_cost_bit(skip_prob, 0);
rdc_noskip.dist = rd_stats.dist;
- rdc_noskip.rdcost =
- RDCOST(x->rdmult, x->rddiv, rdc_noskip.rate, rdc_noskip.dist);
+ rdc_noskip.rdcost = RDCOST(x->rdmult, rdc_noskip.rate, rdc_noskip.dist);
if (rdc_noskip.rdcost < best_rd) {
best_rd = rdc_noskip.rdcost;
best_mbmi = *mbmi;
@@ -8282,7 +9570,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
av1_init_rd_stats(&rdc_skip);
rdc_skip.rate = rate_mode + rate_mv + av1_cost_bit(skip_prob, 1);
rdc_skip.dist = rd_stats.sse;
- rdc_skip.rdcost = RDCOST(x->rdmult, x->rddiv, rdc_skip.rate, rdc_skip.dist);
+ rdc_skip.rdcost = RDCOST(x->rdmult, rdc_skip.rate, rdc_skip.dist);
if (rdc_skip.rdcost < best_rd) {
best_rd = rdc_skip.rdcost;
best_mbmi = *mbmi;
@@ -8302,6 +9590,7 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
struct macroblockd_plane *const pd = xd->plane;
int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
int y_skip = 0, uv_skip = 0;
@@ -8310,11 +9599,11 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
const int unify_bsize = CONFIG_CB4X4;
ctx->skip = 0;
- xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
- xd->mi[0]->mbmi.ref_frame[1] = NONE_FRAME;
+ mbmi->ref_frame[0] = INTRA_FRAME;
+ mbmi->ref_frame[1] = NONE_FRAME;
#if CONFIG_INTRABC
- xd->mi[0]->mbmi.use_intrabc = 0;
- xd->mi[0]->mbmi.mv[0].as_int = 0;
+ mbmi->use_intrabc = 0;
+ mbmi->mv[0].as_int = 0;
#endif // CONFIG_INTRABC
const int64_t intra_yrd =
@@ -8325,9 +9614,29 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
&dist_y, &y_skip, best_rd);
if (intra_yrd < best_rd) {
- max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->mbmi.tx_size]
- [pd[1].subsampling_x][pd[1].subsampling_y];
- init_sbuv_mode(&xd->mi[0]->mbmi);
+#if CONFIG_CFL
+ // Perform one extra txfm_rd_in_plane() call, this time with the best value
+ // so we can store reconstructed luma values
+ RD_STATS this_rd_stats;
+
+#if CONFIG_CB4X4
+    // Don't store the luma value if no chroma is associated.
+    // Don't worry: we will store this reconstructed luma in the following
+    // encode dry-run, and the chroma plane will never know.
+ x->cfl_store_y = !x->skip_chroma_rd;
+#else
+ x->cfl_store_y = 1;
+#endif
+
+ txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
+ mbmi->sb_type, mbmi->tx_size,
+ cpi->sf.use_fast_coef_costing);
+
+ x->cfl_store_y = 0;
+#endif
+ max_uv_tx_size = uv_txsize_lookup[bsize][mbmi->tx_size][pd[1].subsampling_x]
+ [pd[1].subsampling_y];
+ init_sbuv_mode(mbmi);
#if CONFIG_CB4X4
if (!x->skip_chroma_rd)
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
@@ -8346,8 +9655,8 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
rate_y + rate_uv + av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
rd_cost->dist = dist_y + dist_uv;
}
- rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
rd_cost->dist_y = dist_y;
#endif
} else {
@@ -8360,7 +9669,7 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
if (rd_pick_intrabc_mode_sb(cpi, x, rd_cost, bsize, best_rd) < best_rd) {
ctx->skip = x->skip; // FIXME where is the proper place to set this?!
assert(rd_cost->rate != INT_MAX);
- rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
+ rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
}
#endif
if (rd_cost->rate == INT_MAX) return;
@@ -8494,7 +9803,8 @@ static void pick_filter_intra_interframe(
const AV1_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
BLOCK_SIZE bsize, int mi_row, int mi_col, int *rate_uv_intra,
int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv,
- PREDICTION_MODE *mode_uv, FILTER_INTRA_MODE_INFO *filter_intra_mode_info_uv,
+ UV_PREDICTION_MODE *mode_uv,
+ FILTER_INTRA_MODE_INFO *filter_intra_mode_info_uv,
#if CONFIG_EXT_INTRA
int8_t *uv_angle_delta,
#endif // CONFIG_EXT_INTRA
@@ -8531,7 +9841,7 @@ static void pick_filter_intra_interframe(
// TODO(huisu): use skip_mask for further speedup.
(void)skip_mask;
mbmi->mode = DC_PRED;
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
mbmi->ref_frame[0] = INTRA_FRAME;
mbmi->ref_frame[1] = NONE_FRAME;
if (!rd_pick_filter_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
@@ -8600,7 +9910,8 @@ static void pick_filter_intra_interframe(
rate2 += write_uniform_cost(
FILTER_INTRA_MODES, mbmi->filter_intra_mode_info.filter_intra_mode[0]);
#if CONFIG_EXT_INTRA
- if (av1_is_directional_mode(mbmi->uv_mode, bsize)) {
+ if (av1_is_directional_mode(mbmi->uv_mode, bsize) &&
+ av1_use_angle_delta(bsize)) {
rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
}
@@ -8628,7 +9939,7 @@ static void pick_filter_intra_interframe(
} else {
rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
}
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ this_rd = RDCOST(x->rdmult, rate2, distortion2);
if (this_rd < *best_intra_rd) {
*best_intra_rd = this_rd;
@@ -8693,6 +10004,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
unsigned char segment_id = mbmi->segment_id;
int comp_pred, i, k;
int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ int_mv frame_comp_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE];
int_mv single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
#if CONFIG_EXT_INTER
@@ -8722,7 +10036,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
int best_mode_skippable = 0;
int midx, best_mode_index = -1;
unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
+#if CONFIG_EXT_COMP_REFS
+ unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME][TOTAL_REFS_PER_FRAME];
+#else
unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
+#endif // CONFIG_EXT_COMP_REFS
aom_prob comp_mode_p;
int64_t best_intra_rd = INT64_MAX;
unsigned int best_pred_sse = UINT_MAX;
@@ -8730,7 +10048,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
int rate_uv_intra[TX_SIZES_ALL], rate_uv_tokenonly[TX_SIZES_ALL];
int64_t dist_uvs[TX_SIZES_ALL];
int skip_uvs[TX_SIZES_ALL];
- PREDICTION_MODE mode_uv[TX_SIZES_ALL];
+ UV_PREDICTION_MODE mode_uv[TX_SIZES_ALL];
#if CONFIG_PALETTE
PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL];
#endif // CONFIG_PALETTE
@@ -8747,7 +10065,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]];
int best_skip2 = 0;
- uint8_t ref_frame_skip_mask[2] = { 0 };
+ uint16_t ref_frame_skip_mask[2] = { 0 };
uint32_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 };
#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
MV_REFERENCE_FRAME best_single_inter_ref = LAST_FRAME;
@@ -8850,6 +10168,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
*returnrate_nocoef = INT_MAX;
#endif // CONFIG_SUPERTX
+#if CONFIG_SPEED_REFS
+ memset(x->mbmi_ext->ref_mvs, 0, sizeof(x->mbmi_ext->ref_mvs));
+#endif // CONFIG_SPEED_REFS
+
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
x->mbmi_ext->mode_context[ref_frame] = 0;
@@ -8873,6 +10195,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_GLOBAL_MOTION
#if CONFIG_EXT_INTER
frame_mv[NEW_NEWMV][ref_frame].as_int = INVALID_MV;
+#if CONFIG_COMPOUND_SINGLEREF
+ frame_mv[SR_NEW_NEWMV][ref_frame].as_int = INVALID_MV;
+ frame_comp_mv[SR_NEW_NEWMV][ref_frame].as_int = INVALID_MV;
+#endif // CONFIG_COMPOUND_SINGLEREF
#if CONFIG_GLOBAL_MOTION
frame_mv[ZERO_ZEROMV][ref_frame].as_int =
gm_get_motion_vector(&cm->global_motion[ref_frame],
@@ -8934,6 +10260,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
// Skip checking missing references in both single and compound reference
// modes. Note that a mode will be skipped iff both reference frames
// are masked out.
+#if CONFIG_EXT_COMP_REFS
+ ref_frame_skip_mask[0] |= (1 << ref_frame);
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+#else // !CONFIG_EXT_COMP_REFS
#if CONFIG_EXT_REFS
if (ref_frame == BWDREF_FRAME || ref_frame == ALTREF_FRAME) {
ref_frame_skip_mask[0] |= (1 << ref_frame);
@@ -8945,6 +10275,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#if CONFIG_EXT_REFS
}
#endif // CONFIG_EXT_REFS
+#endif // CONFIG_EXT_COMP_REFS
} else {
for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
// Skip fixed mv modes for poor references
@@ -9000,6 +10331,12 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV);
if (frame_mv[NEAR_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARMV);
+#if CONFIG_COMPOUND_SINGLEREF
+ if (frame_mv[SR_NEAREST_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int ||
+ frame_comp_mv[SR_NEAREST_NEARMV][ALTREF_FRAME].as_int !=
+ zeromv.as_int)
+ mode_skip_mask[ALTREF_FRAME] |= (1 << SR_NEAREST_NEARMV);
+#endif // CONFIG_COMPOUND_SINGLEREF
#endif // CONFIG_EXT_INTER
}
}
@@ -9077,7 +10414,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
int compmode_cost = 0;
int rate2 = 0, rate_y = 0, rate_uv = 0;
int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
int64_t distortion2_y = 0;
int64_t total_sse_y = INT64_MAX;
#endif
@@ -9106,6 +10443,13 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int;
frame_mv[this_mode][second_ref_frame].as_int =
frame_mv[compound_ref1_mode(this_mode)][second_ref_frame].as_int;
+#if CONFIG_COMPOUND_SINGLEREF
+ } else if (is_inter_singleref_comp_mode(this_mode)) {
+ frame_mv[this_mode][ref_frame].as_int =
+ frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int;
+ frame_comp_mv[this_mode][ref_frame].as_int =
+ frame_mv[compound_ref1_mode(this_mode)][ref_frame].as_int;
+#endif // CONFIG_COMPOUND_SINGLEREF
}
#endif // CONFIG_EXT_INTER
@@ -9154,6 +10498,34 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
(ref_frame_skip_mask[1] & (1 << AOMMAX(0, second_ref_frame))))
continue;
+#if CONFIG_EXT_COMP_REFS
+// TODO(zoeliu): Toggle the following between #if 0 and #if 1 and the bug
+// will manifest itself.
+#if 0
+ if (!(cpi->ref_frame_flags & flag_list[ref_frame]) ||
+ (second_ref_frame > INTRA_FRAME &&
+ (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))))
+ printf("Frame=%d, bsize=%d, (mi_row,mi_col)=(%d,%d), ref_frame=%d, "
+ "second_ref_frame=%d\n", cm->current_video_frame, bsize, mi_row,
+ mi_col, ref_frame, second_ref_frame);
+
+ if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
+ if (second_ref_frame > INTRA_FRAME &&
+ (!(cpi->ref_frame_flags & flag_list[second_ref_frame])))
+ continue;
+#endif // 0
+
+#if !USE_UNI_COMP_REFS
+ // NOTE(zoeliu): Temporarily disable uni-directional comp refs
+ if (second_ref_frame > INTRA_FRAME) {
+ if (!((ref_frame < BWDREF_FRAME) ^ (second_ref_frame < BWDREF_FRAME)))
+ continue;
+ }
+ assert(second_ref_frame <= INTRA_FRAME ||
+ ((ref_frame < BWDREF_FRAME) ^ (second_ref_frame < BWDREF_FRAME)));
+#endif // !USE_UNI_COMP_REFS
+#endif // CONFIG_EXT_COMP_REFS
+
if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue;
// Test best rd so far against threshold for trying this mode.
@@ -9239,7 +10611,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
}
mbmi->mode = this_mode;
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
mbmi->ref_frame[0] = ref_frame;
mbmi->ref_frame[1] = second_ref_frame;
#if CONFIG_PALETTE
@@ -9267,6 +10639,15 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
}
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ // Single ref compound mode
+ if (!comp_pred && is_inter_singleref_comp_mode(mbmi->mode)) {
+ xd->block_refs[1] = xd->block_refs[0];
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[1] = xd->plane[i].pre[0];
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+
#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
@@ -9277,7 +10658,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
struct macroblockd_plane *const pd = &xd->plane[1];
#if CONFIG_EXT_INTRA
is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
- if (is_directional_mode) {
+ if (is_directional_mode && av1_use_angle_delta(bsize)) {
int rate_dummy;
int64_t model_rd = INT64_MAX;
if (!angle_stats_ready) {
@@ -9390,10 +10771,13 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (av1_is_intra_filter_switchable(p_angle))
rate2 += cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
#endif // CONFIG_INTRA_INTERP
- rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
- MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
+ if (av1_use_angle_delta(bsize)) {
+ rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
+ MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
+ }
}
- if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) {
+ if (av1_is_directional_mode(mbmi->uv_mode, bsize) &&
+ av1_use_angle_delta(bsize)) {
rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
}
@@ -9409,7 +10793,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
mbmi->filter_intra_mode_info.filter_intra_mode[0]);
}
}
- if (mbmi->uv_mode == DC_PRED) {
+ if (mbmi->uv_mode == UV_DC_PRED) {
rate2 +=
av1_cost_bit(cpi->common.fc->filter_intra_probs[1],
mbmi->filter_intra_mode_info.use_filter_intra_mode[1]);
@@ -9422,7 +10806,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
rate2 += intra_cost_penalty;
distortion2 = distortion_y + distortion_uv;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize < BLOCK_8X8) distortion2_y = distortion_y;
#endif
} else {
@@ -9481,6 +10865,27 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
}
}
+#if CONFIG_COMPOUND_SINGLEREF
+ } else if (is_inter_singleref_comp_mode(mbmi->mode)) {
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
+ // TODO(zoeliu): To further investigate which ref_mv_idx should be
+ // chosen for the mode of SR_NEAR_NEWMV.
+ int ref_mv_idx = 0;
+          // Special case: the SR_NEAR_NEWMV mode uses
+          // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
+          // mbmi->ref_mv_idx (like NEWMV).
+ if (mbmi->mode == SR_NEAR_NEWMV) ref_mv_idx = 1;
+
+ if (compound_ref0_mode(mbmi->mode) == NEWMV ||
+ compound_ref1_mode(mbmi->mode) == NEWMV) {
+ int_mv this_mv =
+ mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
+ clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
+ xd->n8_h << MI_SIZE_LOG2, xd);
+ mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
+ }
+ }
+#endif // CONFIG_COMPOUND_SINGLEREF
} else {
#endif // CONFIG_EXT_INTER
if (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
@@ -9500,6 +10905,19 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
{
RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
av1_init_rd_stats(&rd_stats);
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+      // While av1 master uses rd_stats_y.rate throughout the codebase,
+      // which is set when handle_inter_mode() is called, the daala-dist code
+      // in rd_pick_partition() for cb4x4 and sub8x8 blocks needs to know
+      // .dist_y, which comes from rd_stats_y.dist and rd_stats_y.sse.
+      // The problem is that rd_stats_y.dist and rd_stats_y.sse are sometimes
+      // not initialized when rd_stats.skip = 1; in that case rd_stats.dist
+      // and rd_stats.sse hold the combined luma and chroma dist and sse.
+      // This can be seen inside motion_mode_rd(), which is called by
+      // handle_inter_mode().
+ if (bsize < BLOCK_8X8) av1_init_rd_stats(&rd_stats_y);
+#endif
rd_stats.rate = rate2;
// Point to variables that are maintained between loop iterations
@@ -9510,6 +10928,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_EXT_INTER
this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y,
&rd_stats_uv, &disable_skip, frame_mv,
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ frame_comp_mv,
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
mi_row, mi_col, &args, best_rd);
rate2 = rd_stats.rate;
@@ -9518,23 +10939,39 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
total_sse = rd_stats.sse;
rate_y = rd_stats_y.rate;
rate_uv = rd_stats_uv.rate;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) distortion2_y = rd_stats_y.dist;
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ if (bsize < BLOCK_8X8) {
+ if (rd_stats_y.rate != INT_MAX) {
+ assert(rd_stats_y.sse < INT64_MAX);
+ assert(rd_stats_y.dist < INT64_MAX);
+ }
+ total_sse_y = rd_stats_y.sse;
+ distortion2_y = rd_stats_y.dist;
+ }
#endif
}
// TODO(jingning): This needs some refactoring to improve code quality
// and reduce redundant steps.
#if CONFIG_EXT_INTER
+#if CONFIG_COMPOUND_SINGLEREF
+ if ((have_nearmv_in_inter_mode(mbmi->mode) &&
+ mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
+ ((mbmi->mode == NEWMV || mbmi->mode == SR_NEW_NEWMV ||
+ mbmi->mode == NEW_NEWMV) &&
+ mbmi_ext->ref_mv_count[ref_frame_type] > 1))
+#else // !CONFIG_COMPOUND_SINGLEREF
if ((have_nearmv_in_inter_mode(mbmi->mode) &&
mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
((mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) &&
- mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
-#else
+ mbmi_ext->ref_mv_count[ref_frame_type] > 1))
+#endif // CONFIG_COMPOUND_SINGLEREF
+#else // !CONFIG_EXT_INTER
if ((mbmi->mode == NEARMV &&
mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
- (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
-#endif
+ (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1))
+#endif // CONFIG_EXT_INTER
+ {
int_mv backup_mv = frame_mv[NEARMV][ref_frame];
MB_MODE_INFO backup_mbmi = *mbmi;
int backup_skip = x->skip;
@@ -9560,18 +10997,16 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
rate2 += (rate2 < INT_MAX ? cpi->drl_mode_cost0[drl_ctx][0] : 0);
if (this_rd < INT64_MAX) {
- if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
- RDCOST(x->rdmult, x->rddiv, 0, total_sse))
- tmp_ref_rd =
- RDCOST(x->rdmult, x->rddiv,
- rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
- distortion2);
+ if (RDCOST(x->rdmult, rate_y + rate_uv, distortion2) <
+ RDCOST(x->rdmult, 0, total_sse))
+ tmp_ref_rd = RDCOST(
+ x->rdmult, rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
+ distortion2);
else
- tmp_ref_rd =
- RDCOST(x->rdmult, x->rddiv,
- rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
- rate_y - rate_uv,
- total_sse);
+ tmp_ref_rd = RDCOST(
+ x->rdmult, rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
+ rate_y - rate_uv,
+ total_sse);
}
#if CONFIG_VAR_TX
for (i = 0; i < MAX_MB_PLANE; ++i)
@@ -9587,6 +11022,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
RD_STATS tmp_rd_stats, tmp_rd_stats_y, tmp_rd_stats_uv;
av1_invalid_rd_stats(&tmp_rd_stats);
+
x->skip = 0;
mbmi->ref_mv_idx = 1 + ref_idx;
@@ -9627,6 +11063,34 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
xd->n8_h << MI_SIZE_LOG2, xd);
mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
}
+#if CONFIG_COMPOUND_SINGLEREF
+ } else if (is_inter_singleref_comp_mode(mbmi->mode)) {
+ int ref_mv_idx = mbmi->ref_mv_idx;
+            // Special case: the SR_NEAR_NEWMV mode uses
+            // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
+            // mbmi->ref_mv_idx (like NEWMV).
+ if (mbmi->mode == SR_NEAR_NEWMV) ref_mv_idx = 1 + mbmi->ref_mv_idx;
+
+ // TODO(zoeliu): For the mode of SR_NEAREST_NEWMV, as it only runs
+ // the "if", not the "else if",
+ // mbmi_ext->ref_mvs[mbmi->ref_frame[0]] takes the
+ // value for "NEWMV", instead of "NEARESTMV".
+ if (compound_ref0_mode(mbmi->mode) == NEWMV ||
+ compound_ref1_mode(mbmi->mode) == NEWMV) {
+ int_mv this_mv =
+ mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
+ clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
+ xd->n8_h << MI_SIZE_LOG2, xd);
+ mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
+ } else if (compound_ref0_mode(mbmi->mode) == NEARESTMV ||
+ compound_ref1_mode(mbmi->mode) == NEARESTMV) {
+ int_mv this_mv =
+ mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
+ clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
+ xd->n8_h << MI_SIZE_LOG2, xd);
+ mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
+ }
+#endif // CONFIG_COMPOUND_SINGLEREF
} else {
#endif // CONFIG_EXT_INTER
for (ref = 0; ref < 1 + comp_pred; ++ref) {
@@ -9657,16 +11121,28 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
frame_mv[NEARMV][ref_frame] = cur_mv;
av1_init_rd_stats(&tmp_rd_stats);
-
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+        // For the same reason as for 'rd_stats_y' passed to
+        // handle_inter_mode() above, tmp_rd_stats_y.dist and
+        // tmp_rd_stats_y.sse are sometimes not initialized, esp. when
+        // tmp_rd_stats.skip = 1 and tmp_rd_stats.dist and .sse
+        // represent the combined luma and chroma .dist and .sse,
+        // so we should initialize tmp_rd_stats_y here.
+ if (bsize < BLOCK_8X8) av1_init_rd_stats(&tmp_rd_stats_y);
+#endif
// Point to variables that are not maintained between iterations
args.single_newmv = dummy_single_newmv;
#if CONFIG_EXT_INTER
args.single_newmv_rate = dummy_single_newmv_rate;
args.modelled_rd = NULL;
#endif // CONFIG_EXT_INTER
- tmp_alt_rd = handle_inter_mode(
- cpi, x, bsize, &tmp_rd_stats, &tmp_rd_stats_y, &tmp_rd_stats_uv,
- &dummy_disable_skip, frame_mv, mi_row, mi_col, &args, best_rd);
+ tmp_alt_rd = handle_inter_mode(cpi, x, bsize, &tmp_rd_stats,
+ &tmp_rd_stats_y, &tmp_rd_stats_uv,
+ &dummy_disable_skip, frame_mv,
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ frame_comp_mv,
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ mi_row, mi_col, &args, best_rd);
// Prevent pointers from escaping local scope
args.single_newmv = NULL;
#if CONFIG_EXT_INTER
@@ -9696,25 +11172,22 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (tmp_alt_rd < INT64_MAX) {
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- tmp_alt_rd = RDCOST(x->rdmult, x->rddiv, tmp_rd_stats.rate,
- tmp_rd_stats.dist);
+ tmp_alt_rd =
+ RDCOST(x->rdmult, tmp_rd_stats.rate, tmp_rd_stats.dist);
#else
- if (RDCOST(x->rdmult, x->rddiv,
- tmp_rd_stats_y.rate + tmp_rd_stats_uv.rate,
+ if (RDCOST(x->rdmult, tmp_rd_stats_y.rate + tmp_rd_stats_uv.rate,
tmp_rd_stats.dist) <
- RDCOST(x->rdmult, x->rddiv, 0, tmp_rd_stats.sse))
- tmp_alt_rd =
- RDCOST(x->rdmult, x->rddiv,
- tmp_rd_stats.rate +
- av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
- tmp_rd_stats.dist);
+ RDCOST(x->rdmult, 0, tmp_rd_stats.sse))
+ tmp_alt_rd = RDCOST(
+ x->rdmult, tmp_rd_stats.rate +
+ av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
+ tmp_rd_stats.dist);
else
- tmp_alt_rd =
- RDCOST(x->rdmult, x->rddiv,
- tmp_rd_stats.rate +
- av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
- tmp_rd_stats_y.rate - tmp_rd_stats_uv.rate,
- tmp_rd_stats.sse);
+ tmp_alt_rd = RDCOST(
+ x->rdmult, tmp_rd_stats.rate +
+ av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
+ tmp_rd_stats_y.rate - tmp_rd_stats_uv.rate,
+ tmp_rd_stats.sse);
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
}
@@ -9730,8 +11203,12 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
tmp_ref_rd = tmp_alt_rd;
backup_mbmi = *mbmi;
backup_skip = x->skip;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize < BLOCK_8X8) {
+ if (tmp_rd_stats_y.rate != INT_MAX) {
+ assert(tmp_rd_stats_y.sse < INT64_MAX);
+ assert(tmp_rd_stats_y.dist < INT64_MAX);
+ }
total_sse_y = tmp_rd_stats_y.sse;
distortion2_y = tmp_rd_stats_y.dist;
}
@@ -9774,19 +11251,33 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
// Estimate the reference frame signaling cost and add it
// to the rolling cost variable.
if (comp_pred) {
+#if CONFIG_EXT_COMP_REFS
+ rate2 += ref_costs_comp[ref_frame][second_ref_frame];
+#else // !CONFIG_EXT_COMP_REFS
rate2 += ref_costs_comp[ref_frame];
#if CONFIG_EXT_REFS
rate2 += ref_costs_comp[second_ref_frame];
#endif // CONFIG_EXT_REFS
+#endif // CONFIG_EXT_COMP_REFS
} else {
rate2 += ref_costs_single[ref_frame];
}
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ // Add the cost to signal single/comp mode in single ref.
+ if (!comp_pred && cm->reference_mode != COMPOUND_REFERENCE) {
+ aom_prob singleref_comp_mode_p = av1_get_inter_mode_prob(cm, xd);
+ rate2 += av1_cost_bit(singleref_comp_mode_p,
+ is_inter_singleref_comp_mode(mbmi->mode));
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- if (ref_frame == INTRA_FRAME) {
+ if (ref_frame == INTRA_FRAME)
#else
- if (!disable_skip) {
+ if (!disable_skip)
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+ {
if (skippable) {
// Back out the coefficient coding costs
rate2 -= (rate_y + rate_uv);
@@ -9795,9 +11286,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
// Cost the skip mb case
rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
} else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
- if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + rate_skip0,
- distortion2) <
- RDCOST(x->rdmult, x->rddiv, rate_skip1, total_sse)) {
+ if (RDCOST(x->rdmult, rate_y + rate_uv + rate_skip0, distortion2) <
+ RDCOST(x->rdmult, rate_skip1, total_sse)) {
// Add in the cost of the no skip flag.
rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
} else {
@@ -9809,8 +11299,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
this_skip2 = 1;
rate_y = 0;
rate_uv = 0;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) distortion2_y = total_sse_y;
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ if (bsize < BLOCK_8X8) {
+ assert(total_sse_y < INT64_MAX);
+ distortion2_y = total_sse_y;
+ }
#endif
}
} else {
@@ -9819,11 +11312,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
}
// Calculate the final RD estimate for this mode.
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ this_rd = RDCOST(x->rdmult, rate2, distortion2);
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
} else {
this_skip2 = mbmi->skip;
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ this_rd = RDCOST(x->rdmult, rate2, distortion2);
if (this_skip2) {
rate_y = 0;
rate_uv = 0;
@@ -9831,6 +11324,12 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
}
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ if ((bsize < BLOCK_8X8) && (rate2 != INT_MAX)) {
+ assert(distortion2_y < INT64_MAX);
+ }
+#endif
+
if (ref_frame == INTRA_FRAME) {
// Keep record of best intra rd
if (this_rd < best_intra_rd) {
@@ -9875,12 +11374,18 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
*returnrate_nocoef -= av1_cost_bit(av1_get_intra_inter_prob(cm, xd),
mbmi->ref_frame[0] != INTRA_FRAME);
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+#if CONFIG_WARPED_MOTION
+ set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+#endif
#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
MODE_INFO *const mi = xd->mi[0];
const MOTION_MODE motion_allowed = motion_mode_allowed(
-#if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
+#if CONFIG_GLOBAL_MOTION
0, xd->global_motion,
-#endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
+#endif // CONFIG_GLOBAL_MOTION
+#if CONFIG_WARPED_MOTION
+ xd,
+#endif
mi);
if (motion_allowed == WARPED_CAUSAL)
*returnrate_nocoef -= cpi->motion_mode_cost[bsize][mbmi->motion_mode];
@@ -9901,8 +11406,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
best_rate_y = rate_y + av1_cost_bit(av1_get_skip_prob(cm, xd),
this_skip2 || skippable);
best_rate_uv = rate_uv;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2_y;
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ if (bsize < BLOCK_8X8) {
+ assert(distortion2_y < INT64_MAX);
+ rd_cost->dist_y = distortion2_y;
+ }
#endif
#if CONFIG_VAR_TX
for (i = 0; i < MAX_MB_PLANE; ++i)
@@ -9911,7 +11419,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_VAR_TX
}
}
-
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ if ((bsize < BLOCK_8X8) && (rd_cost->rate != INT_MAX)) {
+ assert(rd_cost->dist_y < INT64_MAX);
+ }
+#endif
/* keep record of best compound/single-only prediction */
if (!disable_skip && ref_frame != INTRA_FRAME) {
int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
@@ -9924,8 +11436,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
hybrid_rate = rate2 + compmode_cost;
}
- single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
- hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
+ single_rd = RDCOST(x->rdmult, single_rate, distortion2);
+ hybrid_rd = RDCOST(x->rdmult, hybrid_rate, distortion2);
if (!comp_pred) {
if (single_rd < best_pred_rd[SINGLE_REFERENCE])
@@ -9963,6 +11475,15 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
}
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ // Single ref compound mode
+ if (!has_second_ref(mbmi) && is_inter_singleref_comp_mode(mbmi->mode)) {
+ xd->block_refs[1] = xd->block_refs[0];
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[1] = xd->plane[i].pre[0];
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+
if (is_inter_mode(mbmi->mode)) {
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
#if CONFIG_MOTION_VAR
@@ -9996,9 +11517,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
}
- if (RDCOST(x->rdmult, x->rddiv, rd_stats_y.rate + rd_stats_uv.rate,
+ if (RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
(rd_stats_y.dist + rd_stats_uv.dist)) >
- RDCOST(x->rdmult, x->rddiv, 0, (rd_stats_y.sse + rd_stats_uv.sse))) {
+ RDCOST(x->rdmult, 0, (rd_stats_y.sse + rd_stats_uv.sse))) {
skip_blk = 1;
rd_stats_y.rate = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
rd_stats_uv.rate = 0;
@@ -10009,8 +11530,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
rd_stats_y.rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
}
- if (RDCOST(x->rdmult, x->rddiv, best_rate_y + best_rate_uv, rd_cost->dist) >
- RDCOST(x->rdmult, x->rddiv, rd_stats_y.rate + rd_stats_uv.rate,
+ if (RDCOST(x->rdmult, best_rate_y + best_rate_uv, rd_cost->dist) >
+ RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
(rd_stats_y.dist + rd_stats_uv.dist))) {
#if CONFIG_VAR_TX
int idx, idy;
@@ -10031,15 +11552,24 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
rd_cost->rate +=
(rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv);
rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) rd_cost->dist_y = rd_stats_y.dist;
-#endif
- rd_cost->rdcost =
- RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
+ rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
best_skip2 = skip_blk;
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ if (bsize < BLOCK_8X8) {
+ assert(rd_cost->rate != INT_MAX);
+ assert(rd_cost->dist_y < INT64_MAX);
+ rd_cost->dist_y = rd_stats_y.dist;
+ }
+#endif
}
}
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ if ((bsize < BLOCK_8X8) && (rd_cost->rate != INT_MAX)) {
+ assert(rd_cost->dist_y < INT64_MAX);
+ }
+#endif
+
#if CONFIG_PALETTE
// Only try palette mode when the best mode so far is an intra mode.
if (try_palette && !is_inter_mode(best_mbmode.mode)) {
@@ -10058,7 +11588,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
MB_MODE_INFO best_mbmi_palette = best_mbmode;
mbmi->mode = DC_PRED;
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
mbmi->ref_frame[0] = INTRA_FRAME;
mbmi->ref_frame[1] = NONE_FRAME;
rate_overhead_palette = rd_pick_palette_intra_sby(
@@ -10119,7 +11649,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_SUPERTX
rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
}
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ this_rd = RDCOST(x->rdmult, rate2, distortion2);
if (this_rd < best_rd) {
best_mode_index = 3;
mbmi->mv[0].as_int = 0;
@@ -10165,10 +11695,14 @@ PALETTE_EXIT:
}
#endif // CONFIG_FILTER_INTRA
- // The inter modes' rate costs are not calculated precisely in some cases.
- // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
- // ZEROMV. Here, checks are added for those cases, and the mode decisions
- // are corrected.
+// The inter modes' rate costs are not calculated precisely in some cases.
+// Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
+// ZEROMV. Here, checks are added for those cases, and the mode decisions
+// are corrected.
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+// NOTE: For SR_NEW_NEWMV, no need to check, as the two MVs from the same ref
+// are necessarily different from each other.
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
if (best_mbmode.mode == NEWMV
#if CONFIG_EXT_INTER
|| best_mbmode.mode == NEW_NEWMV
@@ -10248,8 +11782,9 @@ PALETTE_EXIT:
}
if (nearestmv[0].as_int == best_mbmode.mv[0].as_int &&
- nearestmv[1].as_int == best_mbmode.mv[1].as_int) {
+ nearestmv[1].as_int == best_mbmode.mv[1].as_int)
#if CONFIG_EXT_INTER
+ {
best_mbmode.mode = NEAREST_NEARESTMV;
} else {
int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
@@ -10274,6 +11809,7 @@ PALETTE_EXIT:
best_mbmode.mode = ZERO_ZEROMV;
}
#else
+ {
best_mbmode.mode = NEARESTMV;
} else if (best_mbmode.mv[0].as_int == zeromv[0].as_int &&
best_mbmode.mv[1].as_int == zeromv[1].as_int) {
@@ -10287,11 +11823,18 @@ PALETTE_EXIT:
// using a mode which can support ref_mv_idx
if (best_mbmode.ref_mv_idx != 0 &&
#if CONFIG_EXT_INTER
+#if CONFIG_COMPOUND_SINGLEREF
+ !(best_mbmode.mode == NEWMV || best_mbmode.mode == SR_NEW_NEWMV ||
+ best_mbmode.mode == NEW_NEWMV ||
+ have_nearmv_in_inter_mode(best_mbmode.mode)))
+#else // !CONFIG_COMPOUND_SINGLEREF
!(best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV ||
- have_nearmv_in_inter_mode(best_mbmode.mode))) {
-#else
- !(best_mbmode.mode == NEARMV || best_mbmode.mode == NEWMV)) {
-#endif
+ have_nearmv_in_inter_mode(best_mbmode.mode)))
+#endif // CONFIG_COMPOUND_SINGLEREF
+#else // !CONFIG_EXT_INTER
+ !(best_mbmode.mode == NEARMV || best_mbmode.mode == NEWMV))
+#endif // CONFIG_EXT_INTER
+ {
best_mbmode.ref_mv_idx = 0;
}
@@ -10377,11 +11920,12 @@ PALETTE_EXIT:
) {
#if CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR
// Correct the motion mode for ZEROMV
- const MOTION_MODE last_motion_mode_allowed = motion_mode_allowed(
-#if SEPARATE_GLOBAL_MOTION
- 0, xd->global_motion,
-#endif // SEPARATE_GLOBAL_MOTION
- xd->mi[0]);
+ const MOTION_MODE last_motion_mode_allowed =
+ motion_mode_allowed(0, xd->global_motion,
+#if CONFIG_WARPED_MOTION
+ xd,
+#endif
+ xd->mi[0]);
if (mbmi->motion_mode > last_motion_mode_allowed)
mbmi->motion_mode = last_motion_mode_allowed;
#endif // CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR
@@ -10445,7 +11989,11 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
int i;
int64_t best_pred_diff[REFERENCE_MODES];
unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
+#if CONFIG_EXT_COMP_REFS
+ unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME][TOTAL_REFS_PER_FRAME];
+#else
unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
+#endif // CONFIG_EXT_COMP_REFS
aom_prob comp_mode_p;
InterpFilter best_filter = SWITCHABLE;
int64_t this_rd = INT64_MAX;
@@ -10476,7 +12024,7 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
#endif // CONFIG_FILTER_INTRA
mbmi->mode = ZEROMV;
mbmi->motion_mode = SIMPLE_TRANSLATION;
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
mbmi->ref_frame[0] = LAST_FRAME;
mbmi->ref_frame[1] = NONE_FRAME;
#if CONFIG_GLOBAL_MOTION
@@ -10501,7 +12049,17 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
#if CONFIG_WARPED_MOTION
if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
+#if WARPED_MOTION_SORT_SAMPLES
+ int pts_mv[SAMPLES_ARRAY_SIZE];
+ mbmi->num_proj_ref[0] =
+ findSamples(cm, xd, mi_row, mi_col, pts, pts_inref, pts_mv);
+ // Rank the samples by motion vector difference
+ if (mbmi->num_proj_ref[0] > 1)
+ mbmi->num_proj_ref[0] = sortSamples(pts_mv, &mbmi->mv[0].as_mv, pts,
+ pts_inref, mbmi->num_proj_ref[0]);
+#else
mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
+#endif // WARPED_MOTION_SORT_SAMPLES
}
#endif
@@ -10548,12 +12106,12 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
// Estimate the reference frame signaling cost and add it
// to the rolling cost variable.
rate2 += ref_costs_single[LAST_FRAME];
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ this_rd = RDCOST(x->rdmult, rate2, distortion2);
rd_cost->rate = rate2;
rd_cost->dist = distortion2;
rd_cost->rdcost = this_rd;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2;
#endif
if (this_rd >= best_rd_so_far) {
@@ -10646,7 +12204,8 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
// handle above row
if (xd->up_available) {
- const int overlap = num_4x4_blocks_high_lookup[bsize] * 2;
+ const int overlap =
+ AOMMIN(block_size_high[bsize] >> 1, block_size_high[BLOCK_64X64] >> 1);
const int miw = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
const int mi_row_offset = -1;
const uint8_t *const mask1d = av1_get_obmc_mask(overlap);
@@ -10666,7 +12225,9 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
&xd->mi[mi_col_offset + 1 + mi_row_offset * xd->mi_stride]->mbmi;
#endif
const BLOCK_SIZE a_bsize = AOMMAX(above_mbmi->sb_type, BLOCK_8X8);
- const int mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]);
+ const int above_step =
+ AOMMIN(mi_size_wide[a_bsize], mi_size_wide[BLOCK_64X64]);
+ const int mi_step = AOMMIN(xd->n8_w, above_step);
const int neighbor_bw = mi_step * MI_SIZE;
if (is_neighbor_overlappable(above_mbmi)) {
@@ -10725,7 +12286,8 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
// handle left column
if (xd->left_available) {
- const int overlap = num_4x4_blocks_wide_lookup[bsize] * 2;
+ const int overlap =
+ AOMMIN(block_size_wide[bsize] >> 1, block_size_wide[BLOCK_64X64] >> 1);
const int mih = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
const int mi_col_offset = -1;
const uint8_t *const mask1d = av1_get_obmc_mask(overlap);
@@ -10746,7 +12308,9 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
&xd->mi[mi_col_offset + (mi_row_offset + 1) * xd->mi_stride]->mbmi;
#endif
const BLOCK_SIZE l_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8);
- const int mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]);
+ const int left_step =
+ AOMMIN(mi_size_high[l_bsize], mi_size_high[BLOCK_64X64]);
+ const int mi_step = AOMMIN(xd->n8_h, left_step);
const int neighbor_bh = mi_step * MI_SIZE;
if (is_neighbor_overlappable(left_mbmi)) {
@@ -10854,8 +12418,23 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
av1_subtract_plane(x, bsize, 0);
+#if CONFIG_VAR_TX
+ if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
+ select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
+ } else {
+ int idx, idy;
+ super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
+ for (idy = 0; idy < xd->n8_h; ++idy)
+ for (idx = 0; idx < xd->n8_w; ++idx)
+ mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
+ memset(x->blk_skip[0], rd_stats_y.skip,
+ sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
+ }
+ inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
+#else
super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
+#endif
assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX);
if (rd_stats_y.skip && rd_stats_uv.skip) {
rd_stats_y.rate = rate_skip1;
@@ -10863,10 +12442,10 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
rd_stats_y.dist = rd_stats_y.sse;
rd_stats_uv.dist = rd_stats_uv.sse;
skip_blk = 0;
- } else if (RDCOST(x->rdmult, x->rddiv,
+ } else if (RDCOST(x->rdmult,
(rd_stats_y.rate + rd_stats_uv.rate + rate_skip0),
(rd_stats_y.dist + rd_stats_uv.dist)) >
- RDCOST(x->rdmult, x->rddiv, rate_skip1,
+ RDCOST(x->rdmult, rate_skip1,
(rd_stats_y.sse + rd_stats_uv.sse))) {
rd_stats_y.rate = rate_skip1;
rd_stats_uv.rate = 0;
@@ -10879,18 +12458,33 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
}
backup_skip = skip_blk;
backup_mbmi = *mbmi;
- rd_causal = RDCOST(x->rdmult, x->rddiv, (rd_stats_y.rate + rd_stats_uv.rate),
+ rd_causal = RDCOST(x->rdmult, (rd_stats_y.rate + rd_stats_uv.rate),
(rd_stats_y.dist + rd_stats_uv.dist));
- rd_causal += RDCOST(x->rdmult, x->rddiv,
- av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 0), 0);
+ rd_causal +=
+ RDCOST(x->rdmult, av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 0), 0);
// Check non-causal mode
mbmi->motion_mode = OBMC_CAUSAL;
av1_build_ncobmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
av1_subtract_plane(x, bsize, 0);
+#if CONFIG_VAR_TX
+ if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
+ select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
+ } else {
+ int idx, idy;
+ super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
+ for (idy = 0; idy < xd->n8_h; ++idy)
+ for (idx = 0; idx < xd->n8_w; ++idx)
+ mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
+ memset(x->blk_skip[0], rd_stats_y.skip,
+ sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
+ }
+ inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
+#else
super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
+#endif
assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX);
if (rd_stats_y.skip && rd_stats_uv.skip) {
rd_stats_y.rate = rate_skip1;
@@ -10898,10 +12492,10 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
rd_stats_y.dist = rd_stats_y.sse;
rd_stats_uv.dist = rd_stats_uv.sse;
skip_blk = 0;
- } else if (RDCOST(x->rdmult, x->rddiv,
+ } else if (RDCOST(x->rdmult,
(rd_stats_y.rate + rd_stats_uv.rate + rate_skip0),
(rd_stats_y.dist + rd_stats_uv.dist)) >
- RDCOST(x->rdmult, x->rddiv, rate_skip1,
+ RDCOST(x->rdmult, rate_skip1,
(rd_stats_y.sse + rd_stats_uv.sse))) {
rd_stats_y.rate = rate_skip1;
rd_stats_uv.rate = 0;
@@ -10914,9 +12508,8 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
}
if (rd_causal >
- RDCOST(x->rdmult, x->rddiv,
- rd_stats_y.rate + rd_stats_uv.rate +
- av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 1),
+ RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate +
+ av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 1),
(rd_stats_y.dist + rd_stats_uv.dist))) {
x->skip = skip_blk;
} else {
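The recurring RDCOST() change throughout this file drops the explicit x->rddiv argument: RDCOST(x->rdmult, x->rddiv, rate, dist) becomes RDCOST(x->rdmult, rate, dist), with the former run-time divisor folded into the macro. A minimal standalone sketch of that convention follows; the shift constant and function name are illustrative assumptions, not the macro from av1/encoder/rd.h.

#include <stdint.h>
#include <stdio.h>

/* Illustrative sketch only -- not the RDCOST macro from av1/encoder/rd.h.
 * After the change, every call site passes just (rdmult, rate, dist) and the
 * old x->rddiv divisor is replaced by a fixed shift inside the macro. */
#define SKETCH_RDDIV_BITS 7 /* assumed precision; the real constant may differ */

static int64_t sketch_rdcost(int rdmult, int rate, int64_t dist) {
  /* rd cost = lambda * rate + distortion, with lambda carried in rdmult */
  return (((int64_t)rate * rdmult) >> SKETCH_RDDIV_BITS) + dist;
}

int main(void) {
  /* hypothetical numbers, only to show the new calling convention */
  printf("%lld\n",
         (long long)sketch_rdcost(/*rdmult=*/128, /*rate=*/100, /*dist=*/5000));
  return 0;
}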
diff --git a/third_party/aom/av1/encoder/rdopt.h b/third_party/aom/av1/encoder/rdopt.h
index e5d778fe5..43a6a3794 100644
--- a/third_party/aom/av1/encoder/rdopt.h
+++ b/third_party/aom/av1/encoder/rdopt.h
@@ -57,22 +57,33 @@ typedef enum OUTPUT_STATUS {
OUTPUT_HAS_DECODED_PIXELS
} OUTPUT_STATUS;
+#if CONFIG_PALETTE || CONFIG_INTRABC
+// Returns the number of colors in 'src'.
+int av1_count_colors(const uint8_t *src, int stride, int rows, int cols);
+#if CONFIG_HIGHBITDEPTH
+// Same as av1_count_colors(), but for high-bitdepth mode.
+int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
+ int bit_depth);
+#endif // CONFIG_HIGHBITDEPTH
+#endif // CONFIG_PALETTE || CONFIG_INTRABC
+
void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col,
TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse,
OUTPUT_STATUS output_status);
-#if CONFIG_DAALA_DIST
-int64_t av1_daala_dist(const uint8_t *src, int src_stride, const uint8_t *dst,
- int dst_stride, int bsw, int bsh, int qm,
- int use_activity_masking, int qindex);
+#if CONFIG_DIST_8X8
+int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
+ const uint8_t *src, int src_stride, const uint8_t *dst,
+ int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
+ int bsh, int visible_w, int visible_h, int qindex);
#endif
#if !CONFIG_PVQ || CONFIG_VAR_TX
int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
- int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
- const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
- int use_fast_coef_costing);
+ int blk_row, int blk_col, int block, TX_SIZE tx_size,
+ const SCAN_ORDER *scan_order, const ENTROPY_CONTEXT *a,
+ const ENTROPY_CONTEXT *l, int use_fast_coef_costing);
#endif
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
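The av1_count_colors() declarations added above only state the contract ("returns the number of colors in 'src'"). As a rough, hedged illustration of that contract for 8-bit input -- not the implementation in rdopt.c, and with a made-up helper name -- a simple histogram pass looks like this:

#include <stdio.h>

/* Sketch of the declared contract: count the distinct 8-bit sample values in
 * a rows x cols block read with the given stride. The real av1_count_colors()
 * in rdopt.c may differ in details (early exits, high-bitdepth handling). */
static int count_colors_sketch(const unsigned char *src, int stride, int rows,
                               int cols) {
  int present[256] = { 0 };
  for (int r = 0; r < rows; ++r)
    for (int c = 0; c < cols; ++c) present[src[r * stride + c]] = 1;
  int n = 0;
  for (int i = 0; i < 256; ++i) n += present[i];
  return n;
}

int main(void) {
  const unsigned char block[2][4] = { { 1, 1, 2, 2 }, { 2, 3, 3, 3 } };
  printf("%d\n", count_colors_sketch(&block[0][0], 4, 2, 4)); /* prints 3 */
  return 0;
}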
diff --git a/third_party/aom/av1/encoder/segmentation.c b/third_party/aom/av1/encoder/segmentation.c
index b581a61d0..b61df43fa 100644
--- a/third_party/aom/av1/encoder/segmentation.c
+++ b/third_party/aom/av1/encoder/segmentation.c
@@ -299,12 +299,8 @@ void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd) {
int no_pred_cost;
int t_pred_cost = INT_MAX;
- int i, tile_col, tile_row, mi_row, mi_col;
-#if CONFIG_TILE_GROUPS
+ int tile_col, tile_row, mi_row, mi_col;
const int probwt = cm->num_tg;
-#else
- const int probwt = 1;
-#endif
unsigned(*temporal_predictor_count)[2] = cm->counts.seg.pred;
unsigned *no_pred_segcounts = cm->counts.seg.tree_total;
@@ -312,7 +308,9 @@ void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd) {
aom_prob no_pred_tree[SEG_TREE_PROBS];
aom_prob t_pred_tree[SEG_TREE_PROBS];
+#if !CONFIG_NEW_MULTISYMBOL
aom_prob t_nopred_prob[PREDICTION_PROBS];
+#endif
(void)xd;
@@ -327,7 +325,7 @@ void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd) {
for (tile_col = 0; tile_col < cm->tile_cols; tile_col++) {
MODE_INFO **mi_ptr;
av1_tile_set_col(&tile_info, cm, tile_col);
-#if CONFIG_TILE_GROUPS && CONFIG_DEPENDENT_HORZTILES
+#if CONFIG_DEPENDENT_HORZTILES
av1_tile_set_tg_boundary(&tile_info, cm, tile_row, tile_col);
#endif
mi_ptr = cm->mi_grid_visible + tile_info.mi_row_start * cm->mi_stride +
@@ -357,8 +355,9 @@ void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd) {
calc_segtree_probs(t_unpred_seg_counts, t_pred_tree, segp->tree_probs,
probwt);
t_pred_cost = cost_segmap(t_unpred_seg_counts, t_pred_tree);
-
+#if !CONFIG_NEW_MULTISYMBOL
// Add in the cost of the signaling for each prediction context.
+ int i;
for (i = 0; i < PREDICTION_PROBS; i++) {
const int count0 = temporal_predictor_count[i][0];
const int count1 = temporal_predictor_count[i][1];
@@ -372,6 +371,7 @@ void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd) {
t_pred_cost += count0 * av1_cost_zero(t_nopred_prob[i]) +
count1 * av1_cost_one(t_nopred_prob[i]);
}
+#endif
}
// Now choose which coding method to use.
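The t_pred_cost loop guarded above folds in the cost of coding one binary prediction flag per context from its zero/one hit counts. A hedged, floating-point sketch of that bookkeeping follows; it is only a stand-in for the fixed-point av1_cost_zero()/av1_cost_one() helpers used in segmentation.c, and the numbers in main() are hypothetical.

#include <math.h>
#include <stdio.h>

/* Approximate bits needed to code 'count0' zeros and 'count1' ones when the
 * probability of a zero is prob/256 -- a floating-point stand-in for the
 * fixed-point av1_cost_zero()/av1_cost_one() accumulation above. */
static double flag_cost_bits(int count0, int count1, int prob /* 1..255 */) {
  const double p0 = prob / 256.0;
  return count0 * -log2(p0) + count1 * -log2(1.0 - p0);
}

int main(void) {
  /* hypothetical counts and probability for one prediction context */
  printf("%.2f bits\n", flag_cost_bits(900, 100, 230));
  return 0;
}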
diff --git a/third_party/aom/av1/encoder/speed_features.c b/third_party/aom/av1/encoder/speed_features.c
index e2275a54f..eeab33a95 100644
--- a/third_party/aom/av1/encoder/speed_features.c
+++ b/third_party/aom/av1/encoder/speed_features.c
@@ -35,7 +35,7 @@ static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = {
// TODO(aconverse@google.com): These settings are pretty relaxed, tune them for
// each speed setting
static MESH_PATTERN intrabc_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
- { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
+ { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
{ { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
{ { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
{ { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
@@ -171,12 +171,24 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
sf->recode_loop = ALLOW_RECODE_KFARFGF;
#if CONFIG_TX64X64
sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
+#if CONFIG_CFL
+ sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V;
+#else
sf->intra_uv_mode_mask[TX_64X64] = INTRA_DC_H_V;
+#endif // CONFIG_CFL
#endif // CONFIG_TX64X64
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
+#if CONFIG_CFL
+ sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V;
+#else
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
+#endif
sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
+#if CONFIG_CFL
+ sf->intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V;
+#else
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
+#endif
sf->tx_size_search_breakout = 1;
sf->partition_search_breakout_rate_thr = 80;
@@ -199,7 +211,7 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
: FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR;
sf->disable_filter_search_var_thresh = 100;
- sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
+ sf->comp_inter_joint_search_thresh = BLOCK_SIZES_ALL;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->allow_partition_search_skip = 1;
sf->use_upsampled_references = 0;
@@ -227,10 +239,18 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
sf->mode_skip_start = 6;
#if CONFIG_TX64X64
sf->intra_y_mode_mask[TX_64X64] = INTRA_DC;
+#if CONFIG_CFL
+ sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC;
+#else
sf->intra_uv_mode_mask[TX_64X64] = INTRA_DC;
+#endif // CONFIG_CFL
#endif // CONFIG_TX64X64
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
+#if CONFIG_CFL
+ sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC;
+#else
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
+#endif // CONFIG_CFL
sf->adaptive_interp_filter_search = 1;
}
@@ -255,7 +275,11 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
sf->disable_filter_search_var_thresh = 500;
for (i = 0; i < TX_SIZES; ++i) {
sf->intra_y_mode_mask[i] = INTRA_DC;
+#if CONFIG_CFL
+ sf->intra_uv_mode_mask[i] = UV_INTRA_DC;
+#else
sf->intra_uv_mode_mask[i] = INTRA_DC;
+#endif // CONFIG_CFL
}
sf->partition_search_breakout_rate_thr = 500;
sf->mv.reduce_first_step_size = 1;
@@ -405,7 +429,11 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_ALL;
+#if CONFIG_CFL
+ sf->intra_uv_mode_mask[i] = UV_INTRA_ALL;
+#else
sf->intra_uv_mode_mask[i] = INTRA_ALL;
+#endif // CONFIG_CFL
}
sf->use_rd_breakout = 0;
sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
@@ -413,7 +441,7 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
sf->use_fast_coef_costing = 0;
sf->mode_skip_start = MAX_MODES;  // Mode index at which the mode skip mask is set
sf->schedule_mode_search = 0;
- for (i = 0; i < BLOCK_SIZES; ++i) sf->inter_mode_mask[i] = INTER_ALL;
+ for (i = 0; i < BLOCK_SIZES_ALL; ++i) sf->inter_mode_mask[i] = INTER_ALL;
sf->max_intra_bsize = BLOCK_LARGEST;
sf->reuse_inter_pred_sby = 0;
// This setting only takes effect when partition_search_type is set
diff --git a/third_party/aom/av1/encoder/speed_features.h b/third_party/aom/av1/encoder/speed_features.h
index 5710d77c7..2c89f4e5c 100644
--- a/third_party/aom/av1/encoder/speed_features.h
+++ b/third_party/aom/av1/encoder/speed_features.h
@@ -29,6 +29,24 @@ enum {
#endif // CONFIG_SMOOTH_HV
#endif // CONFIG_ALT_INTRA
(1 << TM_PRED),
+#if CONFIG_CFL
+ UV_INTRA_ALL = (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED) |
+ (1 << UV_D45_PRED) | (1 << UV_D135_PRED) |
+ (1 << UV_D117_PRED) | (1 << UV_D153_PRED) |
+ (1 << UV_D207_PRED) | (1 << UV_D63_PRED) |
+#if CONFIG_ALT_INTRA
+ (1 << UV_SMOOTH_PRED) |
+#if CONFIG_SMOOTH_HV
+ (1 << UV_SMOOTH_V_PRED) | (1 << UV_SMOOTH_H_PRED) |
+#endif // CONFIG_SMOOTH_HV
+#endif // CONFIG_ALT_INTRA
+ (1 << UV_TM_PRED),
+ UV_INTRA_DC = (1 << UV_DC_PRED),
+ UV_INTRA_DC_TM = (1 << UV_DC_PRED) | (1 << UV_TM_PRED),
+ UV_INTRA_DC_H_V = (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED),
+ UV_INTRA_DC_TM_H_V = (1 << UV_DC_PRED) | (1 << UV_TM_PRED) |
+ (1 << UV_V_PRED) | (1 << UV_H_PRED),
+#endif // CONFIG_CFL
INTRA_DC = (1 << DC_PRED),
INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED),
INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED),
@@ -38,6 +56,11 @@ enum {
#if CONFIG_EXT_INTER
enum {
+#if CONFIG_COMPOUND_SINGLEREF
+// TODO(zoeliu): To further consider the following single ref comp modes:
+// SR_NEAREST_NEARMV, SR_NEAREST_NEWMV, SR_NEAR_NEWMV,
+// SR_ZERO_NEWMV, and SR_NEW_NEWMV.
+#endif // CONFIG_COMPOUND_SINGLEREF
INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV) |
(1 << NEAREST_NEARESTMV) | (1 << NEAR_NEARMV) | (1 << NEW_NEWMV) |
(1 << NEAREST_NEWMV) | (1 << NEAR_NEWMV) | (1 << NEW_NEARMV) |
@@ -67,7 +90,7 @@ enum {
(1 << NEW_NEARMV) | (1 << NEAR_NEWMV) |
(1 << NEAR_NEARMV),
};
-#else
+#else // !CONFIG_EXT_INTER
enum {
INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV),
INTER_NEAREST = (1 << NEARESTMV),
@@ -399,10 +422,6 @@ typedef struct SPEED_FEATURES {
int intra_y_mode_mask[TX_SIZES];
int intra_uv_mode_mask[TX_SIZES];
- // These bit masks allow you to enable or disable intra modes for each
- // prediction block size separately.
- int intra_y_mode_bsize_mask[BLOCK_SIZES];
-
// This variable enables an early break out of mode testing if the model for
// rd built from the prediction signal indicates a value that's much
// higher than the best rd we've seen so far.
@@ -417,7 +436,7 @@ typedef struct SPEED_FEATURES {
// A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV
// modes are used in order from LSB to MSB for each BLOCK_SIZE.
- int inter_mode_mask[BLOCK_SIZES];
+ int inter_mode_mask[BLOCK_SIZES_ALL];
// This feature controls whether we do the expensive context update and
// calculation in the rd coefficient costing loop.
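(For reference only, not part of the patch.) The UV_INTRA_* masks added above mirror the existing INTRA_* masks but index the UV_* prediction-mode enum used when CONFIG_CFL is on, and inter_mode_mask now covers BLOCK_SIZES_ALL. A minimal sketch of how such a mask is typically consulted before spending RD search time on a mode is shown below; the helper name is hypothetical and the codebase types (SPEED_FEATURES, BLOCK_SIZE, PREDICTION_MODE) are assumed from the surrounding headers.

// Illustrative only: skip a mode whose bit is not set for this block size.
static int inter_mode_allowed_example(const SPEED_FEATURES *sf,
                                      BLOCK_SIZE bsize, PREDICTION_MODE mode) {
  // Masks are built as (1 << NEARESTMV) | (1 << NEARMV) | ..., so the mode
  // value itself selects the bit; the same pattern applies to
  // intra_uv_mode_mask[] with the UV_* modes under CONFIG_CFL.
  return (sf->inter_mode_mask[bsize] & (1 << mode)) != 0;
}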
diff --git a/third_party/aom/av1/encoder/temporal_filter.c b/third_party/aom/av1/encoder/temporal_filter.c
index 1ed1ebdb2..604647922 100644
--- a/third_party/aom/av1/encoder/temporal_filter.c
+++ b/third_party/aom/av1/encoder/temporal_filter.c
@@ -41,7 +41,7 @@ static void temporal_filter_predictors_mb_c(
enum mv_precision mv_precision_uv;
int uv_stride;
// TODO(angiebird): change plane setting accordingly
- ConvolveParams conv_params = get_conv_params(which_mv, 0);
+ ConvolveParams conv_params = get_conv_params(which_mv, which_mv, 0);
#if USE_TEMPORALFILTER_12TAP
#if CONFIG_DUAL_FILTER
@@ -413,10 +413,10 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi,
mbd->mi[0]->bmi[0].as_mv[0].as_mv.col, predictor, scale,
mb_col * 16, mb_row * 16);
+// Apply the filter (YUV)
#if CONFIG_HIGHBITDEPTH
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int adj_strength = strength + 2 * (mbd->bd - 8);
- // Apply the filter (YUV)
av1_highbd_temporal_filter_apply(
f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16,
adj_strength, filter_weight, accumulator, count);
@@ -429,7 +429,7 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi,
mb_uv_width, mb_uv_height, adj_strength, filter_weight,
accumulator + 512, count + 512);
} else {
- // Apply the filter (YUV)
+#endif // CONFIG_HIGHBITDEPTH
av1_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
predictor, 16, 16, strength,
filter_weight, accumulator, count);
@@ -441,29 +441,17 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi,
f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
mb_uv_width, mb_uv_height, strength, filter_weight,
accumulator + 512, count + 512);
+#if CONFIG_HIGHBITDEPTH
}
-#else
- // Apply the filter (YUV)
- av1_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
- predictor, 16, 16, strength,
- filter_weight, accumulator, count);
- av1_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride,
- predictor + 256, mb_uv_width,
- mb_uv_height, strength, filter_weight,
- accumulator + 256, count + 256);
- av1_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride,
- predictor + 512, mb_uv_width,
- mb_uv_height, strength, filter_weight,
- accumulator + 512, count + 512);
#endif // CONFIG_HIGHBITDEPTH
}
}
+// Normalize filter output to produce AltRef frame
#if CONFIG_HIGHBITDEPTH
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
uint16_t *dst1_16;
uint16_t *dst2_16;
- // Normalize filter output to produce AltRef frame
dst1 = cpi->alt_ref_buffer.y_buffer;
dst1_16 = CONVERT_TO_SHORTPTR(dst1);
stride = cpi->alt_ref_buffer.y_stride;
@@ -505,7 +493,7 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi,
byte += stride - mb_uv_width;
}
} else {
- // Normalize filter output to produce AltRef frame
+#endif // CONFIG_HIGHBITDEPTH
dst1 = cpi->alt_ref_buffer.y_buffer;
stride = cpi->alt_ref_buffer.y_stride;
byte = mb_y_offset;
@@ -541,43 +529,7 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi,
}
byte += stride - mb_uv_width;
}
- }
-#else
- // Normalize filter output to produce AltRef frame
- dst1 = cpi->alt_ref_buffer.y_buffer;
- stride = cpi->alt_ref_buffer.y_stride;
- byte = mb_y_offset;
- for (i = 0, k = 0; i < 16; i++) {
- for (j = 0; j < 16; j++, k++) {
- dst1[byte] =
- (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
-
- // move to next pixel
- byte++;
- }
- byte += stride - 16;
- }
-
- dst1 = cpi->alt_ref_buffer.u_buffer;
- dst2 = cpi->alt_ref_buffer.v_buffer;
- stride = cpi->alt_ref_buffer.uv_stride;
- byte = mb_uv_offset;
- for (i = 0, k = 256; i < mb_uv_height; i++) {
- for (j = 0; j < mb_uv_width; j++, k++) {
- int m = k + 256;
-
- // U
- dst1[byte] =
- (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
-
- // V
- dst2[byte] =
- (uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);
-
- // move to next pixel
- byte++;
- }
- byte += stride - mb_uv_width;
+#if CONFIG_HIGHBITDEPTH
}
#endif // CONFIG_HIGHBITDEPTH
mb_y_offset += 16;
@@ -650,7 +602,11 @@ static void adjust_arnr_filter(AV1_COMP *cpi, int distance, int group_boost,
*arnr_strength = strength;
}
-void av1_temporal_filter(AV1_COMP *cpi, int distance) {
+void av1_temporal_filter(AV1_COMP *cpi,
+#if CONFIG_BGSPRITE
+ YV12_BUFFER_CONFIG *bg,
+#endif // CONFIG_BGSPRITE
+ int distance) {
RATE_CONTROL *const rc = &cpi->rc;
int frame;
int frames_to_blur;
@@ -692,9 +648,18 @@ void av1_temporal_filter(AV1_COMP *cpi, int distance) {
// Setup frame pointers, NULL indicates frame not included in filter.
for (frame = 0; frame < frames_to_blur; ++frame) {
const int which_buffer = start_frame - frame;
- struct lookahead_entry *buf =
- av1_lookahead_peek(cpi->lookahead, which_buffer);
- frames[frames_to_blur - 1 - frame] = &buf->img;
+#if CONFIG_BGSPRITE
+ if (frame == frames_to_blur_backward && bg != NULL) {
+ // Insert bg into frames at ARF index.
+ frames[frames_to_blur - 1 - frame] = bg;
+ } else {
+#endif // CONFIG_BGSPRITE
+ struct lookahead_entry *buf =
+ av1_lookahead_peek(cpi->lookahead, which_buffer);
+ frames[frames_to_blur - 1 - frame] = &buf->img;
+#if CONFIG_BGSPRITE
+ }
+#endif // CONFIG_BGSPRITE
}
if (frames_to_blur > 0) {
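(Not part of the patch.) The filtering and normalization loops above accumulate, for every pixel, a weighted sum of co-located samples from the blurred frames (accumulator[]) together with the sum of the weights actually applied (count[]), then divide with rounding to produce the AltRef pixel. A minimal scalar sketch of that last step, with the OD_DIVU fast-division macro replaced by a plain division and assuming the same rounding behaviour:

// Illustrative only: one output pixel of the AltRef frame.
static unsigned char normalize_altref_pixel_example(unsigned int accum,
                                                    unsigned int weight_sum) {
  // Adding weight_sum/2 before the divide rounds to the nearest integer.
  return (unsigned char)((accum + (weight_sum >> 1)) / weight_sum);
}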
diff --git a/third_party/aom/av1/encoder/temporal_filter.h b/third_party/aom/av1/encoder/temporal_filter.h
index bc0863a63..ebb24703f 100644
--- a/third_party/aom/av1/encoder/temporal_filter.h
+++ b/third_party/aom/av1/encoder/temporal_filter.h
@@ -16,7 +16,11 @@
extern "C" {
#endif
-void av1_temporal_filter(AV1_COMP *cpi, int distance);
+void av1_temporal_filter(AV1_COMP *cpi,
+#if CONFIG_BGSPRITE
+ YV12_BUFFER_CONFIG *bg,
+#endif // CONFIG_BGSPRITE
+ int distance);
#ifdef __cplusplus
} // extern "C"
diff --git a/third_party/aom/av1/encoder/tokenize.c b/third_party/aom/av1/encoder/tokenize.c
index 18d2cd958..b9db891b3 100644
--- a/third_party/aom/av1/encoder/tokenize.c
+++ b/third_party/aom/av1/encoder/tokenize.c
@@ -277,12 +277,12 @@ static void cost_coeffs_b(int plane, int block, int blk_row, int blk_col,
struct macroblock_plane *p = &x->plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
const PLANE_TYPE type = pd->plane_type;
- const int ref = is_inter_block(mbmi);
- const TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
- const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, ref);
- const int rate = av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order,
- pd->above_context + blk_col,
- pd->left_context + blk_row, 0);
+ const TX_TYPE tx_type =
+ av1_get_tx_type(type, xd, blk_row, blk_col, block, tx_size);
+ const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi);
+ const int rate = av1_cost_coeffs(
+ cpi, x, plane, blk_row, blk_col, block, tx_size, scan_order,
+ pd->above_context + blk_col, pd->left_context + blk_row, 0);
args->this_rate += rate;
(void)plane_bsize;
av1_set_contexts(xd, pd, plane, tx_size, p->eobs[block] > 0, blk_col,
@@ -323,42 +323,48 @@ void av1_tokenize_palette_sb(const AV1_COMP *cpi,
const struct ThreadData *const td, int plane,
TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
int *rate) {
+ assert(plane == 0 || plane == 1);
const MACROBLOCK *const x = &td->mb;
const MACROBLOCKD *const xd = &x->e_mbd;
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const uint8_t *const color_map = xd->plane[plane].color_index_map;
const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
- const int n = pmi->palette_size[plane];
- int i, j;
- int this_rate = 0;
- uint8_t color_order[PALETTE_MAX_SIZE];
- const aom_prob(
- *const probs)[PALETTE_COLOR_INDEX_CONTEXTS][PALETTE_COLORS - 1] =
- plane == 0 ? av1_default_palette_y_color_index_prob
- : av1_default_palette_uv_color_index_prob;
+ aom_cdf_prob(
+ *palette_cdf)[PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] =
+ plane ? xd->tile_ctx->palette_uv_color_index_cdf
+ : xd->tile_ctx->palette_y_color_index_cdf;
int plane_block_width, rows, cols;
av1_get_block_dimensions(bsize, plane, xd, &plane_block_width, NULL, &rows,
&cols);
- assert(plane == 0 || plane == 1);
+ // The first color index does not use context or entropy.
+ (*t)->token = color_map[0];
+ (*t)->palette_cdf = NULL;
+ (*t)->skip_eob_node = 0;
+ ++(*t);
+
+ const int n = pmi->palette_size[plane];
+ const int calc_rate = rate && dry_run == DRY_RUN_COSTCOEFFS;
+ int this_rate = 0;
+ uint8_t color_order[PALETTE_MAX_SIZE];
#if CONFIG_PALETTE_THROUGHPUT
- int k;
- for (k = 1; k < rows + cols - 1; ++k) {
- for (j = AOMMIN(k, cols - 1); j >= AOMMAX(0, k - rows + 1); --j) {
- i = k - j;
+ for (int k = 1; k < rows + cols - 1; ++k) {
+ for (int j = AOMMIN(k, cols - 1); j >= AOMMAX(0, k - rows + 1); --j) {
+ int i = k - j;
#else
- for (i = 0; i < rows; ++i) {
- for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
+ for (int i = 0; i < rows; ++i) {
+ for (int j = (i == 0 ? 1 : 0); j < cols; ++j) {
#endif // CONFIG_PALETTE_THROUGHPUT
int color_new_idx;
const int color_ctx = av1_get_palette_color_index_context(
color_map, plane_block_width, i, j, n, color_order, &color_new_idx);
assert(color_new_idx >= 0 && color_new_idx < n);
- if (dry_run == DRY_RUN_COSTCOEFFS)
+ if (calc_rate) {
this_rate += cpi->palette_y_color_cost[n - PALETTE_MIN_SIZE][color_ctx]
[color_new_idx];
+ }
(*t)->token = color_new_idx;
- (*t)->context_tree = probs[n - PALETTE_MIN_SIZE][color_ctx];
+ (*t)->palette_cdf = palette_cdf[n - PALETTE_MIN_SIZE][color_ctx];
(*t)->skip_eob_node = 0;
++(*t);
}
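(Not part of the patch.) With CONFIG_PALETTE_THROUGHPUT the rewritten loop walks the color-index map along anti-diagonals (k = i + j), so every index on diagonal k takes its coding context only from positions on diagonals already visited; without the flag the walk is plain raster order. Both orders skip position (0, 0), whose index is now emitted first without context. A small sketch of the anti-diagonal traversal, with a hypothetical visit() callback standing in for the per-index tokenization:

// Illustrative only: visit every (i, j) except (0, 0) in wavefront order.
static void visit_palette_map_example(int rows, int cols,
                                      void (*visit)(int i, int j)) {
  for (int k = 1; k < rows + cols - 1; ++k) {
    const int j_hi = (k < cols - 1) ? k : cols - 1;          // AOMMIN(k, cols - 1)
    const int j_lo = (k - rows + 1 > 0) ? k - rows + 1 : 0;  // AOMMAX(0, k - rows + 1)
    for (int j = j_hi; j >= j_lo; --j) visit(k - j, j);      // i = k - j
  }
}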
@@ -434,17 +440,13 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,
const int segment_id = mbmi->segment_id;
#endif  // CONFIG_SUPERTX
const int16_t *scan, *nb;
- const TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
- const SCAN_ORDER *const scan_order =
- get_scan(cm, tx_size, tx_type, is_inter_block(mbmi));
+ const TX_TYPE tx_type =
+ av1_get_tx_type(type, xd, blk_row, blk_col, block, tx_size);
+ const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi);
const int ref = is_inter_block(mbmi);
unsigned int(*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
td->rd_counts.coef_counts[txsize_sqr_map[tx_size]][type][ref];
-#if CONFIG_EC_ADAPT
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-#else
- FRAME_CONTEXT *ec_ctx = cpi->common.fc;
-#endif
aom_cdf_prob(
*const coef_head_cdfs)[COEFF_CONTEXTS][CDF_SIZE(ENTROPY_TOKENS)] =
ec_ctx->coef_head_cdfs[txsize_sqr_map[tx_size]][type][ref];
@@ -595,16 +597,31 @@ void tokenize_vartx(ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
cost_coeffs_b(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
#endif
} else {
+#if CONFIG_RECT_TX_EXT
+ int is_qttx = plane_tx_size == quarter_txsize_lookup[plane_bsize];
+ const TX_SIZE sub_txs = is_qttx ? plane_tx_size : sub_tx_size_map[tx_size];
+#else
// Half the block size in transform block unit.
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+#endif
const int bsl = tx_size_wide_unit[sub_txs];
int i;
assert(bsl > 0);
for (i = 0; i < 4; ++i) {
+#if CONFIG_RECT_TX_EXT
+ int is_wide_tx = tx_size_wide_unit[sub_txs] > tx_size_high_unit[sub_txs];
+ const int offsetr =
+ is_qttx ? (is_wide_tx ? i * tx_size_high_unit[sub_txs] : 0)
+ : blk_row + ((i >> 1) * bsl);
+ const int offsetc =
+ is_qttx ? (is_wide_tx ? 0 : i * tx_size_wide_unit[sub_txs])
+ : blk_col + ((i & 0x01) * bsl);
+#else
const int offsetr = blk_row + ((i >> 1) * bsl);
const int offsetc = blk_col + ((i & 0x01) * bsl);
+#endif
int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
@@ -666,7 +683,7 @@ void av1_tokenize_sb_vartx(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
}
#endif
const struct macroblockd_plane *const pd = &xd->plane[plane];
-#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
+#if CONFIG_CHROMA_SUB8X8
const BLOCK_SIZE plane_bsize =
AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
#else
@@ -681,14 +698,30 @@ void av1_tokenize_sb_vartx(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
int idx, idy;
int block = 0;
int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
- for (idy = 0; idy < mi_height; idy += bh) {
- for (idx = 0; idx < mi_width; idx += bw) {
- tokenize_vartx(td, t, dry_run, max_tx_size, plane_bsize, idy, idx,
- block, plane, &arg);
- block += step;
+
+ const BLOCK_SIZE max_unit_bsize = get_plane_block_size(BLOCK_64X64, pd);
+ int mu_blocks_wide =
+ block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
+ int mu_blocks_high =
+ block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
+
+ mu_blocks_wide = AOMMIN(mi_width, mu_blocks_wide);
+ mu_blocks_high = AOMMIN(mi_height, mu_blocks_high);
+
+ for (idy = 0; idy < mi_height; idy += mu_blocks_high) {
+ for (idx = 0; idx < mi_width; idx += mu_blocks_wide) {
+ int blk_row, blk_col;
+ const int unit_height = AOMMIN(mu_blocks_high + idy, mi_height);
+ const int unit_width = AOMMIN(mu_blocks_wide + idx, mi_width);
+ for (blk_row = idy; blk_row < unit_height; blk_row += bh) {
+ for (blk_col = idx; blk_col < unit_width; blk_col += bw) {
+ tokenize_vartx(td, t, dry_run, max_tx_size, plane_bsize, blk_row,
+ blk_col, block, plane, &arg);
+ block += step;
+ }
+ }
}
}
-
#if !CONFIG_LV_MAP
if (!dry_run) {
(*t)->token = EOSB_TOKEN;
diff --git a/third_party/aom/av1/encoder/tokenize.h b/third_party/aom/av1/encoder/tokenize.h
index cbfa3cd91..73f0305fa 100644
--- a/third_party/aom/av1/encoder/tokenize.h
+++ b/third_party/aom/av1/encoder/tokenize.h
@@ -37,6 +37,9 @@ typedef struct {
typedef struct {
aom_cdf_prob (*tail_cdf)[CDF_SIZE(ENTROPY_TOKENS)];
aom_cdf_prob (*head_cdf)[CDF_SIZE(ENTROPY_TOKENS)];
+#if CONFIG_PALETTE
+ aom_cdf_prob *palette_cdf;
+#endif // CONFIG_PALETTE
int eob_val;
int first_val;
const aom_prob *context_tree;
diff --git a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c b/third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c
new file mode 100644
index 000000000..c8d4ccb70
--- /dev/null
+++ b/third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <immintrin.h>
+
+#include "./av1_rtcd.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/aom_dsp_common.h"
+
+static INLINE void init_one_qp(const __m128i *p, __m256i *qp) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i dc = _mm_unpacklo_epi16(*p, zero);
+ const __m128i ac = _mm_unpackhi_epi16(*p, zero);
+ *qp = _mm256_insertf128_si256(_mm256_castsi128_si256(dc), ac, 1);
+}
+
+static INLINE void update_qp(__m256i *qp) {
+ qp[0] = _mm256_permute2x128_si256(qp[0], qp[0], 0x11);
+ qp[1] = _mm256_permute2x128_si256(qp[1], qp[1], 0x11);
+ qp[2] = _mm256_permute2x128_si256(qp[2], qp[2], 0x11);
+}
+
+static INLINE void init_qp(const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *dequant_ptr, int log_scale,
+ __m256i *qp) {
+ __m128i round = _mm_loadu_si128((const __m128i *)round_ptr);
+ round = _mm_srai_epi16(round, log_scale);
+ const __m128i quant = _mm_loadu_si128((const __m128i *)quant_ptr);
+ const __m128i dequant = _mm_loadu_si128((const __m128i *)dequant_ptr);
+
+ init_one_qp(&round, &qp[0]);
+ init_one_qp(&quant, &qp[1]);
+ init_one_qp(&dequant, &qp[2]);
+}
+
+static INLINE void quantize(const __m256i *qp, __m256i *c,
+ const int16_t *iscan_ptr, int log_scale,
+ tran_low_t *qcoeff, tran_low_t *dqcoeff,
+ __m256i *eob) {
+ const __m256i abs = _mm256_abs_epi32(*c);
+ __m256i q = _mm256_add_epi32(abs, qp[0]);
+
+ __m256i q_lo = _mm256_mul_epi32(q, qp[1]);
+ __m256i q_hi = _mm256_srli_epi64(q, 32);
+ const __m256i qp_hi = _mm256_srli_epi64(qp[1], 32);
+ q_hi = _mm256_mul_epi32(q_hi, qp_hi);
+ q_lo = _mm256_srli_epi64(q_lo, 16 - log_scale);
+ q_hi = _mm256_srli_epi64(q_hi, 16 - log_scale);
+ q_hi = _mm256_slli_epi64(q_hi, 32);
+ q = _mm256_or_si256(q_lo, q_hi);
+
+ __m256i dq = _mm256_mullo_epi32(q, qp[2]);
+ dq = _mm256_srai_epi32(dq, log_scale);
+ q = _mm256_sign_epi32(q, *c);
+ dq = _mm256_sign_epi32(dq, *c);
+
+ _mm256_storeu_si256((__m256i *)qcoeff, q);
+ _mm256_storeu_si256((__m256i *)dqcoeff, dq);
+
+ const __m128i isc = _mm_loadu_si128((const __m128i *)iscan_ptr);
+ const __m128i zr = _mm_setzero_si128();
+ const __m128i lo = _mm_unpacklo_epi16(isc, zr);
+ const __m128i hi = _mm_unpackhi_epi16(isc, zr);
+ const __m256i iscan =
+ _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
+
+ const __m256i zero = _mm256_setzero_si256();
+ const __m256i zc = _mm256_cmpeq_epi32(dq, zero);
+ const __m256i nz = _mm256_cmpeq_epi32(zc, zero);
+ __m256i cur_eob = _mm256_sub_epi32(iscan, nz);
+ cur_eob = _mm256_and_si256(cur_eob, nz);
+ *eob = _mm256_max_epi32(cur_eob, *eob);
+}
+
+void av1_highbd_quantize_fp_avx2(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, int log_scale) {
+ (void)scan;
+ (void)zbin_ptr;
+ (void)quant_shift_ptr;
+ const unsigned int step = 8;
+
+ if (LIKELY(!skip_block)) {
+ __m256i qp[3], coeff;
+
+ init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, qp);
+ coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
+
+ __m256i eob = _mm256_setzero_si256();
+ quantize(qp, &coeff, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr, &eob);
+
+ coeff_ptr += step;
+ qcoeff_ptr += step;
+ dqcoeff_ptr += step;
+ iscan += step;
+ n_coeffs -= step;
+
+ update_qp(qp);
+ while (n_coeffs > 0) {
+ coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
+ quantize(qp, &coeff, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr, &eob);
+
+ coeff_ptr += step;
+ qcoeff_ptr += step;
+ dqcoeff_ptr += step;
+ iscan += step;
+ n_coeffs -= step;
+ }
+ {
+ __m256i eob_s;
+ eob_s = _mm256_shuffle_epi32(eob, 0xe);
+ eob = _mm256_max_epi16(eob, eob_s);
+ eob_s = _mm256_shufflelo_epi16(eob, 0xe);
+ eob = _mm256_max_epi16(eob, eob_s);
+ eob_s = _mm256_shufflelo_epi16(eob, 1);
+ eob = _mm256_max_epi16(eob, eob_s);
+ const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob),
+ _mm256_extractf128_si256(eob, 1));
+ *eob_ptr = _mm_extract_epi16(final_eob, 0);
+ }
+ } else {
+ do {
+ const __m256i zero = _mm256_setzero_si256();
+ _mm256_storeu_si256((__m256i *)qcoeff_ptr, zero);
+ _mm256_storeu_si256((__m256i *)dqcoeff_ptr, zero);
+ qcoeff_ptr += step;
+ dqcoeff_ptr += step;
+ n_coeffs -= step;
+ } while (n_coeffs > 0);
+ *eob_ptr = 0;
+ }
+}
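(Not part of the patch.) Per 8-coefficient group, quantize() above vectorizes the "fp" quantizer: add the pre-shifted rounding term, multiply by the quantizer and shift by 16 - log_scale, dequantize, restore the signs, and track the end-of-block as the largest scan position holding a nonzero dequantized value. A scalar sketch of the per-coefficient math, assuming the codebase's tran_low_t type and the same rounding as the SIMD path:

// Illustrative only: one coefficient of the highbd fp quantizer.
static void highbd_quantize_fp_scalar_example(tran_low_t c, int16_t round,
                                              int16_t quant, int16_t dequant,
                                              int log_scale, int iscan,
                                              tran_low_t *q, tran_low_t *dq,
                                              int *eob) {
  const int abs_c = c < 0 ? -c : c;
  const int64_t tmp = abs_c + (round >> log_scale);
  const int abs_q = (int)((tmp * quant) >> (16 - log_scale));
  const int abs_dq = (abs_q * dequant) >> log_scale;
  *q = c < 0 ? (tran_low_t)-abs_q : (tran_low_t)abs_q;
  *dq = c < 0 ? (tran_low_t)-abs_dq : (tran_low_t)abs_dq;
  if (abs_dq != 0 && iscan + 1 > *eob) *eob = iscan + 1;  // eob is 1 past the last nonzero
}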
diff --git a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c b/third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c
index fa5626002..8d717a083 100644
--- a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c
+++ b/third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c
@@ -133,9 +133,10 @@ void av1_highbd_quantize_fp_sse4_1(
coeff[0] = _mm_loadu_si128((__m128i const *)src);
qparam[0] =
- _mm_set_epi32(round_ptr[1], round_ptr[1], round_ptr[1], round_ptr[0]);
- qparam[1] = _mm_set_epi64x(quant_ptr[1], quant_ptr[0]);
- qparam[2] = _mm_set_epi64x(dequant_ptr[1], dequant_ptr[0]);
+ _mm_set_epi32(round_ptr[1] >> log_scale, round_ptr[1] >> log_scale,
+ round_ptr[1] >> log_scale, round_ptr[0] >> log_scale);
+ qparam[1] = _mm_set_epi32(0, quant_ptr[1], 0, quant_ptr[0]);
+ qparam[2] = _mm_set_epi32(0, dequant_ptr[1], 0, dequant_ptr[0]);
// DC and first 3 AC
quantize_coeff_phase1(&coeff[0], qparam, shift, log_scale, qcoeff, dequant,
@@ -143,8 +144,8 @@ void av1_highbd_quantize_fp_sse4_1(
// update round/quant/dequant for AC
qparam[0] = _mm_unpackhi_epi64(qparam[0], qparam[0]);
- qparam[1] = _mm_set_epi64x(quant_ptr[1], quant_ptr[1]);
- qparam[2] = _mm_set_epi64x(dequant_ptr[1], dequant_ptr[1]);
+ qparam[1] = _mm_set_epi32(0, quant_ptr[1], 0, quant_ptr[1]);
+ qparam[2] = _mm_set_epi32(0, dequant_ptr[1], 0, dequant_ptr[1]);
quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift,
log_scale, quanAddr, dquanAddr);
diff --git a/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c b/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c
new file mode 100644
index 000000000..1c0a120ca
--- /dev/null
+++ b/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <immintrin.h>
+
+#include "./av1_rtcd.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/aom_dsp_common.h"
+
+static INLINE void read_coeff(const tran_low_t *coeff, __m256i *c) {
+#if CONFIG_HIGHBITDEPTH
+ const __m256i x0 = _mm256_loadu_si256((const __m256i *)coeff);
+ const __m256i x1 = _mm256_loadu_si256((const __m256i *)coeff + 1);
+ *c = _mm256_packs_epi32(x0, x1);
+ *c = _mm256_permute4x64_epi64(*c, 0xD8);
+#else
+ *c = _mm256_loadu_si256((const __m256i *)coeff);
+#endif
+}
+
+static INLINE void write_zero(tran_low_t *qcoeff) {
+ const __m256i zero = _mm256_setzero_si256();
+#if CONFIG_HIGHBITDEPTH
+ _mm256_storeu_si256((__m256i *)qcoeff, zero);
+ _mm256_storeu_si256((__m256i *)qcoeff + 1, zero);
+#else
+ _mm256_storeu_si256((__m256i *)qcoeff, zero);
+#endif
+}
+
+static INLINE void init_one_qp(const __m128i *p, __m256i *qp) {
+ const __m128i ac = _mm_unpackhi_epi64(*p, *p);
+ *qp = _mm256_insertf128_si256(_mm256_castsi128_si256(*p), ac, 1);
+}
+
+static INLINE void init_qp(const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *dequant_ptr, int log_scale,
+ __m256i *thr, __m256i *qp) {
+ __m128i round = _mm_loadu_si128((const __m128i *)round_ptr);
+ const __m128i quant = _mm_loadu_si128((const __m128i *)quant_ptr);
+ const __m128i dequant = _mm_loadu_si128((const __m128i *)dequant_ptr);
+
+ if (log_scale > 0) {
+ const __m128i rnd = _mm_set1_epi16((int16_t)1 << (log_scale - 1));
+ round = _mm_add_epi16(round, rnd);
+ round = _mm_srai_epi16(round, log_scale);
+ }
+
+ init_one_qp(&round, &qp[0]);
+ init_one_qp(&quant, &qp[1]);
+
+ if (log_scale > 0) {
+ qp[1] = _mm256_slli_epi16(qp[1], log_scale);
+ }
+
+ init_one_qp(&dequant, &qp[2]);
+ *thr = _mm256_srai_epi16(qp[2], 1 + log_scale);
+}
+
+static INLINE void update_qp(int log_scale, __m256i *thr, __m256i *qp) {
+ qp[0] = _mm256_permute2x128_si256(qp[0], qp[0], 0x11);
+ qp[1] = _mm256_permute2x128_si256(qp[1], qp[1], 0x11);
+ qp[2] = _mm256_permute2x128_si256(qp[2], qp[2], 0x11);
+ *thr = _mm256_srai_epi16(qp[2], 1 + log_scale);
+}
+
+#define store_quan(q, addr) \
+ do { \
+ __m256i sign_bits = _mm256_srai_epi16(q, 15); \
+ __m256i y0 = _mm256_unpacklo_epi16(q, sign_bits); \
+ __m256i y1 = _mm256_unpackhi_epi16(q, sign_bits); \
+ __m256i x0 = _mm256_permute2x128_si256(y0, y1, 0x20); \
+ __m256i x1 = _mm256_permute2x128_si256(y0, y1, 0x31); \
+ _mm256_storeu_si256((__m256i *)addr, x0); \
+ _mm256_storeu_si256((__m256i *)addr + 1, x1); \
+ } while (0)
+
+#if CONFIG_HIGHBITDEPTH
+#define store_two_quan(q, addr1, dq, addr2) \
+ do { \
+ store_quan(q, addr1); \
+ store_quan(dq, addr2); \
+ } while (0)
+#else
+#define store_two_quan(q, addr1, dq, addr2) \
+ do { \
+ _mm256_storeu_si256((__m256i *)addr1, q); \
+ _mm256_storeu_si256((__m256i *)addr2, dq); \
+ } while (0)
+#endif
+
+static INLINE void quantize(const __m256i *thr, const __m256i *qp, __m256i *c,
+ const int16_t *iscan_ptr, tran_low_t *qcoeff,
+ tran_low_t *dqcoeff, __m256i *eob) {
+ const __m256i abs = _mm256_abs_epi16(*c);
+ __m256i mask = _mm256_cmpgt_epi16(abs, *thr);
+ mask = _mm256_or_si256(mask, _mm256_cmpeq_epi16(abs, *thr));
+ const int nzflag = _mm256_movemask_epi8(mask);
+
+ if (nzflag) {
+ __m256i q = _mm256_adds_epi16(abs, qp[0]);
+ q = _mm256_mulhi_epi16(q, qp[1]);
+ q = _mm256_sign_epi16(q, *c);
+ const __m256i dq = _mm256_mullo_epi16(q, qp[2]);
+
+ store_two_quan(q, qcoeff, dq, dqcoeff);
+ const __m256i zero = _mm256_setzero_si256();
+ const __m256i iscan = _mm256_loadu_si256((const __m256i *)iscan_ptr);
+ const __m256i zero_coeff = _mm256_cmpeq_epi16(dq, zero);
+ const __m256i nzero_coeff = _mm256_cmpeq_epi16(zero_coeff, zero);
+ __m256i cur_eob = _mm256_sub_epi16(iscan, nzero_coeff);
+ cur_eob = _mm256_and_si256(cur_eob, nzero_coeff);
+ *eob = _mm256_max_epi16(*eob, cur_eob);
+ } else {
+ write_zero(qcoeff);
+ write_zero(dqcoeff);
+ }
+}
+
+void av1_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+ (void)scan_ptr;
+ (void)zbin_ptr;
+ (void)quant_shift_ptr;
+ const unsigned int step = 16;
+
+ if (LIKELY(!skip_block)) {
+ __m256i qp[3];
+ __m256i coeff, thr;
+ const int log_scale = 0;
+
+ init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp);
+ read_coeff(coeff_ptr, &coeff);
+
+ __m256i eob = _mm256_setzero_si256();
+ quantize(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
+
+ coeff_ptr += step;
+ qcoeff_ptr += step;
+ dqcoeff_ptr += step;
+ iscan_ptr += step;
+ n_coeffs -= step;
+
+ update_qp(log_scale, &thr, qp);
+
+ while (n_coeffs > 0) {
+ read_coeff(coeff_ptr, &coeff);
+ quantize(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
+
+ coeff_ptr += step;
+ qcoeff_ptr += step;
+ dqcoeff_ptr += step;
+ iscan_ptr += step;
+ n_coeffs -= step;
+ }
+ {
+ __m256i eob_s;
+ eob_s = _mm256_shuffle_epi32(eob, 0xe);
+ eob = _mm256_max_epi16(eob, eob_s);
+ eob_s = _mm256_shufflelo_epi16(eob, 0xe);
+ eob = _mm256_max_epi16(eob, eob_s);
+ eob_s = _mm256_shufflelo_epi16(eob, 1);
+ eob = _mm256_max_epi16(eob, eob_s);
+ const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob),
+ _mm256_extractf128_si256(eob, 1));
+ *eob_ptr = _mm_extract_epi16(final_eob, 0);
+ }
+ } else {
+ do {
+ write_zero(qcoeff_ptr);
+ write_zero(dqcoeff_ptr);
+ qcoeff_ptr += step;
+ dqcoeff_ptr += step;
+ n_coeffs -= step;
+ } while (n_coeffs > 0);
+ *eob_ptr = 0;
+ }
+}
+
+static INLINE void quantize_32x32(const __m256i *thr, const __m256i *qp,
+ __m256i *c, const int16_t *iscan_ptr,
+ tran_low_t *qcoeff, tran_low_t *dqcoeff,
+ __m256i *eob) {
+ const __m256i abs = _mm256_abs_epi16(*c);
+ __m256i mask = _mm256_cmpgt_epi16(abs, *thr);
+ mask = _mm256_or_si256(mask, _mm256_cmpeq_epi16(abs, *thr));
+ const int nzflag = _mm256_movemask_epi8(mask);
+
+ if (nzflag) {
+ __m256i q = _mm256_adds_epi16(abs, qp[0]);
+ q = _mm256_mulhi_epu16(q, qp[1]);
+
+ __m256i dq = _mm256_mullo_epi16(q, qp[2]);
+ dq = _mm256_srli_epi16(dq, 1);
+
+ q = _mm256_sign_epi16(q, *c);
+ dq = _mm256_sign_epi16(dq, *c);
+
+ store_two_quan(q, qcoeff, dq, dqcoeff);
+ const __m256i zero = _mm256_setzero_si256();
+ const __m256i iscan = _mm256_loadu_si256((const __m256i *)iscan_ptr);
+ const __m256i zero_coeff = _mm256_cmpeq_epi16(dq, zero);
+ const __m256i nzero_coeff = _mm256_cmpeq_epi16(zero_coeff, zero);
+ __m256i cur_eob = _mm256_sub_epi16(iscan, nzero_coeff);
+ cur_eob = _mm256_and_si256(cur_eob, nzero_coeff);
+ *eob = _mm256_max_epi16(*eob, cur_eob);
+ } else {
+ write_zero(qcoeff);
+ write_zero(dqcoeff);
+ }
+}
+
+void av1_quantize_fp_32x32_avx2(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+ (void)scan_ptr;
+ (void)zbin_ptr;
+ (void)quant_shift_ptr;
+ const unsigned int step = 16;
+
+ if (LIKELY(!skip_block)) {
+ __m256i qp[3];
+ __m256i coeff, thr;
+ const int log_scale = 1;
+
+ init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp);
+ read_coeff(coeff_ptr, &coeff);
+
+ __m256i eob = _mm256_setzero_si256();
+ quantize_32x32(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
+
+ coeff_ptr += step;
+ qcoeff_ptr += step;
+ dqcoeff_ptr += step;
+ iscan_ptr += step;
+ n_coeffs -= step;
+
+ update_qp(log_scale, &thr, qp);
+
+ while (n_coeffs > 0) {
+ read_coeff(coeff_ptr, &coeff);
+ quantize_32x32(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr,
+ &eob);
+
+ coeff_ptr += step;
+ qcoeff_ptr += step;
+ dqcoeff_ptr += step;
+ iscan_ptr += step;
+ n_coeffs -= step;
+ }
+ {
+ __m256i eob_s;
+ eob_s = _mm256_shuffle_epi32(eob, 0xe);
+ eob = _mm256_max_epi16(eob, eob_s);
+ eob_s = _mm256_shufflelo_epi16(eob, 0xe);
+ eob = _mm256_max_epi16(eob, eob_s);
+ eob_s = _mm256_shufflelo_epi16(eob, 1);
+ eob = _mm256_max_epi16(eob, eob_s);
+ const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob),
+ _mm256_extractf128_si256(eob, 1));
+ *eob_ptr = _mm_extract_epi16(final_eob, 0);
+ }
+ } else {
+ do {
+ write_zero(qcoeff_ptr);
+ write_zero(dqcoeff_ptr);
+ qcoeff_ptr += step;
+ dqcoeff_ptr += step;
+ n_coeffs -= step;
+ } while (n_coeffs > 0);
+ *eob_ptr = 0;
+ }
+}
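(Not part of the patch.) Both AVX2 quantizers above first compare every |coeff| in the 16-lane group against thr, which init_qp()/update_qp() set to dequant >> (1 + log_scale); if no lane reaches the threshold the whole group is written out as zeros without performing any multiplies, presumably because such small coefficients cannot survive fp quantization. A scalar sketch of that early-skip test, using only the AC dequant value for simplicity (the first group's DC lane uses the DC dequant):

// Illustrative only: can this group of coefficients be zeroed outright?
static int quant_group_skippable_example(const int16_t *coeff, int n,
                                         int16_t dequant_ac, int log_scale) {
  const int thr = dequant_ac >> (1 + log_scale);
  for (int i = 0; i < n; ++i) {
    const int abs_c = coeff[i] < 0 ? -coeff[i] : coeff[i];
    if (abs_c >= thr) return 0;  // at least one lane may quantize to nonzero
  }
  return 1;  // every lane quantizes to zero
}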
diff --git a/third_party/aom/av1/encoder/x86/dct_intrin_sse2.c b/third_party/aom/av1/encoder/x86/dct_intrin_sse2.c
index 37c4b0d88..496c33395 100644
--- a/third_party/aom/av1/encoder/x86/dct_intrin_sse2.c
+++ b/third_party/aom/av1/encoder/x86/dct_intrin_sse2.c
@@ -203,8 +203,12 @@ static void fidtx4_sse2(__m128i *in) {
#endif // CONFIG_EXT_TX
void av1_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
__m128i in[4];
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif
switch (tx_type) {
case DCT_DCT: aom_fdct4x4_sse2(input, output, stride); break;
@@ -1301,8 +1305,12 @@ static void fidtx8_sse2(__m128i *in) {
#endif // CONFIG_EXT_TX
void av1_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
__m128i in[8];
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif
switch (tx_type) {
case DCT_DCT: aom_fdct8x8_sse2(input, output, stride); break;
@@ -2334,8 +2342,12 @@ static void fidtx16_sse2(__m128i *in0, __m128i *in1) {
#endif // CONFIG_EXT_TX
void av1_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
__m128i in0[16], in1[16];
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif
switch (tx_type) {
case DCT_DCT:
@@ -2550,8 +2562,12 @@ static INLINE void write_buffer_4x8(tran_low_t *output, __m128i *res) {
}
void av1_fht4x8_sse2(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
__m128i in[8];
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif
switch (tx_type) {
case DCT_DCT:
@@ -2724,8 +2740,12 @@ static INLINE void write_buffer_8x4(tran_low_t *output, __m128i *res) {
}
void av1_fht8x4_sse2(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
__m128i in[8];
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif
switch (tx_type) {
case DCT_DCT:
@@ -2864,8 +2884,12 @@ static void row_8x16_rounding(__m128i *in, int bits) {
}
void av1_fht8x16_sse2(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
__m128i in[16];
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif
__m128i *const t = in; // Alias to top 8x8 sub block
__m128i *const b = in + 8; // Alias to bottom 8x8 sub block
@@ -3045,8 +3069,12 @@ static INLINE void load_buffer_16x8(const int16_t *input, __m128i *in,
#define col_16x8_rounding row_8x16_rounding
void av1_fht16x8_sse2(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
__m128i in[16];
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif
__m128i *const l = in; // Alias to left 8x8 sub block
__m128i *const r = in + 8; // Alias to right 8x8 sub block, which we store
@@ -3355,8 +3383,12 @@ static INLINE void fhalfright32_16col(__m128i *tl, __m128i *tr, __m128i *bl,
// For 16x32, this means the input is a 2x2 grid of such blocks.
// For 32x16, it means the input is a 4x1 grid.
void av1_fht16x32_sse2(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
__m128i intl[16], intr[16], inbl[16], inbr[16];
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif
switch (tx_type) {
case DCT_DCT:
@@ -3544,8 +3576,12 @@ static INLINE void write_buffer_32x16(tran_low_t *output, __m128i *res0,
}
void av1_fht32x16_sse2(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
__m128i in0[16], in1[16], in2[16], in3[16];
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0);
switch (tx_type) {
@@ -3784,8 +3820,12 @@ static INLINE void write_buffer_32x32(__m128i *in0, __m128i *in1, __m128i *in2,
}
void av1_fht32x32_sse2(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
__m128i in0[32], in1[32], in2[32], in3[32];
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "No 32x32 sse2 MRC_DCT implementation");
+#endif
load_buffer_32x32(input, in0, in1, in2, in3, stride, 0, 0);
switch (tx_type) {
diff --git a/third_party/aom/av1/encoder/x86/error_intrin_avx2.c b/third_party/aom/av1/encoder/x86/error_intrin_avx2.c
index ae733a1ce..20ba4149c 100644
--- a/third_party/aom/av1/encoder/x86/error_intrin_avx2.c
+++ b/third_party/aom/av1/encoder/x86/error_intrin_avx2.c
@@ -14,7 +14,20 @@
#include "./av1_rtcd.h"
#include "aom/aom_integer.h"
-int64_t av1_block_error_avx2(const int16_t *coeff, const int16_t *dqcoeff,
+static INLINE void read_coeff(const tran_low_t *coeff, intptr_t offset,
+ __m256i *c) {
+ const tran_low_t *addr = coeff + offset;
+#if CONFIG_HIGHBITDEPTH
+ const __m256i x0 = _mm256_loadu_si256((const __m256i *)addr);
+ const __m256i x1 = _mm256_loadu_si256((const __m256i *)addr + 1);
+ const __m256i y = _mm256_packs_epi32(x0, x1);
+ *c = _mm256_permute4x64_epi64(y, 0xD8);
+#else
+ *c = _mm256_loadu_si256((const __m256i *)addr);
+#endif
+}
+
+int64_t av1_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff,
intptr_t block_size, int64_t *ssz) {
__m256i sse_reg, ssz_reg, coeff_reg, dqcoeff_reg;
__m256i exp_dqcoeff_lo, exp_dqcoeff_hi, exp_coeff_lo, exp_coeff_hi;
@@ -22,16 +35,16 @@ int64_t av1_block_error_avx2(const int16_t *coeff, const int16_t *dqcoeff,
__m128i sse_reg128, ssz_reg128;
int64_t sse;
int i;
- const __m256i zero_reg = _mm256_set1_epi16(0);
+ const __m256i zero_reg = _mm256_setzero_si256();
// init sse and ssz registers to zero
- sse_reg = _mm256_set1_epi16(0);
- ssz_reg = _mm256_set1_epi16(0);
+ sse_reg = _mm256_setzero_si256();
+ ssz_reg = _mm256_setzero_si256();
for (i = 0; i < block_size; i += 16) {
// load 32 bytes from coeff and dqcoeff
- coeff_reg = _mm256_loadu_si256((const __m256i *)(coeff + i));
- dqcoeff_reg = _mm256_loadu_si256((const __m256i *)(dqcoeff + i));
+ read_coeff(coeff, i, &coeff_reg);
+ read_coeff(dqcoeff, i, &dqcoeff_reg);
// dqcoeff - coeff
dqcoeff_reg = _mm256_sub_epi16(dqcoeff_reg, coeff_reg);
// madd (dqcoeff - coeff)
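(Not part of the patch.) The change above makes av1_block_error_avx2() read tran_low_t coefficients (packing 32-bit values down to 16 bits when CONFIG_HIGHBITDEPTH is on) instead of raw int16_t arrays; the quantity accumulated is unchanged. A scalar reference of what the function computes, assuming the usual definitions of sse and ssz:

// Illustrative only: distortion between dequantized and original coefficients.
static int64_t block_error_scalar_example(const tran_low_t *coeff,
                                          const tran_low_t *dqcoeff,
                                          intptr_t block_size, int64_t *ssz) {
  int64_t sse = 0, sz = 0;
  for (intptr_t i = 0; i < block_size; ++i) {
    const int64_t d = (int64_t)dqcoeff[i] - coeff[i];
    sse += d * d;                                  // squared error
    sz += (int64_t)coeff[i] * (int64_t)coeff[i];   // source energy (ssz)
  }
  *ssz = sz;
  return sse;
}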
diff --git a/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c b/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c
index b56eed518..cab36f2bd 100644
--- a/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c
+++ b/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c
@@ -113,25 +113,13 @@ static void fdct4x4_sse4_1(__m128i *in, int bit) {
in[3] = _mm_unpackhi_epi64(v1, v3);
}
-static INLINE void write_buffer_4x4(__m128i *res, tran_low_t *output) {
+static INLINE void write_buffer_4x4(__m128i *res, int32_t *output) {
_mm_store_si128((__m128i *)(output + 0 * 4), res[0]);
_mm_store_si128((__m128i *)(output + 1 * 4), res[1]);
_mm_store_si128((__m128i *)(output + 2 * 4), res[2]);
_mm_store_si128((__m128i *)(output + 3 * 4), res[3]);
}
-// Note:
-// We implement av1_fwd_txfm2d_4x4(). This function is kept here since
-// av1_highbd_fht4x4_c() is not removed yet
-void av1_highbd_fht4x4_sse4_1(const int16_t *input, tran_low_t *output,
- int stride, int tx_type) {
- (void)input;
- (void)output;
- (void)stride;
- (void)tx_type;
- assert(0);
-}
-
static void fadst4x4_sse4_1(__m128i *in, int bit) {
const int32_t *cospi = cospi_arr(bit);
const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
@@ -416,7 +404,7 @@ static INLINE void col_txfm_8x8_rounding(__m128i *in, int shift) {
in[15] = _mm_srai_epi32(in[15], shift);
}
-static INLINE void write_buffer_8x8(const __m128i *res, tran_low_t *output) {
+static INLINE void write_buffer_8x8(const __m128i *res, int32_t *output) {
_mm_store_si128((__m128i *)(output + 0 * 4), res[0]);
_mm_store_si128((__m128i *)(output + 1 * 4), res[1]);
_mm_store_si128((__m128i *)(output + 2 * 4), res[2]);
@@ -1800,7 +1788,7 @@ static void col_txfm_16x16_rounding(__m128i *in, int shift) {
col_txfm_8x8_rounding(&in[48], shift);
}
-static void write_buffer_16x16(const __m128i *in, tran_low_t *output) {
+static void write_buffer_16x16(const __m128i *in, int32_t *output) {
const int size_8x8 = 16 * 4;
write_buffer_8x8(&in[0], output);
output += size_8x8;
diff --git a/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c b/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
index 8495ad1aa..af8e9a5f4 100644
--- a/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
+++ b/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
@@ -18,51 +18,6 @@
#include "aom_dsp/txfm_common.h"
#include "aom_dsp/x86/txfm_common_avx2.h"
-static int32_t get_16x16_sum(const int16_t *input, int stride) {
- __m256i r0, r1, r2, r3, u0, u1;
- __m256i zero = _mm256_setzero_si256();
- __m256i sum = _mm256_setzero_si256();
- const int16_t *blockBound = input + (stride << 4);
- __m128i v0, v1;
-
- while (input < blockBound) {
- r0 = _mm256_loadu_si256((__m256i const *)input);
- r1 = _mm256_loadu_si256((__m256i const *)(input + stride));
- r2 = _mm256_loadu_si256((__m256i const *)(input + 2 * stride));
- r3 = _mm256_loadu_si256((__m256i const *)(input + 3 * stride));
-
- u0 = _mm256_add_epi16(r0, r1);
- u1 = _mm256_add_epi16(r2, r3);
- sum = _mm256_add_epi16(sum, u0);
- sum = _mm256_add_epi16(sum, u1);
-
- input += stride << 2;
- }
-
- // unpack 16 int16_t into 2x8 int32_t
- u0 = _mm256_unpacklo_epi16(zero, sum);
- u1 = _mm256_unpackhi_epi16(zero, sum);
- u0 = _mm256_srai_epi32(u0, 16);
- u1 = _mm256_srai_epi32(u1, 16);
- sum = _mm256_add_epi32(u0, u1);
-
- u0 = _mm256_srli_si256(sum, 8);
- u1 = _mm256_add_epi32(sum, u0);
-
- v0 = _mm_add_epi32(_mm256_extracti128_si256(u1, 1),
- _mm256_castsi256_si128(u1));
- v1 = _mm_srli_si128(v0, 4);
- v0 = _mm_add_epi32(v0, v1);
- return (int32_t)_mm_extract_epi32(v0, 0);
-}
-
-void aom_fdct16x16_1_avx2(const int16_t *input, tran_low_t *output,
- int stride) {
- int32_t dc = get_16x16_sum(input, stride);
- output[0] = (tran_low_t)(dc >> 1);
- _mm256_zeroupper();
-}
-
static INLINE void load_buffer_16x16(const int16_t *input, int stride,
int flipud, int fliplr, __m256i *in) {
if (!flipud) {
@@ -959,8 +914,12 @@ static void fidtx16_avx2(__m256i *in) {
#endif
void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
__m256i in[16];
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif
switch (tx_type) {
case DCT_DCT:
@@ -1084,22 +1043,6 @@ void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride,
_mm256_zeroupper();
}
-void aom_fdct32x32_1_avx2(const int16_t *input, tran_low_t *output,
- int stride) {
- // left and upper corner
- int32_t sum = get_16x16_sum(input, stride);
- // right and upper corner
- sum += get_16x16_sum(input + 16, stride);
- // left and lower corner
- sum += get_16x16_sum(input + (stride << 4), stride);
- // right and lower corner
- sum += get_16x16_sum(input + (stride << 4) + 16, stride);
-
- sum >>= 3;
- output[0] = (tran_low_t)sum;
- _mm256_zeroupper();
-}
-
static void mm256_vectors_swap(__m256i *a0, __m256i *a1, const int size) {
int i = 0;
__m256i temp;
@@ -1570,9 +1513,13 @@ static void fidtx32_avx2(__m256i *in0, __m256i *in1) {
#endif
void av1_fht32x32_avx2(const int16_t *input, tran_low_t *output, int stride,
- int tx_type) {
+ TxfmParam *txfm_param) {
__m256i in0[32]; // left 32 columns
__m256i in1[32]; // right 32 columns
+ int tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "No avx2 32x32 implementation of MRC_DCT");
+#endif
switch (tx_type) {
case DCT_DCT: