Diffstat (limited to 'third_party/aom/av1/encoder/rdopt.c')
-rw-r--r-- | third_party/aom/av1/encoder/rdopt.c | 3417
1 file changed, 2505 insertions, 912 deletions
diff --git a/third_party/aom/av1/encoder/rdopt.c b/third_party/aom/av1/encoder/rdopt.c index 2a537a06a..43b00b83b 100644 --- a/third_party/aom/av1/encoder/rdopt.c +++ b/third_party/aom/av1/encoder/rdopt.c @@ -63,7 +63,7 @@ #endif // CONFIG_PVQ #if CONFIG_PVQ || CONFIG_DAALA_DIST #include "av1/common/pvq.h" -#endif // CONFIG_PVQ || CONFIG_DAALA_DIST +#endif // CONFIG_PVQ || CONFIG_DIST_8X8 #if CONFIG_DUAL_FILTER #define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS) #if USE_EXTRA_FILTER @@ -113,8 +113,14 @@ static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = { #endif // CONFIG_EXT_REFS #if CONFIG_EXT_REFS +#if CONFIG_EXT_COMP_REFS +#define SECOND_REF_FRAME_MASK \ + ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | (1 << GOLDEN_FRAME) | \ + (1 << LAST2_FRAME) | 0x01) // NOLINT +#else // !CONFIG_EXT_COMP_REFS #define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01) -#else +#endif // CONFIG_EXT_COMP_REFS +#else // !CONFIG_EXT_REFS #define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01) #endif // CONFIG_EXT_REFS @@ -126,6 +132,11 @@ static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = { #define FILTER_FAST_SEARCH 1 #endif // CONFIG_EXT_INTRA +// Setting this to 1 will disable trellis optimization within the +// transform search. Trellis optimization will still be applied +// in the final encode. +#define DISABLE_TRELLISQ_SEARCH 0 + const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671, // vert -7.7051, -3.2234, -3.6193, 3.4533 }; // horz @@ -191,6 +202,56 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { // TODO(zoeliu): May need to reconsider the order on the modes to check #if CONFIG_EXT_INTER + +#if CONFIG_COMPOUND_SINGLEREF + // Single ref comp mode + { SR_NEAREST_NEARMV, { LAST_FRAME, NONE_FRAME } }, +#if CONFIG_EXT_REFS + { SR_NEAREST_NEARMV, { LAST2_FRAME, NONE_FRAME } }, + { SR_NEAREST_NEARMV, { LAST3_FRAME, NONE_FRAME } }, + { SR_NEAREST_NEARMV, { BWDREF_FRAME, NONE_FRAME } }, +#endif // CONFIG_EXT_REFS + { SR_NEAREST_NEARMV, { GOLDEN_FRAME, NONE_FRAME } }, + { SR_NEAREST_NEARMV, { ALTREF_FRAME, NONE_FRAME } }, + + /* + { SR_NEAREST_NEWMV, { LAST_FRAME, NONE_FRAME } }, +#if CONFIG_EXT_REFS + { SR_NEAREST_NEWMV, { LAST2_FRAME, NONE_FRAME } }, + { SR_NEAREST_NEWMV, { LAST3_FRAME, NONE_FRAME } }, + { SR_NEAREST_NEWMV, { BWDREF_FRAME, NONE_FRAME } }, +#endif // CONFIG_EXT_REFS + { SR_NEAREST_NEWMV, { GOLDEN_FRAME, NONE_FRAME } }, + { SR_NEAREST_NEWMV, { ALTREF_FRAME, NONE_FRAME } },*/ + + { SR_NEAR_NEWMV, { LAST_FRAME, NONE_FRAME } }, +#if CONFIG_EXT_REFS + { SR_NEAR_NEWMV, { LAST2_FRAME, NONE_FRAME } }, + { SR_NEAR_NEWMV, { LAST3_FRAME, NONE_FRAME } }, + { SR_NEAR_NEWMV, { BWDREF_FRAME, NONE_FRAME } }, +#endif // CONFIG_EXT_REFS + { SR_NEAR_NEWMV, { GOLDEN_FRAME, NONE_FRAME } }, + { SR_NEAR_NEWMV, { ALTREF_FRAME, NONE_FRAME } }, + + { SR_ZERO_NEWMV, { LAST_FRAME, NONE_FRAME } }, +#if CONFIG_EXT_REFS + { SR_ZERO_NEWMV, { LAST2_FRAME, NONE_FRAME } }, + { SR_ZERO_NEWMV, { LAST3_FRAME, NONE_FRAME } }, + { SR_ZERO_NEWMV, { BWDREF_FRAME, NONE_FRAME } }, +#endif // CONFIG_EXT_REFS + { SR_ZERO_NEWMV, { GOLDEN_FRAME, NONE_FRAME } }, + { SR_ZERO_NEWMV, { ALTREF_FRAME, NONE_FRAME } }, + + { SR_NEW_NEWMV, { LAST_FRAME, NONE_FRAME } }, +#if CONFIG_EXT_REFS + { SR_NEW_NEWMV, { LAST2_FRAME, NONE_FRAME } }, + { SR_NEW_NEWMV, { LAST3_FRAME, NONE_FRAME } }, + { SR_NEW_NEWMV, { BWDREF_FRAME, NONE_FRAME } }, +#endif // CONFIG_EXT_REFS + { SR_NEW_NEWMV, { GOLDEN_FRAME, NONE_FRAME } }, + { SR_NEW_NEWMV, { ALTREF_FRAME, NONE_FRAME } }, 
+#endif // CONFIG_COMPOUND_SINGLEREF + { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } }, #if CONFIG_EXT_REFS { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } }, @@ -202,6 +263,13 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } }, { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } }, { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } }, + +#if CONFIG_EXT_COMP_REFS + { NEAREST_NEARESTMV, { LAST_FRAME, LAST2_FRAME } }, + { NEAREST_NEARESTMV, { LAST_FRAME, LAST3_FRAME } }, + { NEAREST_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } }, + { NEAREST_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } }, +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS #else // CONFIG_EXT_INTER @@ -217,6 +285,13 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } }, { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } }, { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } }, + +#if CONFIG_EXT_COMP_REFS + { NEARESTMV, { LAST_FRAME, LAST2_FRAME } }, + { NEARESTMV, { LAST_FRAME, LAST3_FRAME } }, + { NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } }, + { NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } }, +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS #endif // CONFIG_EXT_INTER @@ -297,9 +372,43 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } }, { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } }, { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } }, + +#if CONFIG_EXT_COMP_REFS + { NEAR_NEARMV, { LAST_FRAME, LAST2_FRAME } }, + { NEW_NEARESTMV, { LAST_FRAME, LAST2_FRAME } }, + { NEAREST_NEWMV, { LAST_FRAME, LAST2_FRAME } }, + { NEW_NEARMV, { LAST_FRAME, LAST2_FRAME } }, + { NEAR_NEWMV, { LAST_FRAME, LAST2_FRAME } }, + { NEW_NEWMV, { LAST_FRAME, LAST2_FRAME } }, + { ZERO_ZEROMV, { LAST_FRAME, LAST2_FRAME } }, + + { NEAR_NEARMV, { LAST_FRAME, LAST3_FRAME } }, + { NEW_NEARESTMV, { LAST_FRAME, LAST3_FRAME } }, + { NEAREST_NEWMV, { LAST_FRAME, LAST3_FRAME } }, + { NEW_NEARMV, { LAST_FRAME, LAST3_FRAME } }, + { NEAR_NEWMV, { LAST_FRAME, LAST3_FRAME } }, + { NEW_NEWMV, { LAST_FRAME, LAST3_FRAME } }, + { ZERO_ZEROMV, { LAST_FRAME, LAST3_FRAME } }, + + { NEAR_NEARMV, { LAST_FRAME, GOLDEN_FRAME } }, + { NEW_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } }, + { NEAREST_NEWMV, { LAST_FRAME, GOLDEN_FRAME } }, + { NEW_NEARMV, { LAST_FRAME, GOLDEN_FRAME } }, + { NEAR_NEWMV, { LAST_FRAME, GOLDEN_FRAME } }, + { NEW_NEWMV, { LAST_FRAME, GOLDEN_FRAME } }, + { ZERO_ZEROMV, { LAST_FRAME, GOLDEN_FRAME } }, + + { NEAR_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } }, + { NEW_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } }, + { NEAREST_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } }, + { NEW_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } }, + { NEAR_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } }, + { NEW_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } }, + { ZERO_ZEROMV, { BWDREF_FRAME, ALTREF_FRAME } }, +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS -#else // CONFIG_EXT_INTER +#else // !CONFIG_EXT_INTER { NEARMV, { LAST_FRAME, ALTREF_FRAME } }, { NEWMV, { LAST_FRAME, ALTREF_FRAME } }, @@ -321,6 +430,17 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { { NEWMV, { LAST3_FRAME, BWDREF_FRAME } }, { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } }, { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } }, + +#if CONFIG_EXT_COMP_REFS + { NEARMV, { LAST_FRAME, LAST2_FRAME } }, + { NEWMV, { LAST_FRAME, LAST2_FRAME } }, + { NEARMV, { LAST_FRAME, LAST3_FRAME } }, + { NEWMV, { LAST_FRAME, LAST3_FRAME } }, + { NEARMV, { LAST_FRAME, GOLDEN_FRAME } }, + { NEWMV, { 
LAST_FRAME, GOLDEN_FRAME } }, + { NEARMV, { BWDREF_FRAME, ALTREF_FRAME } }, + { NEWMV, { BWDREF_FRAME, ALTREF_FRAME } }, +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS { ZEROMV, { LAST_FRAME, ALTREF_FRAME } }, @@ -335,6 +455,13 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } }, { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } }, { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } }, + +#if CONFIG_EXT_COMP_REFS + { ZEROMV, { LAST_FRAME, LAST2_FRAME } }, + { ZEROMV, { LAST_FRAME, LAST3_FRAME } }, + { ZEROMV, { LAST_FRAME, GOLDEN_FRAME } }, + { ZEROMV, { BWDREF_FRAME, ALTREF_FRAME } }, +#endif // CONFIG_EXT_COMP_REFS #endif // CONFIG_EXT_REFS #endif // CONFIG_EXT_INTER @@ -385,6 +512,35 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { #endif // CONFIG_EXT_INTER }; +static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = { + DC_PRED, H_PRED, V_PRED, +#if CONFIG_ALT_INTRA + SMOOTH_PRED, +#endif // CONFIG_ALT_INTRA + TM_PRED, +#if CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV + SMOOTH_V_PRED, SMOOTH_H_PRED, +#endif // CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV + D135_PRED, D207_PRED, D153_PRED, D63_PRED, D117_PRED, D45_PRED, +}; + +#if CONFIG_CFL +static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = { + UV_DC_PRED, UV_H_PRED, UV_V_PRED, +#if CONFIG_ALT_INTRA + UV_SMOOTH_PRED, +#endif // CONFIG_ALT_INTRA + UV_TM_PRED, +#if CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV + UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED, +#endif // CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV + UV_D135_PRED, UV_D207_PRED, UV_D153_PRED, + UV_D63_PRED, UV_D117_PRED, UV_D45_PRED, +}; +#else +#define uv_rd_search_mode_order intra_rd_search_mode_order +#endif // CONFIG_CFL + #if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE static INLINE int write_uniform_cost(int n, int v) { const int l = get_unsigned_bits(n); @@ -404,7 +560,7 @@ static INLINE int write_uniform_cost(int n, int v) { #define FAST_EXT_TX_EDST_MARGIN 0.3 #if CONFIG_DAALA_DIST -static int od_compute_var_4x4(od_coeff *x, int stride) { +static int od_compute_var_4x4(uint16_t *x, int stride) { int sum; int s2; int i; @@ -420,7 +576,7 @@ static int od_compute_var_4x4(od_coeff *x, int stride) { s2 += t * t; } } - // TODO(yushin) : Check wheter any changes are required for high bit depth. 
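For reference, the od_compute_var_4x4() helper that this hunk retypes from od_coeff to uint16_t reduces to a plain sum / sum-of-squares pass over the 16 pixels (its return expression appears at the start of the next hunk). A minimal standalone sketch, with only the names shown in the hunk taken from the patch:

#include <stdint.h>

/* Sketch of the 4x4 variance computed by od_compute_var_4x4(): accumulate the
 * sum and the sum of squares, then return the variance with the same
 * fixed-point shifts used in the patch ((s2 - sum*sum/16) / 16). */
static int var_4x4_sketch(const uint16_t *x, int stride) {
  int sum = 0, s2 = 0;
  for (int i = 0; i < 4; i++) {
    for (int j = 0; j < 4; j++) {
      const int t = x[i * stride + j];
      sum += t;
      s2 += t * t;
    }
  }
  return (s2 - (sum * sum >> 4)) >> 4;
}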
+ return (s2 - (sum * sum >> 4)) >> 4; } @@ -431,8 +587,8 @@ static int od_compute_var_4x4(od_coeff *x, int stride) { #define OD_DIST_LP_MID (5) #define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2) -static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x, - od_coeff *y, od_coeff *e_lp, int stride) { +static double od_compute_dist_8x8(int use_activity_masking, uint16_t *x, + uint16_t *y, od_coeff *e_lp, int stride) { double sum; int min_var; double mean_var; @@ -444,8 +600,7 @@ static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x, double vardist; vardist = 0; - OD_ASSERT(qm != OD_FLAT_QM); - (void)qm; + #if 1 min_var = INT_MAX; mean_var = 0; @@ -490,22 +645,61 @@ static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x, } // Note : Inputs x and y are in a pixel domain -static double od_compute_dist(int qm, int activity_masking, od_coeff *x, - od_coeff *y, int bsize_w, int bsize_h, - int qindex) { +static double od_compute_dist_common(int activity_masking, uint16_t *x, + uint16_t *y, int bsize_w, int bsize_h, + int qindex, od_coeff *tmp, + od_coeff *e_lp) { + int i, j; + double sum = 0; + const int mid = OD_DIST_LP_MID; + + for (j = 0; j < bsize_w; j++) { + e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j]; + e_lp[(bsize_h - 1) * bsize_w + j] = mid * tmp[(bsize_h - 1) * bsize_w + j] + + 2 * tmp[(bsize_h - 2) * bsize_w + j]; + } + for (i = 1; i < bsize_h - 1; i++) { + for (j = 0; j < bsize_w; j++) { + e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] + + tmp[(i - 1) * bsize_w + j] + + tmp[(i + 1) * bsize_w + j]; + } + } + for (i = 0; i < bsize_h; i += 8) { + for (j = 0; j < bsize_w; j += 8) { + sum += od_compute_dist_8x8(activity_masking, &x[i * bsize_w + j], + &y[i * bsize_w + j], &e_lp[i * bsize_w + j], + bsize_w); + } + } + /* Scale according to linear regression against SSE, for 8x8 blocks. */ + if (activity_masking) { + sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) + + (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0); + } else { + sum *= qindex >= 128 + ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128) + : qindex <= 43 ? 
1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43) + : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43); + } + + return sum; +} + +static double od_compute_dist(uint16_t *x, uint16_t *y, int bsize_w, + int bsize_h, int qindex) { int i; double sum; sum = 0; assert(bsize_w >= 8 && bsize_h >= 8); - if (qm == OD_FLAT_QM) { - for (i = 0; i < bsize_w * bsize_h; i++) { - double tmp; - tmp = x[i] - y[i]; - sum += tmp * tmp; - } - } else { +#if CONFIG_PVQ + int activity_masking = 1; +#else + int activity_masking = 0; +#endif + { int j; DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]); DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]); @@ -525,63 +719,242 @@ static double od_compute_dist(int qm, int activity_masking, od_coeff *x, e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1]; } } - for (j = 0; j < bsize_w; j++) { - e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j]; - e_lp[(bsize_h - 1) * bsize_w + j] = - mid * tmp[(bsize_h - 1) * bsize_w + j] + - 2 * tmp[(bsize_h - 2) * bsize_w + j]; - } - for (i = 1; i < bsize_h - 1; i++) { + sum = od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h, + qindex, tmp, e_lp); + } + return sum; +} + +static double od_compute_dist_diff(uint16_t *x, int16_t *e, int bsize_w, + int bsize_h, int qindex) { + int i; + double sum; + sum = 0; + + assert(bsize_w >= 8 && bsize_h >= 8); + +#if CONFIG_PVQ + int activity_masking = 1; +#else + int activity_masking = 0; +#endif + { + int j; + DECLARE_ALIGNED(16, uint16_t, y[MAX_TX_SQUARE]); + DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]); + DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]); + int mid = OD_DIST_LP_MID; + for (i = 0; i < bsize_h; i++) { for (j = 0; j < bsize_w; j++) { - e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] + - tmp[(i - 1) * bsize_w + j] + - tmp[(i + 1) * bsize_w + j]; + y[i * bsize_w + j] = x[i * bsize_w + j] - e[i * bsize_w + j]; } } - for (i = 0; i < bsize_h; i += 8) { - for (j = 0; j < bsize_w; j += 8) { - sum += od_compute_dist_8x8(qm, activity_masking, &x[i * bsize_w + j], - &y[i * bsize_w + j], &e_lp[i * bsize_w + j], - bsize_w); + for (i = 0; i < bsize_h; i++) { + tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1]; + tmp[i * bsize_w + bsize_w - 1] = + mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2]; + for (j = 1; j < bsize_w - 1; j++) { + tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + + e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1]; } } - /* Scale according to linear regression against SSE, for 8x8 blocks. */ - if (activity_masking) { - sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) + - (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0); - } else { - sum *= qindex >= 128 - ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128) - : qindex <= 43 - ? 
1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43) - : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43); - } + sum = od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h, + qindex, tmp, e_lp); } return sum; } +#endif // CONFIG_DAALA_DIST + +#if CONFIG_DIST_8X8 +#define NEW_FUTURE_DIST 0 +int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCKD *xd, + const uint8_t *src, int src_stride, const uint8_t *dst, + int dst_stride, const BLOCK_SIZE tx_bsize, int bsw, + int bsh, int visible_w, int visible_h, int qindex) { + int64_t d = 0; -int64_t av1_daala_dist(const uint8_t *src, int src_stride, const uint8_t *dst, - int dst_stride, int bsw, int bsh, int qm, - int use_activity_masking, int qindex) { +#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST int i, j; - int64_t d; - DECLARE_ALIGNED(16, od_coeff, orig[MAX_TX_SQUARE]); - DECLARE_ALIGNED(16, od_coeff, rec[MAX_TX_SQUARE]); - assert(qm == OD_HVS_QM); + DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]); + DECLARE_ALIGNED(16, uint16_t, rec[MAX_TX_SQUARE]); + (void)cpi; + (void)tx_bsize; +#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST + +#if !CONFIG_HIGHBITDEPTH + (void)xd; +#endif + +#if !CONFIG_DAALA_DIST + (void)qindex; +#endif + +#if !CONFIG_DAALA_DIST || !NEW_FUTURE_DIST + (void)xd; + (void)bsw, (void)bsh; + (void)visible_w, (void)visible_h; +#endif + +#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + for (j = 0; j < bsh; j++) + for (i = 0; i < bsw; i++) + orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i]; + + if ((bsw == visible_w) && (bsh == visible_h)) { + for (j = 0; j < bsh; j++) + for (i = 0; i < bsw; i++) + rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i]; + } else { + for (j = 0; j < visible_h; j++) + for (i = 0; i < visible_w; i++) + rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i]; + + if (visible_w < bsw) { + for (j = 0; j < bsh; j++) + for (i = visible_w; i < bsw; i++) + rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i]; + } + + if (visible_h < bsh) { + for (j = visible_h; j < bsh; j++) + for (i = 0; i < bsw; i++) + rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i]; + } + } + } else { +#endif + for (j = 0; j < bsh; j++) + for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i]; + + if ((bsw == visible_w) && (bsh == visible_h)) { + for (j = 0; j < bsh; j++) + for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i]; + } else { + for (j = 0; j < visible_h; j++) + for (i = 0; i < visible_w; i++) + rec[j * bsw + i] = dst[j * dst_stride + i]; - for (j = 0; j < bsh; j++) - for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i]; + if (visible_w < bsw) { + for (j = 0; j < bsh; j++) + for (i = visible_w; i < bsw; i++) + rec[j * bsw + i] = src[j * src_stride + i]; + } - for (j = 0; j < bsh; j++) - for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i]; + if (visible_h < bsh) { + for (j = visible_h; j < bsh; j++) + for (i = 0; i < bsw; i++) rec[j * bsw + i] = src[j * src_stride + i]; + } + } +#if CONFIG_HIGHBITDEPTH + } +#endif // CONFIG_HIGHBITDEPTH +#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST + +#if CONFIG_DAALA_DIST + d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex); +#elif NEW_FUTURE_DIST + // Call new 8x8-wise distortion function here, for example + for (i = 0; i < bsh; i += 8) { + for (j = 0; j < bsw; j += 8) { + d += + av1_compute_dist_8x8(&orig[i * bsw + j], &rec[i * bsw + j], bsw, bsh); + } + } +#else + // Otherwise, MSE 
by default + unsigned sse; + // TODO(Any): Use even faster function which does not calculate variance + cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse); + d = sse; +#endif // CONFIG_DAALA_DIST - d = (int64_t)od_compute_dist(qm, use_activity_masking, orig, rec, bsw, bsh, - qindex); return d; } + +static int64_t av1_dist_8x8_diff(const MACROBLOCKD *xd, const uint8_t *src, + int src_stride, const int16_t *diff, + int diff_stride, int bsw, int bsh, + int visible_w, int visible_h, int qindex) { + int64_t d = 0; + +#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST + int i, j; + + DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]); + DECLARE_ALIGNED(16, int16_t, diff16[MAX_TX_SQUARE]); +#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST + +#if !CONFIG_HIGHBITDEPTH + (void)xd; +#endif + +#if !CONFIG_DAALA_DIST + (void)qindex; +#endif + +#if !CONFIG_DAALA_DIST || !NEW_FUTURE_DIST + (void)xd; + (void)src, (void)src_stride; + (void)bsw, (void)bsh; + (void)visible_w, (void)visible_h; +#endif + +#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + for (j = 0; j < bsh; j++) + for (i = 0; i < bsw; i++) + orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i]; + } else { +#endif + for (j = 0; j < bsh; j++) + for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i]; +#if CONFIG_HIGHBITDEPTH + } +#endif // CONFIG_HIGHBITDEPTH + + if ((bsw == visible_w) && (bsh == visible_h)) { + for (j = 0; j < bsh; j++) + for (i = 0; i < bsw; i++) diff16[j * bsw + i] = diff[j * diff_stride + i]; + } else { + for (j = 0; j < visible_h; j++) + for (i = 0; i < visible_w; i++) + diff16[j * bsw + i] = diff[j * diff_stride + i]; + + if (visible_w < bsw) { + for (j = 0; j < bsh; j++) + for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0; + } + + if (visible_h < bsh) { + for (j = visible_h; j < bsh; j++) + for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0; + } + } +#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST + +#if CONFIG_DAALA_DIST + d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex); +#elif NEW_FUTURE_DIST + // Call new 8x8-wise distortion function (with diff inpu) here, for example + for (i = 0; i < bsh; i += 8) { + for (j = 0; j < bsw; j += 8) { + d += av1_compute_dist_8x8_diff(&orig[i * bsw + j], &diff16[i * bsw + j], + bsw, bsh); + } + } +#else + // Otherwise, MSE by default + d = aom_sum_squares_2d_i16(diff, diff_stride, bsw, bsh); #endif // CONFIG_DAALA_DIST + return d; +} +#endif // CONFIG_DIST_8X8 + static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize, const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, @@ -892,11 +1265,11 @@ static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize, for (plane = plane_from; plane <= plane_to; ++plane) { struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; -#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2 +#if CONFIG_CHROMA_SUB8X8 const BLOCK_SIZE bs = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd)); #else const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); -#endif // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2 +#endif // CONFIG_CHROMA_SUB8X8 unsigned int sse; int rate; @@ -1068,7 +1441,7 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, #if !CONFIG_VAR_TX && !CONFIG_SUPERTX // Check for consistency of tx_size with mode info - assert(tx_size == get_tx_size(plane, xd)); + assert(tx_size == av1_get_tx_size(plane, xd)); #endif // 
!CONFIG_VAR_TX && !CONFIG_SUPERTX (void)cm; @@ -1144,10 +1517,12 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, #endif // !CONFIG_LV_MAP int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, - int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order, - const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l, - int use_fast_coef_costing) { + int blk_row, int blk_col, int block, TX_SIZE tx_size, + const SCAN_ORDER *scan_order, const ENTROPY_CONTEXT *a, + const ENTROPY_CONTEXT *l, int use_fast_coef_costing) { #if !CONFIG_LV_MAP + (void)blk_row; + (void)blk_col; const AV1_COMMON *const cm = &cpi->common; return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l, use_fast_coef_costing); @@ -1158,13 +1533,11 @@ int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const struct macroblockd_plane *pd = &xd->plane[plane]; const BLOCK_SIZE bsize = mbmi->sb_type; -#if CONFIG_CB4X4 -#if CONFIG_CHROMA_2X2 - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); -#else +#if CONFIG_CHROMA_SUB8X8 const BLOCK_SIZE plane_bsize = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd)); -#endif // CONFIG_CHROMA_2X2 +#elif CONFIG_CB4X4 + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); #else // CONFIG_CB4X4 const BLOCK_SIZE plane_bsize = get_plane_block_size(AOMMAX(BLOCK_8X8, bsize), pd); @@ -1172,7 +1545,8 @@ int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, TXB_CTX txb_ctx; get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx); - return av1_cost_coeffs_txb(cpi, x, plane, block, &txb_ctx); + return av1_cost_coeffs_txb(cpi, x, plane, blk_row, blk_col, block, tx_size, + &txb_ctx); #endif // !CONFIG_LV_MAP } #endif // !CONFIG_PVQ || CONFIG_VAR_TX @@ -1182,9 +1556,9 @@ static void get_txb_dimensions(const MACROBLOCKD *xd, int plane, BLOCK_SIZE plane_bsize, int blk_row, int blk_col, BLOCK_SIZE tx_bsize, int *width, int *height, int *visible_width, int *visible_height) { -#if !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT) +#if !(CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)) assert(tx_bsize <= plane_bsize); -#endif // !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT) +#endif int txb_height = block_size_high[tx_bsize]; int txb_width = block_size_wide[tx_bsize]; const int block_height = block_size_high[plane_bsize]; @@ -1208,19 +1582,31 @@ static void get_txb_dimensions(const MACROBLOCKD *xd, int plane, clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height); } -// Compute the pixel domain sum square error on all visible 4x4s in the +// Compute the pixel domain distortion from src and dst on all visible 4x4s in +// the // transform block. 
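A small worked example of the visible-size clamp in get_txb_dimensions() above, assuming blk_row is counted in 4x4 units so tx_unit_size is 2 (the numbers are illustrative only):

/* block_rows = 16, txb_height = 16, blk_row = 2 (i.e. 8 pixels down):
 *   visible_rows = clamp(16 - (2 << 2), 0, 16) = 8
 * so the "visible 4x4s" distortion helpers that follow only traverse the 8
 * rows of the transform block that actually lie inside the plane block. */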
-static unsigned pixel_sse(const AV1_COMP *const cpi, const MACROBLOCKD *xd, - int plane, const uint8_t *src, const int src_stride, - const uint8_t *dst, const int dst_stride, int blk_row, - int blk_col, const BLOCK_SIZE plane_bsize, - const BLOCK_SIZE tx_bsize) { +static unsigned pixel_dist(const AV1_COMP *const cpi, const MACROBLOCK *x, + int plane, const uint8_t *src, const int src_stride, + const uint8_t *dst, const int dst_stride, + int blk_row, int blk_col, + const BLOCK_SIZE plane_bsize, + const BLOCK_SIZE tx_bsize) { int txb_rows, txb_cols, visible_rows, visible_cols; + const MACROBLOCKD *xd = &x->e_mbd; + get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, &txb_cols, &txb_rows, &visible_cols, &visible_rows); assert(visible_rows > 0); assert(visible_cols > 0); -#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT + +#if CONFIG_DIST_8X8 + if (plane == 0 && txb_cols >= 8 && txb_rows >= 8) + return av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride, tx_bsize, + txb_cols, txb_rows, visible_cols, visible_rows, + x->qindex); +#endif // CONFIG_DIST_8X8 + +#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX) if ((txb_rows == visible_rows && txb_cols == visible_cols) && tx_bsize < BLOCK_SIZES) { #else @@ -1242,36 +1628,86 @@ static unsigned pixel_sse(const AV1_COMP *const cpi, const MACROBLOCKD *xd, return sse; } -// Compute the squares sum squares on all visible 4x4s in the transform block. -static int64_t sum_squares_visible(const MACROBLOCKD *xd, int plane, - const int16_t *diff, const int diff_stride, - int blk_row, int blk_col, - const BLOCK_SIZE plane_bsize, - const BLOCK_SIZE tx_bsize) { +// Compute the pixel domain distortion from diff on all visible 4x4s in the +// transform block. +static int64_t pixel_diff_dist(const MACROBLOCK *x, int plane, + const int16_t *diff, const int diff_stride, + int blk_row, int blk_col, + const BLOCK_SIZE plane_bsize, + const BLOCK_SIZE tx_bsize) { int visible_rows, visible_cols; + const MACROBLOCKD *xd = &x->e_mbd; +#if CONFIG_DIST_8X8 + int txb_height = block_size_high[tx_bsize]; + int txb_width = block_size_wide[tx_bsize]; + const int src_stride = x->plane[plane].src.stride; + const int src_idx = (blk_row * src_stride + blk_col) << tx_size_wide_log2[0]; + const uint8_t *src = &x->plane[plane].src.buf[src_idx]; +#endif + get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL, NULL, &visible_cols, &visible_rows); - return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, visible_rows); + +#if CONFIG_DIST_8X8 + if (plane == 0 && txb_width >= 8 && txb_height >= 8) + return av1_dist_8x8_diff(xd, src, src_stride, diff, diff_stride, txb_width, + txb_height, visible_cols, visible_rows, x->qindex); + else +#endif + return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, + visible_rows); +} + +#if CONFIG_PALETTE || CONFIG_INTRABC +int av1_count_colors(const uint8_t *src, int stride, int rows, int cols) { + int val_count[256]; + memset(val_count, 0, sizeof(val_count)); + for (int r = 0; r < rows; ++r) { + for (int c = 0; c < cols; ++c) { + ++val_count[src[r * stride + c]]; + } + } + int n = 0; + for (int i = 0; i < 256; ++i) { + if (val_count[i]) ++n; + } + return n; } +#if CONFIG_HIGHBITDEPTH +int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols, + int bit_depth) { + assert(bit_depth <= 12); + const uint16_t *src = CONVERT_TO_SHORTPTR(src8); + int val_count[1 << 12]; + memset(val_count, 0, (1 << 12) * sizeof(val_count[0])); + for (int r = 0; r < rows; ++r) { + for 
(int c = 0; c < cols; ++c) { + ++val_count[src[r * stride + c]]; + } + } + int n = 0; + for (int i = 0; i < (1 << bit_depth); ++i) { + if (val_count[i]) ++n; + } + return n; +} +#endif // CONFIG_HIGHBITDEPTH +#endif // CONFIG_PALETTE || CONFIG_INTRABC + void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col, TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse, OUTPUT_STATUS output_status) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; -#if CONFIG_DAALA_DIST - int qm = OD_HVS_QM; - int use_activity_masking = 0; -#if CONFIG_PVQ - use_activity_masking = x->daala_enc.use_activity_masking; -#endif // CONFIG_PVQ +#if CONFIG_DIST_8X8 struct macroblockd_plane *const pd = &xd->plane[plane]; -#else // CONFIG_DAALA_DIST +#else // CONFIG_DIST_8X8 const struct macroblockd_plane *const pd = &xd->plane[plane]; -#endif // CONFIG_DAALA_DIST +#endif // CONFIG_DIST_8X8 - if (cpi->sf.use_transform_domain_distortion && !CONFIG_DAALA_DIST) { + if (cpi->sf.use_transform_domain_distortion && !CONFIG_DIST_8X8) { // Transform domain distortion computation is more efficient as it does // not involve an inverse transform, but it is less accurate. const int buffer_length = tx_size_2d[tx_size]; @@ -1292,19 +1728,21 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, &this_sse) >> shift; #endif // CONFIG_HIGHBITDEPTH -#elif CONFIG_HIGHBITDEPTH - const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8; - *out_dist = - av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse, bd) >> - shift; -#else - *out_dist = - av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift; +#else // !CONFIG_PVQ +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + *out_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length, + &this_sse, xd->bd) >> + shift; + else +#endif + *out_dist = + av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift; #endif // CONFIG_PVQ *out_sse = this_sse >> shift; } else { const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size]; -#if !CONFIG_PVQ || CONFIG_DAALA_DIST +#if !CONFIG_PVQ || CONFIG_DIST_8X8 const int bsw = block_size_wide[tx_bsize]; const int bsh = block_size_high[tx_bsize]; #endif @@ -1323,34 +1761,13 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, assert(cpi != NULL); assert(tx_size_wide_log2[0] == tx_size_high_log2[0]); -#if CONFIG_DAALA_DIST - if (plane == 0 && bsw >= 8 && bsh >= 8) { - if (output_status == OUTPUT_HAS_DECODED_PIXELS) { - const int pred_stride = block_size_wide[plane_bsize]; - const int pred_idx = (blk_row * pred_stride + blk_col) - << tx_size_wide_log2[0]; - const int16_t *pred = &pd->pred[pred_idx]; - int i, j; - DECLARE_ALIGNED(16, uint8_t, pred8[MAX_TX_SQUARE]); - - for (j = 0; j < bsh; j++) - for (i = 0; i < bsw; i++) - pred8[j * bsw + i] = pred[j * pred_stride + i]; - *out_sse = av1_daala_dist(src, src_stride, pred8, bsw, bsw, bsh, qm, - use_activity_masking, x->qindex); - } else { - *out_sse = av1_daala_dist(src, src_stride, dst, dst_stride, bsw, bsh, - qm, use_activity_masking, x->qindex); - } - } else -#endif // CONFIG_DAALA_DIST { const int diff_stride = block_size_wide[plane_bsize]; const int diff_idx = (blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]; const int16_t *diff = &p->src_diff[diff_idx]; - *out_sse = sum_squares_visible(xd, plane, diff, diff_stride, blk_row, - blk_col, plane_bsize, tx_bsize); + *out_sse = 
pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col, + plane_bsize, tx_bsize); #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) *out_sse = ROUND_POWER_OF_TWO(*out_sse, (xd->bd - 8) * 2); @@ -1360,15 +1777,8 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, if (eob) { if (output_status == OUTPUT_HAS_DECODED_PIXELS) { -#if CONFIG_DAALA_DIST - if (plane == 0 && bsw >= 8 && bsh >= 8) - *out_dist = av1_daala_dist(src, src_stride, dst, dst_stride, bsw, bsh, - qm, use_activity_masking, x->qindex); - else -#endif // CONFIG_DAALA_DIST - *out_dist = - pixel_sse(cpi, xd, plane, src, src_stride, dst, dst_stride, - blk_row, blk_col, plane_bsize, tx_bsize); + *out_dist = pixel_dist(cpi, x, plane, src, src_stride, dst, dst_stride, + blk_row, blk_col, plane_bsize, tx_bsize); } else { #if CONFIG_HIGHBITDEPTH uint8_t *recon; @@ -1399,37 +1809,44 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, #endif // !CONFIG_PVQ const PLANE_TYPE plane_type = get_plane_type(plane); - TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); - - av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, recon, - MAX_TX_SIZE, eob); - -#if CONFIG_DAALA_DIST - if (plane == 0 && bsw >= 8 && bsh >= 8) { - *out_dist = av1_daala_dist(src, src_stride, recon, MAX_TX_SIZE, bsw, - bsh, qm, use_activity_masking, x->qindex); - } else { - if (plane == 0) { - // Save decoded pixels for inter block in pd->pred to avoid - // block_8x8_rd_txfm_daala_dist() need to produce them - // by calling av1_inverse_transform_block() again. - const int pred_stride = block_size_wide[plane_bsize]; - const int pred_idx = (blk_row * pred_stride + blk_col) - << tx_size_wide_log2[0]; - int16_t *pred = &pd->pred[pred_idx]; - int i, j; + TX_TYPE tx_type = + av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size); + av1_inverse_transform_block(xd, dqcoeff, +#if CONFIG_LGT + xd->mi[0]->mbmi.mode, +#endif + tx_type, tx_size, recon, MAX_TX_SIZE, eob); + +#if CONFIG_DIST_8X8 + if (plane == 0 && (bsw < 8 || bsh < 8)) { + // Save decoded pixels for inter block in pd->pred to avoid + // block_8x8_rd_txfm_daala_dist() need to produce them + // by calling av1_inverse_transform_block() again. 
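The ROUND_POWER_OF_TWO() step above rescales the high-bit-depth SSE back to the 8-bit domain. A worked example for 10-bit input, following directly from the shift shown in the hunk:

/* For bd = 10, pixel differences can be 2^(10-8) = 4x larger than at 8 bits,
 * so squared errors grow by up to 16x.  The patch compensates with a rounding
 * shift of (bd - 8) * 2 = 4 bits:
 *   sse_8bit_scale = ROUND_POWER_OF_TWO(sse_10bit, 4);  // roughly sse / 16
 * keeping *out_sse comparable with the 8-bit rate-distortion constants. */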
+ const int pred_stride = block_size_wide[plane_bsize]; + const int pred_idx = (blk_row * pred_stride + blk_col) + << tx_size_wide_log2[0]; + int16_t *pred = &pd->pred[pred_idx]; + int i, j; +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + for (j = 0; j < bsh; j++) + for (i = 0; i < bsw; i++) + pred[j * pred_stride + i] = + CONVERT_TO_SHORTPTR(recon)[j * MAX_TX_SIZE + i]; + } else { +#endif for (j = 0; j < bsh; j++) for (i = 0; i < bsw; i++) pred[j * pred_stride + i] = recon[j * MAX_TX_SIZE + i]; +#if CONFIG_HIGHBITDEPTH } -#endif // CONFIG_DAALA_DIST - *out_dist = - pixel_sse(cpi, xd, plane, src, src_stride, recon, MAX_TX_SIZE, - blk_row, blk_col, plane_bsize, tx_bsize); -#if CONFIG_DAALA_DIST +#endif // CONFIG_HIGHBITDEPTH } -#endif // CONFIG_DAALA_DIST +#endif // CONFIG_DIST_8X8 + *out_dist = + pixel_dist(cpi, x, plane, src, src_stride, recon, MAX_TX_SIZE, + blk_row, blk_col, plane_bsize, tx_bsize); } *out_dist *= 16; } else { @@ -1453,33 +1870,25 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, int64_t rd1, rd2, rd; RD_STATS this_rd_stats; - assert(tx_size == get_tx_size(plane, xd)); +#if !CONFIG_SUPERTX && !CONFIG_VAR_TX + assert(tx_size == av1_get_tx_size(plane, xd)); +#endif // !CONFIG_SUPERTX av1_init_rd_stats(&this_rd_stats); if (args->exit_early) return; if (!is_inter_block(mbmi)) { -#if CONFIG_CFL - -#if CONFIG_EC_ADAPT - FRAME_CONTEXT *const ec_ctx = xd->tile_ctx; -#else - FRAME_CONTEXT *const ec_ctx = cm->fc; -#endif // CONFIG_EC_ADAPT - - av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col, - blk_row, tx_size, plane_bsize); -#else av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size); -#endif #if CONFIG_DPCM_INTRA const int block_raster_idx = av1_block_index_to_raster_order(tx_size, block); - const PREDICTION_MODE mode = - (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode; - TX_TYPE tx_type = get_tx_type((plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV, - xd, block, tx_size); + const PREDICTION_MODE mode = (plane == AOM_PLANE_Y) + ? get_y_mode(xd->mi[0], block_raster_idx) + : get_uv_mode(mbmi->uv_mode); + TX_TYPE tx_type = + av1_get_tx_type((plane == AOM_PLANE_Y) ? 
PLANE_TYPE_Y : PLANE_TYPE_UV, + xd, blk_row, blk_col, block, tx_size); if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) { int8_t skip; av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col, @@ -1496,9 +1905,36 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, #if !CONFIG_TXK_SEL // full forward transform and quantization const int coeff_ctx = combine_entropy_contexts(*a, *l); +#if DISABLE_TRELLISQ_SEARCH + av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, + coeff_ctx, AV1_XFORM_QUANT_B); +#else av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP); - av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l); + + const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2; + tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); + tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); + const int buffer_length = tx_size_2d[tx_size]; + int64_t tmp_dist; + int64_t tmp; +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + tmp_dist = + av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd) >> + shift; + else +#endif + tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp) >> shift; + + if (RDCOST(x->rdmult, 0, tmp_dist) + args->this_rd < args->best_rd) { + av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size, + a, l); + } else { + args->exit_early = 1; + return; + } +#endif // DISABLE_TRELLISQ_SEARCH if (!is_inter_block(mbmi)) { struct macroblock_plane *const p = &x->plane[plane]; @@ -1518,25 +1954,27 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, const int dst_stride = pd->dst.stride; uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; - cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size); + // TODO (ltrudeau) Store sub-8x8 inter blocks when bottom right block is + // intra predicted. 
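The DISABLE_TRELLISQ_SEARCH branch above gates the expensive trellis pass behind a cheap transform-domain bound: a distortion-only cost is computed from av1_block_error() on the quantized coefficients, and av1_optimize_b() only runs if that bound can still beat the best candidate so far. A minimal sketch of the gate, with the helper name and parameters being illustrative rather than part of the patch:

#include <stdint.h>

/* dist_only_cost corresponds to RDCOST(x->rdmult, 0, tmp_dist) in the hunk;
 * this_rd and best_rd to the running and best costs in rdcost_block_args.
 * Returns nonzero when spending time on trellis optimization is worthwhile;
 * otherwise the caller sets exit_early, exactly as block_rd_txfm() does. */
static int worth_running_trellis(int64_t dist_only_cost, int64_t this_rd,
                                 int64_t best_rd) {
  return dist_only_cost + this_rd < best_rd;
}

Setting DISABLE_TRELLISQ_SEARCH to 1 (see the #define added earlier in this patch) compiles the whole gate out and quantizes with AV1_XFORM_QUANT_B directly.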
+ cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size, plane_bsize); } #endif #if CONFIG_DPCM_INTRA CALCULATE_RD : {} #endif // CONFIG_DPCM_INTRA - rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist); + rd = RDCOST(x->rdmult, 0, this_rd_stats.dist); if (args->this_rd + rd > args->best_rd) { args->exit_early = 1; return; } #if !CONFIG_PVQ const PLANE_TYPE plane_type = get_plane_type(plane); - const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); - const SCAN_ORDER *scan_order = - get_scan(cm, tx_size, tx_type, is_inter_block(mbmi)); + const TX_TYPE tx_type = + av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size); + const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, mbmi); this_rd_stats.rate = - av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order, a, l, - args->use_fast_coef_costing); + av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size, + scan_order, a, l, args->use_fast_coef_costing); #else // !CONFIG_PVQ this_rd_stats.rate = x->rate; #endif // !CONFIG_PVQ @@ -1554,22 +1992,12 @@ CALCULATE_RD : {} av1_set_txb_context(x, plane, block, tx_size, a, l); #endif // !CONFIG_PVQ - rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist); - rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse); + rd1 = RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist); + rd2 = RDCOST(x->rdmult, 0, this_rd_stats.sse); // TODO(jingning): temporarily enabled only for luma component rd = AOMMIN(rd1, rd2); -#if CONFIG_DAALA_DIST - if (plane == 0 && plane_bsize >= BLOCK_8X8 && - (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4)) { - this_rd_stats.dist = 0; - this_rd_stats.sse = 0; - rd = 0; - x->rate_4x4[block] = this_rd_stats.rate; - } -#endif // CONFIG_DAALA_DIST - #if !CONFIG_PVQ this_rd_stats.skip &= !x->plane[plane].eobs[block]; #else @@ -1579,113 +2007,93 @@ CALCULATE_RD : {} args->this_rd += rd; - if (args->this_rd > args->best_rd) { - args->exit_early = 1; - return; +#if CONFIG_DIST_8X8 + if (!(plane == 0 && plane_bsize >= BLOCK_8X8 && + (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))) { +#endif + if (args->this_rd > args->best_rd) { + args->exit_early = 1; + return; + } +#if CONFIG_DIST_8X8 } +#endif } -#if CONFIG_DAALA_DIST -static void block_8x8_rd_txfm_daala_dist(int plane, int block, int blk_row, - int blk_col, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { - struct rdcost_block_args *args = arg; - MACROBLOCK *const x = args->x; +#if CONFIG_DIST_8X8 +static void dist_8x8_sub8x8_txfm_rd(const AV1_COMP *const cpi, MACROBLOCK *x, + BLOCK_SIZE bsize, + struct rdcost_block_args *args) { MACROBLOCKD *const xd = &x->e_mbd; + const struct macroblockd_plane *const pd = &xd->plane[0]; + const struct macroblock_plane *const p = &x->plane[0]; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - int64_t rd, rd1, rd2; - RD_STATS this_rd_stats; - int qm = OD_HVS_QM; - int use_activity_masking = 0; - - (void)tx_size; - - assert(plane == 0); - assert(plane_bsize >= BLOCK_8X8); -#if CONFIG_PVQ - use_activity_masking = x->daala_enc.use_activity_masking; -#endif // CONFIG_PVQ - av1_init_rd_stats(&this_rd_stats); + const int src_stride = p->src.stride; + const int dst_stride = pd->dst.stride; + const uint8_t *src = &p->src.buf[0]; + const uint8_t *dst = &pd->dst.buf[0]; + const int16_t *pred = &pd->pred[0]; + const int bw = block_size_wide[bsize]; + const int bh = block_size_high[bsize]; - if (args->exit_early) return; + int i, j; + int64_t rd, rd1, rd2; + unsigned int tmp1, tmp2; + int qindex = 
x->qindex; - { - const struct macroblock_plane *const p = &x->plane[plane]; - struct macroblockd_plane *const pd = &xd->plane[plane]; + assert((bw & 0x07) == 0); + assert((bh & 0x07) == 0); - const int src_stride = p->src.stride; - const int dst_stride = pd->dst.stride; - const int diff_stride = block_size_wide[plane_bsize]; +#if CONFIG_HIGHBITDEPTH + uint8_t *pred8; + DECLARE_ALIGNED(16, uint16_t, pred16[MAX_TX_SQUARE]); - const uint8_t *src = - &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]]; - const uint8_t *dst = - &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + pred8 = CONVERT_TO_BYTEPTR(pred16); + else + pred8 = (uint8_t *)pred16; +#else + DECLARE_ALIGNED(16, uint8_t, pred8[MAX_TX_SQUARE]); +#endif // CONFIG_HIGHBITDEPTH - unsigned int tmp1, tmp2; - int qindex = x->qindex; - const int pred_stride = block_size_wide[plane_bsize]; - const int pred_idx = (blk_row * pred_stride + blk_col) - << tx_size_wide_log2[0]; - int16_t *pred = &pd->pred[pred_idx]; - int i, j; - const int tx_blk_size = 8; - - DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]); - - for (j = 0; j < tx_blk_size; j++) - for (i = 0; i < tx_blk_size; i++) - pred8[j * tx_blk_size + i] = pred[j * diff_stride + i]; - - tmp1 = av1_daala_dist(src, src_stride, pred8, tx_blk_size, 8, 8, qm, - use_activity_masking, qindex); - tmp2 = av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8, qm, - use_activity_masking, qindex); - - if (!is_inter_block(mbmi)) { - this_rd_stats.sse = (int64_t)tmp1 * 16; - this_rd_stats.dist = (int64_t)tmp2 * 16; - } else { - // For inter mode, the decoded pixels are provided in pd->pred, - // while the predicted pixels are in dst. - this_rd_stats.sse = (int64_t)tmp2 * 16; - this_rd_stats.dist = (int64_t)tmp1 * 16; - } +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + for (j = 0; j < bh; j++) + for (i = 0; i < bw; i++) + CONVERT_TO_SHORTPTR(pred8)[j * bw + i] = pred[j * bw + i]; + } else { +#endif + for (j = 0; j < bh; j++) + for (i = 0; i < bw; i++) pred8[j * bw + i] = pred[j * bw + i]; +#if CONFIG_HIGHBITDEPTH } +#endif // CONFIG_HIGHBITDEPTH - rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist); - if (args->this_rd + rd > args->best_rd) { - args->exit_early = 1; - return; + tmp1 = av1_dist_8x8(cpi, xd, src, src_stride, pred8, bw, bsize, bw, bh, bw, + bh, qindex); + tmp2 = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride, bsize, bw, bh, + bw, bh, qindex); + + if (!is_inter_block(mbmi)) { + args->rd_stats.sse = (int64_t)tmp1 * 16; + args->rd_stats.dist = (int64_t)tmp2 * 16; + } else { + // For inter mode, the decoded pixels are provided in pd->pred, + // while the predicted pixels are in dst. + args->rd_stats.sse = (int64_t)tmp2 * 16; + args->rd_stats.dist = (int64_t)tmp1 * 16; } - { - const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane); - const uint8_t txw_unit = tx_size_wide_unit[tx_size]; - const uint8_t txh_unit = tx_size_high_unit[tx_size]; - const int step = txw_unit * txh_unit; - int offset_h = tx_size_high_unit[TX_4X4]; - // The rate of the current 8x8 block is the sum of four 4x4 blocks in it. 
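The tmp1/tmp2 assignment above is easy to misread: tmp1 is the distortion of src against the pd->pred copy and tmp2 against dst, and which one counts as sse versus dist flips between intra and inter blocks (for inter blocks the decoded pixels were stashed in pd->pred earlier in the patch, so dst holds the prediction). A compact restatement, wrapped in an illustrative helper:

#include <stdint.h>

/* dist_vs_pred = av1_dist_8x8(src, pd->pred copy, ...)  -> tmp1 in the hunk
 * dist_vs_dst  = av1_dist_8x8(src, dst, ...)            -> tmp2 in the hunk */
static void assign_sse_dist(int is_inter, int64_t dist_vs_pred,
                            int64_t dist_vs_dst, int64_t *sse, int64_t *dist) {
  if (!is_inter) {
    /* Intra: dst is the reconstruction, pd->pred the prediction. */
    *sse = dist_vs_pred * 16;
    *dist = dist_vs_dst * 16;
  } else {
    /* Inter: pd->pred holds the decoded pixels, dst the prediction. */
    *sse = dist_vs_dst * 16;
    *dist = dist_vs_pred * 16;
  }
}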
- this_rd_stats.rate = - x->rate_4x4[block - max_blocks_wide * offset_h - step] + - x->rate_4x4[block - max_blocks_wide * offset_h] + - x->rate_4x4[block - step] + x->rate_4x4[block]; - } - rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist); - rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse); + rd1 = RDCOST(x->rdmult, args->rd_stats.rate, args->rd_stats.dist); + rd2 = RDCOST(x->rdmult, 0, args->rd_stats.sse); rd = AOMMIN(rd1, rd2); - args->rd_stats.dist += this_rd_stats.dist; - args->rd_stats.sse += this_rd_stats.sse; - - args->this_rd += rd; + args->rd_stats.rdcost = rd; + args->this_rd = rd; - if (args->this_rd > args->best_rd) { - args->exit_early = 1; - return; - } + if (args->this_rd > args->best_rd) args->exit_early = 1; } -#endif // CONFIG_DAALA_DIST +#endif // CONFIG_DIST_8X8 static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi, RD_STATS *rd_stats, int64_t ref_best_rd, int plane, @@ -1705,15 +2113,13 @@ static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi, av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); -#if CONFIG_DAALA_DIST - if (plane == 0 && bsize >= BLOCK_8X8 && + av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm, + &args); +#if CONFIG_DIST_8X8 + if (!args.exit_early && plane == 0 && bsize >= BLOCK_8X8 && (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4)) - av1_foreach_8x8_transformed_block_in_yplane( - xd, bsize, block_rd_txfm, block_8x8_rd_txfm_daala_dist, &args); - else -#endif // CONFIG_DAALA_DIST - av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm, - &args); + dist_8x8_sub8x8_txfm_rd(cpi, x, bsize, &args); +#endif if (args.exit_early) { av1_invalid_rd_stats(rd_stats); @@ -1768,8 +2174,14 @@ static int tx_size_cost(const AV1_COMP *const cpi, const MACROBLOCK *const x, const MACROBLOCKD *const xd = &x->e_mbd; const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - const int tx_select = - cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8; + const int tx_select = cm->tx_mode == TX_MODE_SELECT && +#if CONFIG_EXT_PARTITION_TYPES + // Currently these block shapes can only use 4x4 + // transforms + mbmi->sb_type != BLOCK_4X16 && + mbmi->sb_type != BLOCK_16X4 && +#endif + mbmi->sb_type >= BLOCK_8X8; if (tx_select) { const int is_inter = is_inter_block(mbmi); @@ -1779,11 +2191,11 @@ static int tx_size_cost(const AV1_COMP *const cpi, const MACROBLOCK *const x, const int depth = tx_size_to_depth(coded_tx_size); const int tx_size_ctx = get_tx_size_context(xd); int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth]; -#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX) if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size) r_tx_size += av1_cost_bit(cm->fc->quarter_tx_size_prob, tx_size == quarter_txsize_lookup[bsize]); -#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#endif return r_tx_size; } else { return 0; @@ -1796,6 +2208,10 @@ int av1_tx_type_cost(const AV1_COMP *cpi, const MACROBLOCKD *xd, TX_TYPE tx_type) { if (plane > 0) return 0; +#if CONFIG_VAR_TX + tx_size = get_min_tx_size(tx_size); +#endif + const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const int is_inter = is_inter_block(mbmi); #if CONFIG_EXT_TX @@ -1844,6 +2260,9 @@ static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, const int r_tx_size = tx_size_cost(cpi, x, bs, tx_size); +#if CONFIG_PVQ + assert(tx_size >= TX_4X4); +#endif // CONFIG_PVQ assert(skip_prob > 
0); #if CONFIG_EXT_TX && CONFIG_RECT_TX assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs))); @@ -1864,21 +2283,20 @@ static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, if (rd_stats->skip) { if (is_inter) { - rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse); + rd = RDCOST(x->rdmult, s1, rd_stats->sse); } else { - rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select, - rd_stats->sse); + rd = RDCOST(x->rdmult, s1 + r_tx_size * tx_select, rd_stats->sse); } } else { - rd = RDCOST(x->rdmult, x->rddiv, - rd_stats->rate + s0 + r_tx_size * tx_select, rd_stats->dist); + rd = RDCOST(x->rdmult, rd_stats->rate + s0 + r_tx_size * tx_select, + rd_stats->dist); } if (tx_select) rd_stats->rate += r_tx_size; if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !(rd_stats->skip)) - rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse)); + rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse)); return rd; } @@ -1895,6 +2313,11 @@ static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs, // transforms should be considered for pruning prune = prune_tx_types(cpi, bs, x, xd, -1); +#if CONFIG_MRC_TX + // MRC_DCT only implemented for TX_32X32 so only include this tx in + // the search for TX_32X32 + if (tx_type == MRC_DCT && tx_size != TX_32X32) return 1; +#endif // CONFIG_MRC_TX if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) return 1; if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, tx_size)) return 1; @@ -1929,7 +2352,8 @@ static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs, return 0; } -#if CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT) +#if CONFIG_EXT_INTER && \ + (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT || CONFIG_INTERINTRA) static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs, MACROBLOCK *x, int *r, int64_t *d, int *s, int64_t *sse, int64_t ref_best_rd) { @@ -2020,14 +2444,13 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x, av1_tx_type_cost(cpi, xd, bs, plane, mbmi->tx_size, tx_type); if (this_rd_stats.skip) - this_rd = RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse); + this_rd = RDCOST(x->rdmult, s1, this_rd_stats.sse); else - this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + s0, - this_rd_stats.dist); + this_rd = + RDCOST(x->rdmult, this_rd_stats.rate + s0, this_rd_stats.dist); if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] && !this_rd_stats.skip) - this_rd = - AOMMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse)); + this_rd = AOMMIN(this_rd, RDCOST(x->rdmult, s1, this_rd_stats.sse)); if (this_rd < best_rd) { best_rd = this_rd; @@ -2068,13 +2491,12 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x, continue; } if (this_rd_stats.skip) - this_rd = RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse); + this_rd = RDCOST(x->rdmult, s1, this_rd_stats.sse); else - this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + s0, - this_rd_stats.dist); - if (is_inter && !xd->lossless[mbmi->segment_id] && !this_rd_stats.skip) this_rd = - AOMMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse)); + RDCOST(x->rdmult, this_rd_stats.rate + s0, this_rd_stats.dist); + if (is_inter && !xd->lossless[mbmi->segment_id] && !this_rd_stats.skip) + this_rd = AOMMIN(this_rd, RDCOST(x->rdmult, s1, this_rd_stats.sse)); if (this_rd < best_rd) { best_rd = this_rd; @@ -2129,7 +2551,6 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi, TX_TYPE best_tx_type = 
DCT_DCT; #if CONFIG_TXK_SEL TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; - const int num_blk = bsize_to_num_blk(bs); #endif // CONFIG_TXK_SEL const int tx_select = cm->tx_mode == TX_MODE_SELECT; const int is_inter = is_inter_block(mbmi); @@ -2171,8 +2592,7 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi, rect_tx_size); if (rd < best_rd) { #if CONFIG_TXK_SEL - memcpy(best_txk_type, mbmi->txk_type, - sizeof(best_txk_type[0]) * num_blk); + memcpy(best_txk_type, mbmi->txk_type, sizeof(best_txk_type[0]) * 256); #endif best_tx_type = tx_type; best_tx_size = rect_tx_size; @@ -2278,8 +2698,7 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi, last_rd = rd; if (rd < best_rd) { #if CONFIG_TXK_SEL - memcpy(best_txk_type, mbmi->txk_type, - sizeof(best_txk_type[0]) * num_blk); + memcpy(best_txk_type, mbmi->txk_type, sizeof(best_txk_type[0]) * 256); #endif best_tx_type = tx_type; best_tx_size = n; @@ -2295,7 +2714,7 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi, mbmi->tx_size = best_tx_size; mbmi->tx_type = best_tx_type; #if CONFIG_TXK_SEL - memcpy(mbmi->txk_type, best_txk_type, sizeof(best_txk_type[0]) * num_blk); + memcpy(mbmi->txk_type, best_txk_type, sizeof(best_txk_type[0]) * 256); #endif #if CONFIG_VAR_TX @@ -2366,21 +2785,7 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x, int block = 0; for (row = 0; row < max_blocks_high; row += stepr) { for (col = 0; col < max_blocks_wide; col += stepc) { -#if CONFIG_CFL - const struct macroblockd_plane *const pd = &xd->plane[0]; - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); - -#if CONFIG_EC_ADAPT - FRAME_CONTEXT *const ec_ctx = xd->tile_ctx; -#else - FRAME_CONTEXT *const ec_ctx = cpi->common.fc; -#endif // CONFIG_EC_ADAPT - - av1_predict_intra_block_encoder_facade(x, ec_ctx, 0, block, col, row, - tx_size, plane_bsize); -#else av1_predict_intra_block_facade(xd, 0, block, col, row, tx_size); -#endif block += step; } } @@ -2388,7 +2793,8 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x, model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &this_rd_stats.rate, &this_rd_stats.dist, &this_rd_stats.skip, &temp_sse); #if CONFIG_EXT_INTRA - if (av1_is_directional_mode(mbmi->mode, bsize)) { + if (av1_is_directional_mode(mbmi->mode, bsize) && + av1_use_angle_delta(bsize)) { mode_cost += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, MAX_ANGLE_DELTA + mbmi->angle_delta[0]); } @@ -2405,8 +2811,8 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x, } } #endif // CONFIG_FILTER_INTRA - this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + mode_cost, - this_rd_stats.dist); + this_rd = + RDCOST(x->rdmult, this_rd_stats.rate + mode_cost, this_rd_stats.dist); return this_rd; } @@ -2620,7 +3026,7 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x, super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd); if (tokenonly_rd_stats.rate == INT_MAX) continue; this_rate = tokenonly_rd_stats.rate + palette_mode_cost; - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist); + this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); if (!xd->lossless[mbmi->segment_id] && mbmi->sb_type >= BLOCK_8X8) { tokenonly_rd_stats.rate -= tx_size_cost(cpi, x, bsize, mbmi->tx_size); } @@ -2773,15 +3179,17 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( src_stride, dst, dst_stride, xd->bd); #endif if (is_lossless) { - TX_TYPE tx_type = 
get_tx_type(PLANE_TYPE_Y, xd, block, tx_size); - const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0); + TX_TYPE tx_type = + av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size); + const SCAN_ORDER *scan_order = + get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi); const int coeff_ctx = combine_entropy_contexts(tempa[idx], templ[idy]); #if !CONFIG_PVQ av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP); - ratey += av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order, - tempa + idx, templ + idy, + ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size, + scan_order, tempa + idx, templ + idy, cpi->sf.use_fast_coef_costing); skip = (p->eobs[block] == 0); can_skip &= skip; @@ -2806,28 +3214,38 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( templ[idy] = !skip; can_skip &= skip; #endif - if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) + if (RDCOST(x->rdmult, ratey, distortion) >= best_rd) goto next_highbd; #if CONFIG_PVQ if (!skip) #endif av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block), +#if CONFIG_LGT + mode, +#endif DCT_DCT, tx_size, dst, dst_stride, p->eobs[block]); } else { int64_t dist; unsigned int tmp; - TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size); - const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0); + TX_TYPE tx_type = + av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size); + const SCAN_ORDER *scan_order = + get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi); const int coeff_ctx = combine_entropy_contexts(tempa[idx], templ[idy]); #if !CONFIG_PVQ +#if DISABLE_TRELLISQ_SEARCH + av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8, + tx_size, coeff_ctx, AV1_XFORM_QUANT_B); +#else av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP); - av1_optimize_b(cm, x, 0, block, BLOCK_8X8, tx_size, tempa + idx, - templ + idy); - ratey += av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order, - tempa + idx, templ + idy, + av1_optimize_b(cm, x, 0, 0, 0, block, BLOCK_8X8, tx_size, + tempa + idx, templ + idy); +#endif // DISABLE_TRELLISQ_SEARCH + ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size, + scan_order, tempa + idx, templ + idy, cpi->sf.use_fast_coef_costing); skip = (p->eobs[block] == 0); can_skip &= skip; @@ -2855,19 +3273,22 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( if (!skip) #endif av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block), +#if CONFIG_LGT + mode, +#endif tx_type, tx_size, dst, dst_stride, p->eobs[block]); cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp); dist = (int64_t)tmp << 4; distortion += dist; - if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) + if (RDCOST(x->rdmult, ratey, distortion) >= best_rd) goto next_highbd; } } } rate += ratey; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); + this_rd = RDCOST(x->rdmult, rate, distortion); if (this_rd < best_rd) { *bestrate = rate; @@ -2966,14 +3387,24 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( aom_subtract_block(tx_height, tx_width, src_diff, 8, src, src_stride, dst, dst_stride); #endif // !CONFIG_PVQ - - TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size); - const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0); + TX_TYPE tx_type = + av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size); + const SCAN_ORDER *scan_order = + get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi); const int coeff_ctx = 
combine_entropy_contexts(tempa[idx], templ[idy]); #if CONFIG_CB4X4 block = 4 * block; #endif // CONFIG_CB4X4 #if !CONFIG_PVQ +#if DISABLE_TRELLISQ_SEARCH + av1_xform_quant(cm, x, 0, block, +#if CONFIG_CB4X4 + 2 * (row + idy), 2 * (col + idx), +#else + row + idy, col + idx, +#endif // CONFIG_CB4X4 + BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_B); +#else const AV1_XFORM_QUANT xform_quant = is_lossless ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP; av1_xform_quant(cm, x, 0, block, @@ -2984,12 +3415,12 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( #endif // CONFIG_CB4X4 BLOCK_8X8, tx_size, coeff_ctx, xform_quant); - av1_optimize_b(cm, x, 0, block, BLOCK_8X8, tx_size, tempa + idx, + av1_optimize_b(cm, x, 0, 0, 0, block, BLOCK_8X8, tx_size, tempa + idx, templ + idy); - - ratey += - av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order, tempa + idx, - templ + idy, cpi->sf.use_fast_coef_costing); +#endif // DISABLE_TRELLISQ_SEARCH + ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size, scan_order, + tempa + idx, templ + idy, + cpi->sf.use_fast_coef_costing); skip = (p->eobs[block] == 0); can_skip &= skip; tempa[idx] = !skip; @@ -3028,6 +3459,9 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( if (!skip) #endif // CONFIG_PVQ av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block), +#if CONFIG_LGT + mode, +#endif tx_type, tx_size, dst, dst_stride, p->eobs[block]); unsigned int tmp; @@ -3036,14 +3470,16 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( distortion += dist; } - if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) - goto next; + if (RDCOST(x->rdmult, ratey, distortion) >= best_rd) goto next; if (is_lossless) { // Calculate inverse txfm *after* RD cost. #if CONFIG_PVQ if (!skip) #endif // CONFIG_PVQ av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block), +#if CONFIG_LGT + mode, +#endif DCT_DCT, tx_size, dst, dst_stride, p->eobs[block]); } @@ -3051,7 +3487,7 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( } rate += ratey; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); + this_rd = RDCOST(x->rdmult, rate, distortion); if (this_rd < best_rd) { *bestrate = rate; @@ -3153,9 +3589,9 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi, cpi, mb, idy, idx, &best_mode, bmode_costs, xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r, &ry, &d, bsize, tx_size, y_skip, best_rd - total_rd); -#if !CONFIG_DAALA_DIST +#if !CONFIG_DIST_8X8 if (this_rd >= best_rd - total_rd) return INT64_MAX; -#endif // !CONFIG_DAALA_DIST +#endif // !CONFIG_DIST_8X8 total_rd += this_rd; cost += r; total_distortion += d; @@ -3172,7 +3608,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi, } mbmi->mode = mic->bmi[3].as_mode; -#if CONFIG_DAALA_DIST +#if CONFIG_DIST_8X8 { const struct macroblock_plane *p = &mb->plane[0]; const struct macroblockd_plane *pd = &xd->plane[0]; @@ -3180,18 +3616,16 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi, const int dst_stride = pd->dst.stride; uint8_t *src = p->src.buf; uint8_t *dst = pd->dst.buf; - int use_activity_masking = 0; - int qm = OD_HVS_QM; #if CONFIG_PVQ use_activity_masking = mb->daala_enc.use_activity_masking; #endif // CONFIG_PVQ // Daala-defined distortion computed for the block of 8x8 pixels - total_distortion = av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8, - qm, use_activity_masking, mb->qindex) + total_distortion = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride, + BLOCK_8X8, 8, 8, 8, 8, 
mb->qindex) << 4; } -#endif // CONFIG_DAALA_DIST +#endif // CONFIG_DIST_8X8 // Add in the cost of the transform type if (!is_lossless) { int rate_tx_type = 0; @@ -3218,7 +3652,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi, *rate_y = tot_rate_y; *distortion = total_distortion; - return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion); + return RDCOST(mb->rdmult, cost, total_distortion); } #if CONFIG_FILTER_INTRA @@ -3261,7 +3695,7 @@ static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x, this_rate = tokenonly_rd_stats.rate + av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 1) + write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost; - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist); + this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); if (this_rd < *best_rd) { *best_rd = this_rd; @@ -3321,7 +3755,7 @@ static int64_t calc_rd_given_intra_angle( this_rate = tokenonly_rd_stats.rate + mode_cost + write_uniform_cost(2 * max_angle_delta + 1, mbmi->angle_delta[0] + max_angle_delta); - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist); + this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); if (this_rd < *best_rd) { *best_rd = this_rd; @@ -3496,8 +3930,8 @@ static void angle_estimation(const uint8_t *src, int src_stride, int rows, uint8_t *directional_mode_skip_mask) { memset(directional_mode_skip_mask, 0, INTRA_MODES * sizeof(*directional_mode_skip_mask)); - // Sub-8x8 blocks do not use extra directions. - if (bsize < BLOCK_8X8) return; + // Check if angle_delta is used + if (!av1_use_angle_delta(bsize)) return; uint64_t hist[DIRECTIONAL_MODES]; memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0])); src += src_stride; @@ -3551,8 +3985,8 @@ static void highbd_angle_estimation(const uint8_t *src8, int src_stride, uint8_t *directional_mode_skip_mask) { memset(directional_mode_skip_mask, 0, INTRA_MODES * sizeof(*directional_mode_skip_mask)); - // Sub-8x8 blocks do not use extra directions. 
- if (bsize < BLOCK_8X8) return; + // Check if angle_delta is used + if (!av1_use_angle_delta(bsize)) return; uint16_t *src = CONVERT_TO_SHORTPTR(src8); uint64_t hist[DIRECTIONAL_MODES]; memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0])); @@ -3608,7 +4042,6 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, BLOCK_SIZE bsize, int64_t best_rd) { - uint8_t mode_idx; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mic = xd->mi[0]; MB_MODE_INFO *const mbmi = &mic->mbmi; @@ -3683,7 +4116,7 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, x->use_default_intra_tx_type = 0; /* Y Search for intra prediction mode */ - for (mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) { + for (int mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) { RD_STATS this_rd_stats; int this_rate, this_rate_tokenonly, s; int64_t this_distortion, this_rd, this_model_rd; @@ -3692,7 +4125,8 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, mbmi->mode = best_mbmi.mode; x->use_default_intra_tx_type = 0; } else { - mbmi->mode = mode_idx; + assert(mode_idx < INTRA_MODES); + mbmi->mode = intra_rd_search_mode_order[mode_idx]; } #if CONFIG_PVQ od_encode_rollback(&x->daala_enc, &pre_buf); @@ -3708,7 +4142,7 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, #if CONFIG_EXT_INTRA is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize); if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue; - if (is_directional_mode) { + if (is_directional_mode && av1_use_angle_delta(bsize)) { this_rd_stats.rate = INT_MAX; rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rd_stats, bsize, bmode_costs[mbmi->mode], best_rd, &best_model_rd); @@ -3754,11 +4188,13 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, this_rate += cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter]; #endif // CONFIG_INTRA_INTERP - this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, - MAX_ANGLE_DELTA + mbmi->angle_delta[0]); + if (av1_use_angle_delta(bsize)) { + this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, + MAX_ANGLE_DELTA + mbmi->angle_delta[0]); + } } #endif // CONFIG_EXT_INTRA - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + this_rd = RDCOST(x->rdmult, this_rate, this_distortion); #if CONFIG_FILTER_INTRA if (best_rd == INT64_MAX || this_rd - best_rd < (best_rd >> 4)) { filter_intra_mode_skip_mask ^= (1 << mbmi->mode); @@ -3785,16 +4221,6 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, od_encode_rollback(&x->daala_enc, &post_buf); #endif // CONFIG_PVQ -#if CONFIG_CFL - // Perform one extra txfm_rd_in_plane() call, this time with the best value so - // we can store reconstructed luma values - RD_STATS this_rd_stats; - x->cfl_store_y = 1; - txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, 0, bsize, - mic->mbmi.tx_size, cpi->sf.use_fast_coef_costing); - x->cfl_store_y = 0; -#endif - #if CONFIG_PALETTE if (try_palette) { rd_pick_palette_intra_sby(cpi, x, bsize, palette_y_mode_ctx, @@ -3826,7 +4252,7 @@ static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x, int64_t ref_best_rd) { MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]); + const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]); 
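// ---------------------------------------------------------------------------
// Editorial note (not part of the patch): the hunks above and below change
// every rate-distortion cost call from the old four-argument form
// RDCOST(x->rdmult, x->rddiv, rate, dist) to the three-argument form
// RDCOST(x->rdmult, rate, dist); the per-call distortion shift (rddiv) is
// dropped and folded into the macro itself.  The actual macro definition
// lives in av1/encoder/rd.h and is not part of this diff; the sketch below
// is only an illustration of the general shape such a cost function takes.
// The shift constants and the helper name are assumptions, not values taken
// from the patch.
#include <stdint.h>

static inline int64_t rdcost_sketch(int rdmult, int rate, int64_t dist) {
  // rate is an entropy-coder cost scaled by rdmult (lambda); dist is a sum
  // of squared errors.  The old form passed the distortion shift explicitly
  // as x->rddiv; here it is a fixed constant baked into the cost function.
  const int rd_rate_shift = 9; // assumed rate scaling shift
  const int rd_dist_shift = 3; // assumed fixed distortion shift replacing rddiv
  return (((int64_t)rate * rdmult + (1 << (rd_rate_shift - 1))) >>
          rd_rate_shift) +
         ((int64_t)dist << rd_dist_shift);
}
// ---------------------------------------------------------------------------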
int plane; int is_cost_valid = 1; av1_init_rd_stats(rd_stats); @@ -3857,9 +4283,8 @@ static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x, break; } av1_merge_rd_stats(rd_stats, &pn_rd_stats); - if (RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist) > - ref_best_rd && - RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse) > ref_best_rd) { + if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) > ref_best_rd && + RDCOST(x->rdmult, 0, rd_stats->sse) > ref_best_rd) { is_cost_valid = 0; break; } @@ -3875,13 +4300,6 @@ static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x, } #if CONFIG_VAR_TX -// FIXME crop these calls -static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride, - TX_SIZE tx_size) { - return aom_sum_squares_2d_i16(diff, diff_stride, tx_size_wide[tx_size], - tx_size_high[tx_size]); -} - void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, int blk_row, int blk_col, int plane, int block, int plane_bsize, const ENTROPY_CONTEXT *a, @@ -3890,18 +4308,23 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, MACROBLOCKD *xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; + +#if CONFIG_TXK_SEL + av1_search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, + tx_size, a, l, 0, rd_stats); + return; +#endif + int64_t tmp; tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); PLANE_TYPE plane_type = get_plane_type(plane); - TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + TX_TYPE tx_type = + av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size); const SCAN_ORDER *const scan_order = - get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); + get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi); BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size]; int bh = block_size_high[txm_bsize]; int bw = block_size_wide[txm_bsize]; - int txb_h = tx_size_high_unit[tx_size]; - int txb_w = tx_size_wide_unit[tx_size]; - int src_stride = p->src.stride; uint8_t *src = &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]]; @@ -3914,30 +4337,15 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, #else DECLARE_ALIGNED(16, uint8_t, rec_buffer[MAX_TX_SQUARE]); #endif // CONFIG_HIGHBITDEPTH - int max_blocks_high = block_size_high[plane_bsize]; - int max_blocks_wide = block_size_wide[plane_bsize]; - const int diff_stride = max_blocks_wide; + const int diff_stride = block_size_wide[plane_bsize]; const int16_t *diff = &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]]; int txb_coeff_cost; assert(tx_size < TX_SIZES_ALL); - if (xd->mb_to_bottom_edge < 0) - max_blocks_high += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y); - if (xd->mb_to_right_edge < 0) - max_blocks_wide += xd->mb_to_right_edge >> (3 + pd->subsampling_x); - - max_blocks_high >>= tx_size_wide_log2[0]; - max_blocks_wide >>= tx_size_wide_log2[0]; - int coeff_ctx = get_entropy_context(tx_size, a, l); - av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, - coeff_ctx, AV1_XFORM_QUANT_FP); - - av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l); - // TODO(any): Use av1_dist_block to compute distortion #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -3954,21 +4362,35 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, 0, bw, bh); #endif // CONFIG_HIGHBITDEPTH - if (blk_row + txb_h > 
max_blocks_high || blk_col + txb_w > max_blocks_wide) { - int idx, idy; - int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row); - int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col); - tmp = 0; - for (idy = 0; idy < blocks_height; ++idy) { - for (idx = 0; idx < blocks_width; ++idx) { - const int16_t *d = - diff + ((idy * diff_stride + idx) << tx_size_wide_log2[0]); - tmp += sum_squares_2d(d, diff_stride, 0); - } - } - } else { - tmp = sum_squares_2d(diff, diff_stride, tx_size); +#if DISABLE_TRELLISQ_SEARCH + av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, + coeff_ctx, AV1_XFORM_QUANT_B); + +#else + av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, + coeff_ctx, AV1_XFORM_QUANT_FP); + + const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2; + tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); + const int buffer_length = tx_size_2d[tx_size]; + int64_t tmp_dist; +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + tmp_dist = + av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd) >> + shift; + else +#endif + tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp) >> shift; + + if (RDCOST(x->rdmult, 0, tmp_dist) < rd_stats->ref_rdcost) { + av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size, + a, l); } +#endif // DISABLE_TRELLISQ_SEARCH + + tmp = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col, + plane_bsize, txm_bsize); #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) @@ -3977,36 +4399,48 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, rd_stats->sse += tmp * 16; const int eob = p->eobs[block]; +#if CONFIG_LGT + PREDICTION_MODE mode = get_prediction_mode(xd->mi[0], plane, tx_size, block); + av1_inverse_transform_block(xd, dqcoeff, mode, tx_type, tx_size, rec_buffer, + MAX_TX_SIZE, eob); +#else av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, rec_buffer, MAX_TX_SIZE, eob); +#endif if (eob > 0) { - if (txb_w + blk_col > max_blocks_wide || - txb_h + blk_row > max_blocks_high) { - int idx, idy; - unsigned int this_dist; - int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row); - int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col); - tmp = 0; - for (idy = 0; idy < blocks_height; ++idy) { - for (idx = 0; idx < blocks_width; ++idx) { - uint8_t *const s = - src + ((idy * src_stride + idx) << tx_size_wide_log2[0]); - uint8_t *const r = - rec_buffer + ((idy * MAX_TX_SIZE + idx) << tx_size_wide_log2[0]); - cpi->fn_ptr[0].vf(s, src_stride, r, MAX_TX_SIZE, &this_dist); - tmp += this_dist; - } +#if CONFIG_DIST_8X8 + if (plane == 0 && (bw < 8 && bh < 8)) { + // Save sub8x8 luma decoded pixels + // since 8x8 luma decoded pixels are not available for daala-dist + // after recursive split of BLOCK_8x8 is done. 
+ const int pred_stride = block_size_wide[plane_bsize]; + const int pred_idx = (blk_row * pred_stride + blk_col) + << tx_size_wide_log2[0]; + int16_t *decoded = &pd->pred[pred_idx]; + int i, j; + +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + for (j = 0; j < bh; j++) + for (i = 0; i < bw; i++) + decoded[j * pred_stride + i] = + CONVERT_TO_SHORTPTR(rec_buffer)[j * MAX_TX_SIZE + i]; + } else { +#endif + for (j = 0; j < bh; j++) + for (i = 0; i < bw; i++) + decoded[j * pred_stride + i] = rec_buffer[j * MAX_TX_SIZE + i]; +#if CONFIG_HIGHBITDEPTH } - } else { - uint32_t this_dist; - cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, MAX_TX_SIZE, - &this_dist); - tmp = this_dist; +#endif // CONFIG_HIGHBITDEPTH } +#endif // CONFIG_DIST_8X8 + tmp = pixel_dist(cpi, x, plane, src, src_stride, rec_buffer, MAX_TX_SIZE, + blk_row, blk_col, plane_bsize, txm_bsize); } rd_stats->dist += tmp * 16; - txb_coeff_cost = - av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order, a, l, 0); + txb_coeff_cost = av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, + tx_size, scan_order, a, l, 0); rd_stats->rate += txb_coeff_cost; rd_stats->skip &= (eob == 0); @@ -4038,14 +4472,35 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, int64_t this_rd = INT64_MAX; ENTROPY_CONTEXT *pta = ta + blk_col; ENTROPY_CONTEXT *ptl = tl + blk_row; - int coeff_ctx, i; + int i; int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row, mbmi->sb_type, tx_size); int64_t sum_rd = INT64_MAX; int tmp_eob = 0; int zero_blk_rate; RD_STATS sum_rd_stats; - const int tx_size_ctx = txsize_sqr_map[tx_size]; +#if CONFIG_TXK_SEL + TX_TYPE best_tx_type = TX_TYPES; + int txk_idx = (blk_row << 4) + blk_col; +#endif +#if CONFIG_RECT_TX_EXT + TX_SIZE quarter_txsize = quarter_txsize_lookup[mbmi->sb_type]; + int check_qttx = is_quarter_tx_allowed(xd, mbmi, is_inter_block(mbmi)) && + tx_size == max_txsize_rect_lookup[mbmi->sb_type] && + quarter_txsize != tx_size; + int is_qttx_picked = 0; + int eobs_qttx[2] = { 0, 0 }; + int skip_qttx[2] = { 0, 0 }; + int block_offset_qttx = check_qttx + ? tx_size_wide_unit[quarter_txsize] * + tx_size_high_unit[quarter_txsize] + : 0; + int blk_row_offset, blk_col_offset; + int is_wide_qttx = + tx_size_wide_unit[quarter_txsize] > tx_size_high_unit[quarter_txsize]; + blk_row_offset = is_wide_qttx ? tx_size_high_unit[quarter_txsize] : 0; + blk_col_offset = is_wide_qttx ? 
0 : tx_size_wide_unit[quarter_txsize]; +#endif av1_init_rd_stats(&sum_rd_stats); @@ -4056,15 +4511,25 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, return; } - coeff_ctx = get_entropy_context(tx_size, pta, ptl); - av1_init_rd_stats(rd_stats); if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; +#if CONFIG_LV_MAP + TX_SIZE txs_ctx = get_txsize_context(tx_size); + TXB_CTX txb_ctx; + get_txb_ctx(plane_bsize, tx_size, plane, pta, ptl, &txb_ctx); + zero_blk_rate = + av1_cost_bit(xd->fc->txb_skip[txs_ctx][txb_ctx.txb_skip_ctx], 1); +#else + int tx_size_ctx = txsize_sqr_map[tx_size]; + int coeff_ctx = get_entropy_context(tx_size, pta, ptl); zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0] [coeff_ctx][EOB_TOKEN]; +#endif + rd_stats->ref_rdcost = ref_best_rd; + rd_stats->zero_rate = zero_blk_rate; if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) { inter_tx_size[0][0] = tx_size; @@ -4081,8 +4546,8 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, } } - if ((RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist) >= - RDCOST(x->rdmult, x->rddiv, zero_blk_rate, rd_stats->sse) || + if ((RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >= + RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) || rd_stats->skip == 1) && !xd->lossless[mbmi->segment_id]) { #if CONFIG_RD_DEBUG @@ -4094,6 +4559,9 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, rd_stats->skip = 1; x->blk_skip[plane][blk_row * bw + blk_col] = 1; p->eobs[block] = 0; +#if CONFIG_TXK_SEL + mbmi->txk_type[txk_idx] = DCT_DCT; +#endif } else { x->blk_skip[plane][blk_row * bw + blk_col] = 0; rd_stats->skip = 0; @@ -4102,23 +4570,143 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) rd_stats->rate += av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0); - this_rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist); +#if CONFIG_RECT_TX_EXT + if (check_qttx) { + assert(blk_row == 0 && blk_col == 0); + rd_stats->rate += av1_cost_bit(cpi->common.fc->quarter_tx_size_prob, 0); + } +#endif + this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); +#if CONFIG_LV_MAP + tmp_eob = p->txb_entropy_ctx[block]; +#else tmp_eob = p->eobs[block]; +#endif + +#if CONFIG_TXK_SEL + best_tx_type = mbmi->txk_type[txk_idx]; +#endif + +#if CONFIG_RECT_TX_EXT + if (check_qttx) { + assert(blk_row == 0 && blk_col == 0 && block == 0 && plane == 0); + + RD_STATS rd_stats_tmp, rd_stats_qttx; + int64_t rd_qttx; + + av1_init_rd_stats(&rd_stats_qttx); + av1_init_rd_stats(&rd_stats_tmp); + + av1_tx_block_rd_b(cpi, x, quarter_txsize, 0, 0, plane, 0, plane_bsize, + pta, ptl, &rd_stats_qttx); + + tx_size_ctx = txsize_sqr_map[quarter_txsize]; + coeff_ctx = get_entropy_context(quarter_txsize, pta, ptl); + zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0] + [coeff_ctx][EOB_TOKEN]; + if ((RDCOST(x->rdmult, rd_stats_qttx.rate, rd_stats_qttx.dist) >= + RDCOST(x->rdmult, zero_blk_rate, rd_stats_qttx.sse) || + rd_stats_qttx.skip == 1) && + !xd->lossless[mbmi->segment_id]) { +#if CONFIG_RD_DEBUG + av1_update_txb_coeff_cost(&rd_stats_qttx, plane, quarter_txsize, 0, 0, + zero_blk_rate - rd_stats_qttx.rate); +#endif // CONFIG_RD_DEBUG + rd_stats_qttx.rate = zero_blk_rate; + rd_stats_qttx.dist = rd_stats_qttx.sse; + rd_stats_qttx.skip = 1; + x->blk_skip[plane][blk_row * bw + blk_col] = 1; + skip_qttx[0] = 1; + p->eobs[block] = 0; + } else { 
+ x->blk_skip[plane][blk_row * bw + blk_col] = 0; + skip_qttx[0] = 0; + rd_stats->skip = 0; + } + + // Second tx block + av1_tx_block_rd_b(cpi, x, quarter_txsize, blk_row_offset, blk_col_offset, + plane, block_offset_qttx, plane_bsize, pta, ptl, + &rd_stats_tmp); + + av1_set_txb_context(x, plane, 0, quarter_txsize, pta, ptl); + coeff_ctx = get_entropy_context(quarter_txsize, pta + blk_col_offset, + ptl + blk_row_offset); + zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0] + [coeff_ctx][EOB_TOKEN]; + if ((RDCOST(x->rdmult, rd_stats_tmp.rate, rd_stats_tmp.dist) >= + RDCOST(x->rdmult, zero_blk_rate, rd_stats_tmp.sse) || + rd_stats_tmp.skip == 1) && + !xd->lossless[mbmi->segment_id]) { +#if CONFIG_RD_DEBUG + av1_update_txb_coeff_cost(&rd_stats_tmp, plane, quarter_txsize, 0, 0, + zero_blk_rate - rd_stats_tmp.rate); +#endif // CONFIG_RD_DEBUG + rd_stats_tmp.rate = zero_blk_rate; + rd_stats_tmp.dist = rd_stats_tmp.sse; + rd_stats_tmp.skip = 1; + x->blk_skip[plane][blk_row_offset * bw + blk_col_offset] = 1; + skip_qttx[1] = 1; + p->eobs[block_offset_qttx] = 0; + } else { + x->blk_skip[plane][blk_row_offset * bw + blk_col_offset] = 0; + skip_qttx[1] = 0; + rd_stats_tmp.skip = 0; + } + + av1_merge_rd_stats(&rd_stats_qttx, &rd_stats_tmp); + + if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) { + rd_stats_qttx.rate += + av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0); + } + rd_stats_qttx.rate += + av1_cost_bit(cpi->common.fc->quarter_tx_size_prob, 1); + rd_qttx = RDCOST(x->rdmult, rd_stats_qttx.rate, rd_stats_qttx.dist); +#if CONFIG_LV_MAP + eobs_qttx[0] = p->txb_entropy_ctx[0]; + eobs_qttx[1] = p->txb_entropy_ctx[block_offset_qttx]; +#else + eobs_qttx[0] = p->eobs[0]; + eobs_qttx[1] = p->eobs[block_offset_qttx]; +#endif + if (rd_qttx < this_rd) { + is_qttx_picked = 1; + this_rd = rd_qttx; + rd_stats->rate = rd_stats_qttx.rate; + rd_stats->dist = rd_stats_qttx.dist; + rd_stats->sse = rd_stats_qttx.sse; + rd_stats->skip = rd_stats_qttx.skip; + rd_stats->rdcost = rd_stats_qttx.rdcost; + } + av1_get_entropy_contexts(plane_bsize, 0, pd, ta, tl); + } +#endif } +#if CONFIG_MRC_TX + // If the tx type we are trying is MRC_DCT, we cannot partition the transform + // into anything smaller than TX_32X32 + if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH && mbmi->tx_type != MRC_DCT) { +#else if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) { +#endif const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; const int bsl = tx_size_wide_unit[sub_txs]; int sub_step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs]; RD_STATS this_rd_stats; int this_cost_valid = 1; int64_t tmp_rd = 0; - +#if CONFIG_DIST_8X8 + int sub8x8_eob[4]; +#endif sum_rd_stats.rate = av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1); assert(tx_size < TX_SIZES_ALL); + ref_best_rd = AOMMIN(this_rd, ref_best_rd); + for (i = 0; i < 4 && this_cost_valid; ++i) { int offsetr = blk_row + (i >> 1) * bsl; int offsetc = blk_col + (i & 0x01) * bsl; @@ -4129,30 +4717,170 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, depth + 1, plane_bsize, ta, tl, tx_above, tx_left, &this_rd_stats, ref_best_rd - tmp_rd, &this_cost_valid, rd_stats_stack); - +#if CONFIG_DIST_8X8 + if (plane == 0 && tx_size == TX_8X8) { + sub8x8_eob[i] = p->eobs[block]; + } +#endif // CONFIG_DIST_8X8 av1_merge_rd_stats(&sum_rd_stats, &this_rd_stats); - tmp_rd = - RDCOST(x->rdmult, x->rddiv, sum_rd_stats.rate, sum_rd_stats.dist); + tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist); +#if !CONFIG_DIST_8X8 if 
(this_rd < tmp_rd) break; +#endif block += sub_step; } +#if CONFIG_DIST_8X8 + if (this_cost_valid && plane == 0 && tx_size == TX_8X8) { + const int src_stride = p->src.stride; + const int dst_stride = pd->dst.stride; + + const uint8_t *src = + &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]]; + const uint8_t *dst = + &pd->dst + .buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; + + int64_t dist_8x8; + int qindex = x->qindex; + const int pred_stride = block_size_wide[plane_bsize]; + const int pred_idx = (blk_row * pred_stride + blk_col) + << tx_size_wide_log2[0]; + int16_t *pred = &pd->pred[pred_idx]; + int j; + int row, col; + +#if CONFIG_HIGHBITDEPTH + uint8_t *pred8; + DECLARE_ALIGNED(16, uint16_t, pred8_16[8 * 8]); +#else + DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]); +#endif // CONFIG_HIGHBITDEPTH + + dist_8x8 = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride, + BLOCK_8X8, 8, 8, 8, 8, qindex) * + 16; + sum_rd_stats.sse = dist_8x8; + +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + pred8 = CONVERT_TO_BYTEPTR(pred8_16); + else + pred8 = (uint8_t *)pred8_16; +#endif + +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + for (row = 0; row < 2; ++row) { + for (col = 0; col < 2; ++col) { + int idx = row * 2 + col; + int eob = sub8x8_eob[idx]; + + if (eob > 0) { + for (j = 0; j < 4; j++) + for (i = 0; i < 4; i++) + CONVERT_TO_SHORTPTR(pred8) + [(row * 4 + j) * 8 + 4 * col + i] = + pred[(row * 4 + j) * pred_stride + 4 * col + i]; + } else { + for (j = 0; j < 4; j++) + for (i = 0; i < 4; i++) + CONVERT_TO_SHORTPTR(pred8) + [(row * 4 + j) * 8 + 4 * col + i] = CONVERT_TO_SHORTPTR( + dst)[(row * 4 + j) * dst_stride + 4 * col + i]; + } + } + } + } else { +#endif + for (row = 0; row < 2; ++row) { + for (col = 0; col < 2; ++col) { + int idx = row * 2 + col; + int eob = sub8x8_eob[idx]; + + if (eob > 0) { + for (j = 0; j < 4; j++) + for (i = 0; i < 4; i++) + pred8[(row * 4 + j) * 8 + 4 * col + i] = + pred[(row * 4 + j) * pred_stride + 4 * col + i]; + } else { + for (j = 0; j < 4; j++) + for (i = 0; i < 4; i++) + pred8[(row * 4 + j) * 8 + 4 * col + i] = + dst[(row * 4 + j) * dst_stride + 4 * col + i]; + } + } + } +#if CONFIG_HIGHBITDEPTH + } +#endif // CONFIG_HIGHBITDEPTH + dist_8x8 = av1_dist_8x8(cpi, xd, src, src_stride, pred8, 8, BLOCK_8X8, 8, + 8, 8, 8, qindex) * + 16; + sum_rd_stats.dist = dist_8x8; + tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist); + } +#endif // CONFIG_DIST_8X8 if (this_cost_valid) sum_rd = tmp_rd; } if (this_rd < sum_rd) { int idx, idy; - for (i = 0; i < tx_size_wide_unit[tx_size]; ++i) pta[i] = !(tmp_eob == 0); - for (i = 0; i < tx_size_high_unit[tx_size]; ++i) ptl[i] = !(tmp_eob == 0); +#if CONFIG_RECT_TX_EXT + TX_SIZE tx_size_selected = is_qttx_picked ? 
quarter_txsize : tx_size; +#else + TX_SIZE tx_size_selected = tx_size; +#endif + +#if CONFIG_RECT_TX_EXT + if (is_qttx_picked) { + assert(blk_row == 0 && blk_col == 0 && plane == 0); +#if CONFIG_LV_MAP + p->txb_entropy_ctx[0] = eobs_qttx[0]; + p->txb_entropy_ctx[block_offset_qttx] = eobs_qttx[1]; +#else + p->eobs[0] = eobs_qttx[0]; + p->eobs[block_offset_qttx] = eobs_qttx[1]; +#endif + } else { +#endif +#if CONFIG_LV_MAP + p->txb_entropy_ctx[block] = tmp_eob; +#else + p->eobs[block] = tmp_eob; +#endif +#if CONFIG_RECT_TX_EXT + } +#endif + + av1_set_txb_context(x, plane, block, tx_size_selected, pta, ptl); +#if CONFIG_RECT_TX_EXT + if (is_qttx_picked) + av1_set_txb_context(x, plane, block_offset_qttx, tx_size_selected, + pta + blk_col_offset, ptl + blk_row_offset); +#endif + txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size, tx_size); - inter_tx_size[0][0] = tx_size; + inter_tx_size[0][0] = tx_size_selected; for (idy = 0; idy < tx_size_high_unit[tx_size] / 2; ++idy) for (idx = 0; idx < tx_size_wide_unit[tx_size] / 2; ++idx) - inter_tx_size[idy][idx] = tx_size; - mbmi->tx_size = tx_size; + inter_tx_size[idy][idx] = tx_size_selected; + mbmi->tx_size = tx_size_selected; +#if CONFIG_TXK_SEL + mbmi->txk_type[txk_idx] = best_tx_type; +#endif if (this_rd == INT64_MAX) *is_cost_valid = 0; - x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip; +#if CONFIG_RECT_TX_EXT + if (is_qttx_picked) { + x->blk_skip[plane][0] = skip_qttx[0]; + x->blk_skip[plane][blk_row_offset * bw + blk_col_offset] = skip_qttx[1]; + } else { +#endif + x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip; +#if CONFIG_RECT_TX_EXT + } +#endif } else { *rd_stats = sum_rd_stats; if (sum_rd == INT64_MAX) *is_cost_valid = 0; @@ -4201,17 +4929,16 @@ static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, tx_above, tx_left, &pn_rd_stats, ref_best_rd - this_rd, &is_cost_valid, rd_stats_stack); av1_merge_rd_stats(rd_stats, &pn_rd_stats); - this_rd += AOMMIN( - RDCOST(x->rdmult, x->rddiv, pn_rd_stats.rate, pn_rd_stats.dist), - RDCOST(x->rdmult, x->rddiv, 0, pn_rd_stats.sse)); + this_rd += AOMMIN(RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist), + RDCOST(x->rdmult, 0, pn_rd_stats.sse)); block += step; ++block32; } } } - this_rd = AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist), - RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse)); + this_rd = AOMMIN(RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist), + RDCOST(x->rdmult, 0, rd_stats->sse)); if (this_rd > ref_best_rd) is_cost_valid = 0; if (!is_cost_valid) { @@ -4247,6 +4974,7 @@ static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x, mbmi->min_tx_size = AOMMIN( mbmi->min_tx_size, get_min_tx_size(mbmi->inter_tx_size[row][col])); +#if !CONFIG_TXK_SEL #if CONFIG_EXT_TX if (get_ext_tx_types(mbmi->min_tx_size, bsize, is_inter, cm->reduced_tx_set_used) > 1 && @@ -4266,20 +4994,21 @@ static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x, [mbmi->tx_type]; } } -#else // CONFIG_EXT_TX +#else if (mbmi->min_tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id]) rd_stats->rate += cpi->inter_tx_type_costs[mbmi->min_tx_size][mbmi->tx_type]; #endif // CONFIG_EXT_TX +#endif // CONFIG_TXK_SEL if (rd_stats->skip) - rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse); + rd = RDCOST(x->rdmult, s1, rd_stats->sse); else - rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate + s0, rd_stats->dist); + rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist); if (is_inter && 
!xd->lossless[xd->mi[0]->mbmi.segment_id] && !(rd_stats->skip)) - rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse)); + rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse)); return rd; } @@ -4299,6 +5028,12 @@ static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE best_tx = max_txsize_lookup[bsize]; TX_SIZE best_min_tx_size = TX_SIZES_ALL; uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8]; + TX_TYPE txk_start = DCT_DCT; +#if CONFIG_TXK_SEL + TX_TYPE txk_end = DCT_DCT + 1; +#else + TX_TYPE txk_end = TX_TYPES; +#endif const int n4 = bsize_to_num_blk(bsize); int idx, idy; int prune = 0; @@ -4326,9 +5061,14 @@ static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x, for (idx = 0; idx < count32; ++idx) av1_invalid_rd_stats(&rd_stats_stack[idx]); - for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) { + for (tx_type = txk_start; tx_type < txk_end; ++tx_type) { RD_STATS this_rd_stats; av1_init_rd_stats(&this_rd_stats); +#if CONFIG_MRC_TX + // MRC_DCT only implemented for TX_32X32 so only include this tx in + // the search for TX_32X32 + if (tx_type == MRC_DCT && max_tx_size != TX_32X32) continue; +#endif // CONFIG_MRC_TX #if CONFIG_EXT_TX if (is_inter) { if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue; @@ -4384,7 +5124,6 @@ static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, ENTROPY_CONTEXT *left_ctx, RD_STATS *rd_stats) { MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; const int tx_row = blk_row >> (1 - pd->subsampling_y); @@ -4402,16 +5141,11 @@ static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, : mbmi->inter_tx_size[tx_row][tx_col]; if (tx_size == plane_tx_size) { - int i; ENTROPY_CONTEXT *ta = above_ctx + blk_col; ENTROPY_CONTEXT *tl = left_ctx + blk_row; av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block, plane_bsize, ta, tl, rd_stats); - - for (i = 0; i < tx_size_wide_unit[tx_size]; ++i) - ta[i] = !(p->eobs[block] == 0); - for (i = 0; i < tx_size_high_unit[tx_size]; ++i) - tl[i] = !(p->eobs[block] == 0); + av1_set_txb_context(x, plane, block, tx_size, ta, tl); } else { const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; const int bsl = tx_size_wide_unit[sub_txs]; @@ -4498,9 +5232,8 @@ static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, av1_merge_rd_stats(rd_stats, &pn_rd_stats); - this_rd = - AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist), - RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse)); + this_rd = AOMMIN(RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist), + RDCOST(x->rdmult, 0, rd_stats->sse)); if (this_rd > ref_best_rd) { is_cost_valid = 0; @@ -4543,7 +5276,7 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, &plane_block_height, &rows, &cols); if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return; - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; #if CONFIG_FILTER_INTRA mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0; #endif // CONFIG_FILTER_INTRA @@ -4689,7 +5422,7 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, } } - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist); + this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); if (this_rd < *best_rd) { *best_rd = this_rd; *best_mbmi = *mbmi; @@ -4727,7 +5460,7 @@ static int 
rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, av1_zero(filter_intra_mode_info); mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 1; - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; #if CONFIG_PALETTE mbmi->palette_mode_info.palette_size[1] = 0; #endif // CONFIG_PALETTE @@ -4741,7 +5474,7 @@ static int rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, av1_cost_bit(cpi->common.fc->filter_intra_probs[1], 1) + cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] + write_uniform_cost(FILTER_INTRA_MODES, mode); - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist); + this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); if (this_rd < *best_rd) { *best_rd = this_rd; *rate = this_rate; @@ -4754,7 +5487,7 @@ static int rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, } if (filter_intra_selected_flag) { - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = filter_intra_mode_info.use_filter_intra_mode[1]; mbmi->filter_intra_mode_info.filter_intra_mode[1] = @@ -4782,7 +5515,7 @@ static int64_t pick_intra_angle_routine_sbuv( if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in)) return INT64_MAX; this_rate = tokenonly_rd_stats.rate + rate_overhead; - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist); + this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); if (this_rd < *best_rd) { *best_rd = this_rd; *best_angle_delta = mbmi->angle_delta[1]; @@ -4852,8 +5585,172 @@ static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, } #endif // CONFIG_EXT_INTRA +#if CONFIG_CFL +static int64_t cfl_alpha_dist(const uint8_t *y_pix, int y_stride, + const int y_averages_q3[MAX_NUM_TXB], + const uint8_t *src, int src_stride, int width, + int height, TX_SIZE tx_size, int dc_pred, + int alpha_q3, int64_t *dist_neg_out) { + int64_t dist = 0; + int diff; + + if (alpha_q3 == 0) { + for (int j = 0; j < height; j++) { + for (int i = 0; i < width; i++) { + diff = src[i] - dc_pred; + dist += diff * diff; + } + src += src_stride; + } + + if (dist_neg_out) *dist_neg_out = dist; + + return dist; + } + + int64_t dist_neg = 0; + const int tx_height = tx_size_high[tx_size]; + const int tx_width = tx_size_wide[tx_size]; + const int y_block_row_off = y_stride * tx_height; + const int src_block_row_off = src_stride * tx_height; + const uint8_t *t_y_pix; + const uint8_t *t_src; + int a = 0; + for (int b_j = 0; b_j < height; b_j += tx_height) { + const int h = b_j + tx_height; + for (int b_i = 0; b_i < width; b_i += tx_width) { + const int w = b_i + tx_width; + const int tx_avg_q3 = y_averages_q3[a++]; + t_y_pix = y_pix; + t_src = src; + for (int t_j = b_j; t_j < h; t_j++) { + for (int t_i = b_i; t_i < w; t_i++) { + const int uv = t_src[t_i]; + + const int scaled_luma = + get_scaled_luma_q0(alpha_q3, t_y_pix[t_i], tx_avg_q3); + + // TODO(ltrudeau) add support for HBD. + diff = uv - clamp(scaled_luma + dc_pred, 0, 255); + dist += diff * diff; + + // TODO(ltrudeau) add support for HBD. 
+ diff = uv - clamp(-scaled_luma + dc_pred, 0, 255); + dist_neg += diff * diff; + } + t_y_pix += y_stride; + t_src += src_stride; + } + } + y_pix += y_block_row_off; + src += src_block_row_off; + } + + if (dist_neg_out) *dist_neg_out = dist_neg; + + return dist; +} + +static inline void cfl_update_costs(CFL_CTX *cfl, FRAME_CONTEXT *ec_ctx) { + assert(ec_ctx->cfl_alpha_cdf[CFL_ALPHABET_SIZE - 1] == + AOM_ICDF(CDF_PROB_TOP)); + + aom_cdf_prob prev_cdf = 0; + + for (int c = 0; c < CFL_ALPHABET_SIZE; c++) { + const int sign_bit_cost = (cfl_alpha_codes[c][CFL_PRED_U] != 0) + + (cfl_alpha_codes[c][CFL_PRED_V] != 0); + + aom_cdf_prob prob = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]) - prev_cdf; + prev_cdf = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]); + + cfl->costs[c] = av1_cost_symbol(prob) + av1_cost_literal(sign_bit_cost); + } +} + +static int cfl_rd_pick_alpha(MACROBLOCK *const x, TX_SIZE tx_size) { + const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U]; + const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V]; + const uint8_t *const src_u = p_u->src.buf; + const uint8_t *const src_v = p_v->src.buf; + const int src_stride_u = p_u->src.stride; + const int src_stride_v = p_v->src.stride; + + MACROBLOCKD *const xd = &x->e_mbd; + FRAME_CONTEXT *const ec_ctx = xd->tile_ctx; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + + CFL_CTX *const cfl = xd->cfl; + cfl_compute_parameters(xd, tx_size); + const int width = cfl->uv_width; + const int height = cfl->uv_height; + const int dc_pred_u = cfl->dc_pred[CFL_PRED_U]; + const int dc_pred_v = cfl->dc_pred[CFL_PRED_V]; + const int *y_averages_q3 = cfl->y_averages_q3; + const uint8_t *y_pix = cfl->y_down_pix; + + CFL_SIGN_TYPE *signs = mbmi->cfl_alpha_signs; + + cfl_update_costs(cfl, ec_ctx); + + int64_t sse[CFL_PRED_PLANES][CFL_MAGS_SIZE]; + sse[CFL_PRED_U][0] = + cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u, + width, height, tx_size, dc_pred_u, 0, NULL); + sse[CFL_PRED_V][0] = + cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v, + width, height, tx_size, dc_pred_v, 0, NULL); + + for (int m = 1; m < CFL_MAGS_SIZE; m += 2) { + assert(cfl_alpha_mags_q3[m + 1] == -cfl_alpha_mags_q3[m]); + sse[CFL_PRED_U][m] = cfl_alpha_dist( + y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u, width, height, + tx_size, dc_pred_u, cfl_alpha_mags_q3[m], &sse[CFL_PRED_U][m + 1]); + sse[CFL_PRED_V][m] = cfl_alpha_dist( + y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v, width, height, + tx_size, dc_pred_v, cfl_alpha_mags_q3[m], &sse[CFL_PRED_V][m + 1]); + } + + int64_t dist; + int64_t cost; + int64_t best_cost; + + // Compute least squares parameter of the entire block + // IMPORTANT: We assume that the first code is 0,0 + int ind = 0; + signs[CFL_PRED_U] = CFL_SIGN_POS; + signs[CFL_PRED_V] = CFL_SIGN_POS; + + dist = sse[CFL_PRED_U][0] + sse[CFL_PRED_V][0]; + dist *= 16; + best_cost = RDCOST(x->rdmult, cfl->costs[0], dist); + + for (int c = 1; c < CFL_ALPHABET_SIZE; c++) { + const int idx_u = cfl_alpha_codes[c][CFL_PRED_U]; + const int idx_v = cfl_alpha_codes[c][CFL_PRED_V]; + for (CFL_SIGN_TYPE sign_u = idx_u == 0; sign_u < CFL_SIGNS; sign_u++) { + for (CFL_SIGN_TYPE sign_v = idx_v == 0; sign_v < CFL_SIGNS; sign_v++) { + dist = sse[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] + + sse[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)]; + dist *= 16; + cost = RDCOST(x->rdmult, cfl->costs[c], dist); + if (cost < best_cost) { + best_cost = cost; + ind = c; + signs[CFL_PRED_U] = sign_u; + signs[CFL_PRED_V] = sign_v; + } + } + } + 
} + + mbmi->cfl_alpha_idx = ind; + return cfl->costs[ind]; +} +#endif // CONFIG_CFL + static void init_sbuv_mode(MB_MODE_INFO *const mbmi) { - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; #if CONFIG_PALETTE mbmi->palette_mode_info.palette_size[1] = 0; #endif // CONFIG_PALETTE @@ -4870,20 +5767,19 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; assert(!is_inter_block(mbmi)); MB_MODE_INFO best_mbmi = *mbmi; - PREDICTION_MODE mode; int64_t best_rd = INT64_MAX, this_rd; - int this_rate; - RD_STATS tokenonly_rd_stats; #if CONFIG_PVQ od_rollback_buffer buf; od_encode_checkpoint(&x->daala_enc, &buf); #endif // CONFIG_PVQ #if CONFIG_PALETTE PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; - uint8_t *best_palette_color_map = NULL; #endif // CONFIG_PALETTE - for (mode = DC_PRED; mode <= TM_PRED; ++mode) { + for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) { + int this_rate; + RD_STATS tokenonly_rd_stats; + UV_PREDICTION_MODE mode = uv_rd_search_mode_order[mode_idx]; #if CONFIG_EXT_INTRA const int is_directional_mode = av1_is_directional_mode(mode, mbmi->sb_type); @@ -4893,9 +5789,16 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, continue; mbmi->uv_mode = mode; +#if CONFIG_CFL + int cfl_alpha_rate = 0; + if (mode == UV_DC_PRED) { + const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]); + cfl_alpha_rate = cfl_rd_pick_alpha(x, uv_tx_size); + } +#endif #if CONFIG_EXT_INTRA mbmi->angle_delta[1] = 0; - if (is_directional_mode) { + if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) { const int rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode] + write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, 0); if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd, @@ -4915,8 +5818,13 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, this_rate = tokenonly_rd_stats.rate + cpi->intra_uv_mode_cost[mbmi->mode][mode]; +#if CONFIG_CFL + if (mode == UV_DC_PRED) { + this_rate += cfl_alpha_rate; + } +#endif #if CONFIG_EXT_INTRA - if (is_directional_mode) { + if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) { this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, MAX_ANGLE_DELTA + mbmi->angle_delta[1]); } @@ -4927,7 +5835,7 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, #endif // CONFIG_FILTER_INTRA #if CONFIG_PALETTE if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8 && - mode == DC_PRED) + mode == UV_DC_PRED) this_rate += av1_cost_bit( av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 0); #endif // CONFIG_PALETTE @@ -4935,7 +5843,7 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, #if CONFIG_PVQ od_encode_rollback(&x->daala_enc, &buf); #endif // CONFIG_PVQ - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist); + this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); if (this_rd < best_rd) { best_mbmi = *mbmi; @@ -4949,9 +5857,9 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, #if CONFIG_PALETTE if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8) { - best_palette_color_map = x->palette_buffer->best_palette_color_map; + uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map; rd_pick_palette_intra_sbuv(cpi, x, - cpi->intra_uv_mode_cost[mbmi->mode][DC_PRED], + 
cpi->intra_uv_mode_cost[mbmi->mode][UV_DC_PRED], best_palette_color_map, &best_mbmi, &best_rd, rate, rate_tokenonly, distortion, skippable); } @@ -4975,7 +5883,7 @@ static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x, PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize, TX_SIZE max_tx_size, int *rate_uv, int *rate_uv_tokenonly, int64_t *dist_uv, - int *skip_uv, PREDICTION_MODE *mode_uv) { + int *skip_uv, UV_PREDICTION_MODE *mode_uv) { // Use an estimated rd for uv_intra based on DC_PRED if the // appropriate speed flag is set. (void)ctx; @@ -4990,7 +5898,7 @@ static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x, *rate_uv_tokenonly = 0; *dist_uv = 0; *skip_uv = 1; - *mode_uv = DC_PRED; + *mode_uv = UV_DC_PRED; return; } BLOCK_SIZE bs = scale_chroma_bsize(bsize, x->e_mbd.plane[1].subsampling_x, @@ -5011,6 +5919,12 @@ static int cost_mv_ref(const AV1_COMP *const cpi, PREDICTION_MODE mode, if (is_inter_compound_mode(mode)) { return cpi ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)]; +#if CONFIG_COMPOUND_SINGLEREF + } else if (is_inter_singleref_comp_mode(mode)) { + return cpi + ->inter_singleref_comp_mode_cost[mode_context] + [INTER_SINGLEREF_COMP_OFFSET(mode)]; +#endif // CONFIG_COMPOUND_SINGLEREF } #endif @@ -5096,8 +6010,13 @@ typedef struct { int segment_yrate; PREDICTION_MODE modes[4]; #if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + SEG_RDSTAT rdstat[4][INTER_MODES + INTER_SINGLEREF_COMP_MODES + + INTER_COMPOUND_MODES]; +#else // !CONFIG_COMPOUND_SINGLEREF SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES]; -#else +#endif // CONFIG_COMPOUND_SINGLEREF +#else // !CONFIG_EXT_INTER SEG_RDSTAT rdstat[4][INTER_MODES]; #endif // CONFIG_EXT_INTER int mvthresh; @@ -5120,27 +6039,28 @@ static int check_best_zero_mv( int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME], int this_mode, const MV_REFERENCE_FRAME ref_frames[2], const BLOCK_SIZE bsize, int block, int mi_row, int mi_col) { - int_mv zeromv[2]; + int_mv zeromv[2] = { {.as_int = 0 } }; +#if CONFIG_GLOBAL_MOTION int comp_pred_mode = ref_frames[1] > INTRA_FRAME; - int cur_frm; +#endif (void)mi_row; (void)mi_col; - for (cur_frm = 0; cur_frm < 1 + comp_pred_mode; cur_frm++) { #if CONFIG_GLOBAL_MOTION - if (this_mode == ZEROMV + if (this_mode == ZEROMV #if CONFIG_EXT_INTER - || this_mode == ZERO_ZEROMV + || this_mode == ZERO_ZEROMV #endif // CONFIG_EXT_INTER - ) + ) { + for (int cur_frm = 0; cur_frm < 1 + comp_pred_mode; cur_frm++) { zeromv[cur_frm].as_int = gm_get_motion_vector(&cpi->common.global_motion[ref_frames[cur_frm]], cpi->common.allow_high_precision_mv, bsize, mi_col, mi_row, block) .as_int; - else -#endif // CONFIG_GLOBAL_MOTION - zeromv[cur_frm].as_int = 0; + } } +#endif // CONFIG_GLOBAL_MOTION + #if !CONFIG_EXT_INTER assert(ref_frames[1] != INTRA_FRAME); // Just sanity check #endif // !CONFIG_EXT_INTER @@ -5201,8 +6121,11 @@ static int check_best_zero_mv( } static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row, - int mi_col, + BLOCK_SIZE bsize, int_mv *frame_mv, +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + int_mv *frame_comp_mv, +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + int mi_row, int mi_col, #if CONFIG_EXT_INTER int_mv *ref_mv_sub8x8[2], const uint8_t *mask, int mask_stride, @@ -5213,35 +6136,47 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, const int ph = block_size_high[bsize]; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = 
&xd->mi[0]->mbmi; - // This function should only ever be called for compound modes +// This function should only ever be called for compound modes +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) { + assert(is_inter_singleref_comp_mode(mbmi->mode)); + assert(frame_comp_mv); + } + assert(has_second_ref(mbmi) || is_inter_singleref_comp_mode(mbmi->mode)); + const int refs[2] = { mbmi->ref_frame[0], has_second_ref(mbmi) + ? mbmi->ref_frame[1] + : mbmi->ref_frame[0] }; +#else assert(has_second_ref(mbmi)); const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] }; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF int_mv ref_mv[2]; int ite, ref; -#if CONFIG_DUAL_FILTER - InterpFilter interp_filter[4] = { - mbmi->interp_filter[0], mbmi->interp_filter[1], mbmi->interp_filter[2], - mbmi->interp_filter[3], - }; -#else - const InterpFilter interp_filter = mbmi->interp_filter; -#endif // CONFIG_DUAL_FILTER struct scale_factors sf; - struct macroblockd_plane *const pd = &xd->plane[0]; #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION // ic and ir are the 4x4 coordiantes of the sub8x8 at index "block" const int ic = block & 1; const int ir = (block - ic) >> 1; + struct macroblockd_plane *const pd = &xd->plane[0]; const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic; const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir; #if CONFIG_GLOBAL_MOTION int is_global[2]; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) { +#else for (ref = 0; ref < 2; ++ref) { +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF WarpedMotionParams *const wm = &xd->global_motion[xd->mi[0]->mbmi.ref_frame[ref]]; is_global[ref] = is_global_mv_block(xd->mi[0], block, wm->wmtype); } +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) is_global[1] = is_global[0]; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF #endif // CONFIG_GLOBAL_MOTION +#else // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION + (void)block; #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION // Do joint motion search in compound mode to get more accurate mv. @@ -5264,7 +6199,11 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, (void)ref_mv_sub8x8; #endif // CONFIG_EXT_INTER && CONFIG_CB4X4 +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) { +#else for (ref = 0; ref < 2; ++ref) { +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF #if CONFIG_EXT_INTER && !CONFIG_CB4X4 if (bsize < BLOCK_8X8 && ref_mv_sub8x8 != NULL) ref_mv[ref].as_int = ref_mv_sub8x8[ref]->as_int; @@ -5284,6 +6223,24 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, } } +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) { + assert(is_inter_singleref_comp_mode(mbmi->mode)); + // NOTE: For single ref comp mode, set up the 2nd set of ref_mv/pre_planes + // all from the 1st reference frame, i.e. refs[0]. + ref_mv[1] = x->mbmi_ext->ref_mvs[refs[0]][0]; + if (scaled_ref_frame[0]) { + int i; + // Swap out the reference frame for a version that's been scaled to + // match the resolution of the current frame, allowing the existing + // motion search code to be used without additional modifications. 
+ for (i = 0; i < MAX_MB_PLANE; i++) + backup_yv12[1][i] = xd->plane[i].pre[1]; + av1_setup_pre_planes(xd, 1, scaled_ref_frame[0], mi_row, mi_col, NULL); + } + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // Since we have scaled the reference frames to match the size of the current // frame we must use a unit scaling factor during mode selection. #if CONFIG_HIGHBITDEPTH @@ -5294,9 +6251,16 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, cm->height); #endif // CONFIG_HIGHBITDEPTH - // Allow joint search multiple times iteratively for each reference frame - // and break out of the search loop if it couldn't find a better mv. +// Allow joint search multiple times iteratively for each reference frame +// and break out of the search loop if it couldn't find a better mv. +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + const int num_ites = + (has_second_ref(mbmi) || mbmi->mode == SR_NEW_NEWMV) ? 4 : 1; + const int start_ite = has_second_ref(mbmi) ? 0 : 1; + for (ite = start_ite; ite < (start_ite + num_ites); ite++) { +#else for (ite = 0; ite < 4; ite++) { +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF struct buf_2d ref_yv12[2]; int bestsme = INT_MAX; int sadpb = x->sadperbit16; @@ -5308,7 +6272,7 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, // odd iterations search in the second. The predictor // found for the 'other' reference frame is factored in. const int plane = 0; - ConvolveParams conv_params = get_conv_params(0, plane); + ConvolveParams conv_params = get_conv_params(!id, 0, plane); #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION WarpTypesAllowed warp_types; #if CONFIG_GLOBAL_MOTION @@ -5323,21 +6287,24 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, ref_yv12[0] = xd->plane[plane].pre[0]; ref_yv12[1] = xd->plane[plane].pre[1]; -#if CONFIG_DUAL_FILTER - // reload the filter types - interp_filter[0] = - (id == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0]; - interp_filter[1] = - (id == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1]; -#endif // CONFIG_DUAL_FILTER - // Get the prediction block from the 'other' reference frame. +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + MV *const the_other_mv = (has_second_ref(mbmi) || id) + ? 
&frame_mv[refs[!id]].as_mv + : &frame_comp_mv[refs[0]].as_mv; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16); av1_highbd_build_inter_predictor( ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw, - &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0, interp_filter, +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + the_other_mv, +#else // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF) + &frame_mv[refs[!id]].as_mv, +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + &sf, pw, ph, 0, mbmi->interp_filter, #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION &warp_types, p_col, p_row, #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION @@ -5347,7 +6314,12 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, #endif // CONFIG_HIGHBITDEPTH av1_build_inter_predictor( ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw, - &frame_mv[refs[!id]].as_mv, &sf, pw, ph, &conv_params, interp_filter, +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + the_other_mv, +#else // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF) + &frame_mv[refs[!id]].as_mv, +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + &sf, pw, ph, &conv_params, mbmi->interp_filter, #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION &warp_types, p_col, p_row, plane, !id, #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION @@ -5360,13 +6332,24 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, if (id) xd->plane[plane].pre[0] = ref_yv12[id]; av1_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv); - // Use the mv result from the single mode as mv predictor. - *best_mv = frame_mv[refs[id]].as_mv; +// Use the mv result from the single mode as mv predictor. +// Use the mv result from the single mode as mv predictor. +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi) && id) + *best_mv = frame_comp_mv[refs[0]].as_mv; + else +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + *best_mv = frame_mv[refs[id]].as_mv; best_mv->col >>= 3; best_mv->row >>= 3; - av1_set_mvcost(x, refs[id], id, mbmi->ref_mv_idx); +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) + av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx); + else +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + av1_set_mvcost(x, refs[id], id, mbmi->ref_mv_idx); // Small-range full-pixel motion search. bestsme = @@ -5392,60 +6375,33 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, if (bestsme < INT_MAX) { int dis; /* TODO: use dis in distortion calculation later. */ unsigned int sse; - if (cpi->sf.use_upsampled_references) { - // Use up-sampled reference frames. 
- struct buf_2d backup_pred = pd->pre[0]; - const YV12_BUFFER_CONFIG *upsampled_ref = - get_upsampled_ref(cpi, refs[id]); - - // Set pred for Y plane - setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer, - upsampled_ref->y_crop_width, - upsampled_ref->y_crop_height, upsampled_ref->y_stride, - (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x, - pd->subsampling_y); - -// If bsize < BLOCK_8X8, adjust pred pointer for this block -#if !CONFIG_CB4X4 - if (bsize < BLOCK_8X8) - pd->pre[0].buf = - &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block, - pd->pre[0].stride)) - << 3]; -#endif // !CONFIG_CB4X4 - - bestsme = cpi->find_fractional_mv_step( - x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv, - x->errorperbit, &cpi->fn_ptr[bsize], 0, - cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost, - &dis, &sse, second_pred, -#if CONFIG_EXT_INTER - mask, mask_stride, id, -#endif - pw, ph, 1); - - // Restore the reference frames. - pd->pre[0] = backup_pred; - } else { - (void)block; - bestsme = cpi->find_fractional_mv_step( - x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv, - x->errorperbit, &cpi->fn_ptr[bsize], 0, - cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost, - &dis, &sse, second_pred, + bestsme = cpi->find_fractional_mv_step( + x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv, + x->errorperbit, &cpi->fn_ptr[bsize], 0, + cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost, + &dis, &sse, second_pred, #if CONFIG_EXT_INTER - mask, mask_stride, id, + mask, mask_stride, id, #endif - pw, ph, 0); - } + pw, ph, cpi->sf.use_upsampled_references); } // Restore the pointer to the first (possibly scaled) prediction buffer. if (id) xd->plane[plane].pre[0] = ref_yv12[0]; if (bestsme < last_besterr[id]) { - frame_mv[refs[id]].as_mv = *best_mv; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // NOTE: For single ref comp mode, frame_mv stores the first mv and + // frame_comp_mv stores the second mv. + if (!has_second_ref(mbmi) && id) + frame_comp_mv[refs[0]].as_mv = *best_mv; + else +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + frame_mv[refs[id]].as_mv = *best_mv; last_besterr[id] = bestsme; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) last_besterr[!id] = last_besterr[id]; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF } else { break; } @@ -5453,40 +6409,92 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, *rate_mv = 0; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) { +#else for (ref = 0; ref < 2; ++ref) { +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF if (scaled_ref_frame[ref]) { // Restore the prediction frame pointers to their unscaled versions. int i; for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[ref] = backup_yv12[ref][i]; } - av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx); + +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) + av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx); + else +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx); + +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) { + // NOTE: For single ref comp mode, i.e. !has_second_ref(mbmi) is true, the + // first mv is stored in frame_mv[] and the second mv is stored in + // frame_comp_mv[]. 
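The alternating search above refines one side's MV per pass against the inter predictor built from the other side, and stops as soon as a pass fails to improve the error. With CONFIG_COMPOUND_SINGLEREF both MVs can belong to the same reference frame, and the NOTE above fixes the convention: the first MV lives in frame_mv[] and the second in frame_comp_mv[]. A minimal sketch of that addressing, using a hypothetical helper that is not part of the patch (int_mv and MV are the usual aom types):

/* Hypothetical helper, for illustration only: which MV slot is refined on
 * pass `id` of the alternating search, and which slot supplies the fixed
 * "other" prediction. Mirrors the the_other_mv / best_mv selection above. */
static void select_joint_mvs(int has_second_ref, int id,
                             MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1,
                             int_mv *frame_mv, int_mv *frame_comp_mv,
                             MV **cur_mv, MV **other_mv) {
  if (has_second_ref) {
    /* Ordinary compound: refine refs[id] against the predictor from refs[!id]. */
    *cur_mv = &frame_mv[id ? ref1 : ref0].as_mv;
    *other_mv = &frame_mv[id ? ref0 : ref1].as_mv;
  } else {
    /* Single-ref compound: both MVs point at ref0; the first is kept in
     * frame_mv[] and the second in frame_comp_mv[]. */
    *cur_mv = id ? &frame_comp_mv[ref0].as_mv : &frame_mv[ref0].as_mv;
    *other_mv = id ? &frame_mv[ref0].as_mv : &frame_comp_mv[ref0].as_mv;
  }
}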
+ if (compound_ref0_mode(mbmi->mode) == NEWMV) // SR_NEW_NEWMV + *rate_mv += av1_mv_bit_cost(&frame_mv[refs[0]].as_mv, + &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + assert(compound_ref1_mode(mbmi->mode) == NEWMV); + *rate_mv += av1_mv_bit_cost(&frame_comp_mv[refs[0]].as_mv, + &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + } else { +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF #if CONFIG_EXT_INTER && !CONFIG_CB4X4 - if (bsize >= BLOCK_8X8) + if (bsize >= BLOCK_8X8) #endif // CONFIG_EXT_INTER && !CONFIG_CB4X4 - *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv, - &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv, - x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv, + &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); #if CONFIG_EXT_INTER && !CONFIG_CB4X4 - else - *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv, - &ref_mv_sub8x8[ref]->as_mv, x->nmvjointcost, - x->mvcost, MV_COST_WEIGHT); + else + *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv, + &ref_mv_sub8x8[ref]->as_mv, x->nmvjointcost, + x->mvcost, MV_COST_WEIGHT); #endif // CONFIG_EXT_INTER && !CONFIG_CB4X4 +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF } + +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) { + if (scaled_ref_frame[0]) { + // Restore the prediction frame pointers to their unscaled versions. + int i; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[1] = backup_yv12[1][i]; + } + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF } -static void estimate_ref_frame_costs(const AV1_COMMON *cm, - const MACROBLOCKD *xd, int segment_id, - unsigned int *ref_costs_single, - unsigned int *ref_costs_comp, - aom_prob *comp_mode_p) { +static void estimate_ref_frame_costs( + const AV1_COMMON *cm, const MACROBLOCKD *xd, int segment_id, + unsigned int *ref_costs_single, +#if CONFIG_EXT_COMP_REFS + unsigned int (*ref_costs_comp)[TOTAL_REFS_PER_FRAME], +#else + unsigned int *ref_costs_comp, +#endif // CONFIG_EXT_COMP_REFS + aom_prob *comp_mode_p) { int seg_ref_active = segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME); if (seg_ref_active) { memset(ref_costs_single, 0, TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_single)); +#if CONFIG_EXT_COMP_REFS + int ref_frame; + for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame) + memset(ref_costs_comp[ref_frame], 0, + TOTAL_REFS_PER_FRAME * sizeof((*ref_costs_comp)[0])); +#else memset(ref_costs_comp, 0, TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_comp)); +#endif // CONFIG_EXT_COMP_REFS + *comp_mode_p = 128; } else { aom_prob intra_inter_p = av1_get_intra_inter_prob(cm, xd); @@ -5541,7 +6549,7 @@ static void estimate_ref_frame_costs(const AV1_COMMON *cm, ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p5, 0); ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p5, 1); -#else +#else // !CONFIG_EXT_REFS ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0); ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 1); ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1); @@ -5570,6 +6578,63 @@ static void estimate_ref_frame_costs(const AV1_COMMON *cm, unsigned int base_cost = av1_cost_bit(intra_inter_p, 1); +#if CONFIG_EXT_COMP_REFS + aom_prob comp_ref_type_p = av1_get_comp_reference_type_prob(cm, xd); + unsigned int 
ref_bicomp_costs[TOTAL_REFS_PER_FRAME] = { 0 }; + + ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] = + ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] = +#if USE_UNI_COMP_REFS + base_cost + av1_cost_bit(comp_ref_type_p, 1); +#else + base_cost; +#endif // USE_UNI_COMP_REFS + ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF_FRAME] = 0; + + ref_bicomp_costs[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0); + ref_bicomp_costs[LAST2_FRAME] += av1_cost_bit(ref_comp_p, 0); + ref_bicomp_costs[LAST3_FRAME] += av1_cost_bit(ref_comp_p, 1); + ref_bicomp_costs[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1); + + ref_bicomp_costs[LAST_FRAME] += av1_cost_bit(ref_comp_p1, 1); + ref_bicomp_costs[LAST2_FRAME] += av1_cost_bit(ref_comp_p1, 0); + + ref_bicomp_costs[LAST3_FRAME] += av1_cost_bit(ref_comp_p2, 0); + ref_bicomp_costs[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p2, 1); + + ref_bicomp_costs[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0); + ref_bicomp_costs[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1); + + int ref0; + for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) { + ref_costs_comp[ref0][BWDREF_FRAME] = + ref_bicomp_costs[ref0] + ref_bicomp_costs[BWDREF_FRAME]; + ref_costs_comp[ref0][ALTREF_FRAME] = + ref_bicomp_costs[ref0] + ref_bicomp_costs[ALTREF_FRAME]; + } + + aom_prob uni_comp_ref_p = av1_get_pred_prob_uni_comp_ref_p(cm, xd); + aom_prob uni_comp_ref_p1 = av1_get_pred_prob_uni_comp_ref_p1(cm, xd); + aom_prob uni_comp_ref_p2 = av1_get_pred_prob_uni_comp_ref_p2(cm, xd); + + ref_costs_comp[LAST_FRAME][LAST2_FRAME] = + base_cost + av1_cost_bit(comp_ref_type_p, 0) + + av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 0); + ref_costs_comp[LAST_FRAME][LAST3_FRAME] = + base_cost + av1_cost_bit(comp_ref_type_p, 0) + + av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 1) + + av1_cost_bit(uni_comp_ref_p2, 0); + ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = + base_cost + av1_cost_bit(comp_ref_type_p, 0) + + av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 1) + + av1_cost_bit(uni_comp_ref_p2, 1); + + ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = + base_cost + av1_cost_bit(comp_ref_type_p, 0) + + av1_cost_bit(uni_comp_ref_p, 1); + +#else // !CONFIG_EXT_COMP_REFS + ref_costs_comp[LAST_FRAME] = #if CONFIG_EXT_REFS ref_costs_comp[LAST2_FRAME] = ref_costs_comp[LAST3_FRAME] = @@ -5596,11 +6661,23 @@ static void estimate_ref_frame_costs(const AV1_COMMON *cm, // more bit. 
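Under CONFIG_EXT_COMP_REFS the compound-reference cost becomes a two-dimensional table indexed by both references, and each entry is a sum of av1_cost_bit() terms along the signalling tree built above (the comp_ref_type bit when USE_UNI_COMP_REFS is on, then the forward-ref and backward-ref bits, or the uni-directional bits). As a worked example, here is a sketch of how the (LAST_FRAME, ALTREF_FRAME) entry is assembled; the helper is illustrative only and takes the context probabilities as parameters:

/* Illustrative only: ref_costs_comp[LAST_FRAME][ALTREF_FRAME] as built above
 * is base_cost plus these bit costs. av1_cost_bit(p, b) is the fixed-point
 * coding cost of bit b under probability p. */
static unsigned int cost_last_altref_pair_sketch(unsigned int base_cost,
                                                 aom_prob comp_ref_type_p,
                                                 aom_prob ref_comp_p,
                                                 aom_prob ref_comp_p1,
                                                 aom_prob bwdref_comp_p) {
  unsigned int c = base_cost;
  c += av1_cost_bit(comp_ref_type_p, 1); /* bi-directional pair (USE_UNI_COMP_REFS) */
  c += av1_cost_bit(ref_comp_p, 0);      /* forward ref is LAST or LAST2 */
  c += av1_cost_bit(ref_comp_p1, 1);     /* ... specifically LAST */
  c += av1_cost_bit(bwdref_comp_p, 1);   /* backward ref is ALTREF */
  return c;
}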
ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0); ref_costs_comp[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1); -#else +#else // !CONFIG_EXT_REFS ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0); ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1); #endif // CONFIG_EXT_REFS +#endif // CONFIG_EXT_COMP_REFS } else { +#if CONFIG_EXT_COMP_REFS + int ref0; + for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) { + ref_costs_comp[ref0][BWDREF_FRAME] = 512; + ref_costs_comp[ref0][ALTREF_FRAME] = 512; + } + ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512; + ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512; + ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512; + ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512; +#else // !CONFIG_EXT_COMP_REFS ref_costs_comp[LAST_FRAME] = 512; #if CONFIG_EXT_REFS ref_costs_comp[LAST2_FRAME] = 512; @@ -5609,6 +6686,7 @@ static void estimate_ref_frame_costs(const AV1_COMMON *cm, ref_costs_comp[ALTREF_FRAME] = 512; #endif // CONFIG_EXT_REFS ref_costs_comp[GOLDEN_FRAME] = 512; +#endif // CONFIG_EXT_COMP_REFS } } } @@ -5693,8 +6771,13 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, int sadpb = x->sadperbit16; MV mvp_full; #if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + int ref = + has_second_ref(mbmi) ? mbmi->ref_frame[ref_idx] : mbmi->ref_frame[0]; +#else // !CONFIG_COMPOUND_SINGLEREF int ref = mbmi->ref_frame[ref_idx]; -#else +#endif // CONFIG_COMPOUND_SINGLEREF +#else // !CONFIG_EXT_INTER int ref = mbmi->ref_frame[0]; int ref_idx = 0; #endif // CONFIG_EXT_INTER @@ -5802,7 +6885,7 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv, &(x->best_mv.as_mv), 0); break; - default: assert("Invalid motion mode!\n"); + default: assert(0 && "Invalid motion mode!\n"); } #endif // CONFIG_MOTION_VAR @@ -5820,17 +6903,6 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, x->second_best_mv.as_int != x->best_mv.as_int; const int pw = block_size_wide[bsize]; const int ph = block_size_high[bsize]; - // Use up-sampled reference frames. - struct macroblockd_plane *const pd = &xd->plane[0]; - struct buf_2d backup_pred = pd->pre[ref_idx]; - const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref); - - // Set pred for Y plane - setup_pred_plane( - &pd->pre[ref_idx], bsize, upsampled_ref->y_buffer, - upsampled_ref->y_crop_width, upsampled_ref->y_crop_height, - upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), NULL, - pd->subsampling_x, pd->subsampling_y); best_mv_var = cpi->find_fractional_mv_step( x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit, @@ -5873,9 +6945,6 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, x->best_mv.as_mv = best_mv; } } - - // Restore the reference frames. 
- pd->pre[ref_idx] = backup_pred; } else { cpi->find_fractional_mv_step( x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit, @@ -5891,13 +6960,12 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, break; case OBMC_CAUSAL: av1_find_best_obmc_sub_pixel_tree_up( - cpi, x, mi_row, mi_col, &x->best_mv.as_mv, &ref_mv, - cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step, - x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], 0, - cpi->sf.use_upsampled_references); + x, &x->best_mv.as_mv, &ref_mv, cm->allow_high_precision_mv, + x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, + cpi->sf.mv.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &dis, + &x->pred_sse[ref], 0, cpi->sf.use_upsampled_references); break; - default: assert("Invalid motion mode!\n"); + default: assert(0 && "Invalid motion mode!\n"); } #endif // CONFIG_MOTION_VAR } @@ -5936,15 +7004,12 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x, const int ph = block_size_high[bsize]; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; +#if CONFIG_COMPOUND_SINGLEREF + const int other_ref = + has_second_ref(mbmi) ? mbmi->ref_frame[!ref_idx] : mbmi->ref_frame[0]; +#else // !CONFIG_COMPOUND_SINGLEREF const int other_ref = mbmi->ref_frame[!ref_idx]; -#if CONFIG_DUAL_FILTER - InterpFilter interp_filter[2] = { - (ref_idx == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0], - (ref_idx == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1] - }; -#else - const InterpFilter interp_filter = mbmi->interp_filter; -#endif // CONFIG_DUAL_FILTER +#endif // CONFIG_COMPOUND_SINGLEREF struct scale_factors sf; #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION struct macroblockd_plane *const pd = &xd->plane[0]; @@ -5961,8 +7026,12 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x, (void)block; #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION - // This function should only ever be called for compound modes +// This function should only ever be called for compound modes +#if CONFIG_COMPOUND_SINGLEREF + assert(has_second_ref(mbmi) || is_inter_singleref_comp_mode(mbmi->mode)); +#else // !CONFIG_COMPOUND_SINGLEREF assert(has_second_ref(mbmi)); +#endif // CONFIG_COMPOUND_SINGLEREF struct buf_2d backup_yv12[MAX_MB_PLANE]; const YV12_BUFFER_CONFIG *const scaled_ref_frame = @@ -5991,7 +7060,7 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x, struct buf_2d ref_yv12; const int plane = 0; - ConvolveParams conv_params = get_conv_params(0, plane); + ConvolveParams conv_params = get_conv_params(!ref_idx, 0, plane); #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION WarpTypesAllowed warp_types; #if CONFIG_GLOBAL_MOTION @@ -6010,7 +7079,7 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x, if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { av1_highbd_build_inter_predictor( ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph, - 0, interp_filter, + 0, mbmi->interp_filter, #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION &warp_types, p_col, p_row, #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION @@ -6019,7 +7088,7 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x, #endif // CONFIG_HIGHBITDEPTH av1_build_inter_predictor( ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph, - &conv_params, interp_filter, + &conv_params, mbmi->interp_filter, #if CONFIG_GLOBAL_MOTION || 
CONFIG_WARPED_MOTION &warp_types, p_col, p_row, plane, !ref_idx, #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION @@ -6038,15 +7107,22 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x, // Search for the best mv for one component of a compound, // given that the other component is fixed. -static void compound_single_motion_search( - const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *this_mv, - int mi_row, int mi_col, const uint8_t *second_pred, const uint8_t *mask, - int mask_stride, int *rate_mv, const int block, int ref_idx) { +static void compound_single_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE bsize, MV *this_mv, + int mi_row, int mi_col, + const uint8_t *second_pred, + const uint8_t *mask, int mask_stride, + int *rate_mv, int ref_idx) { const int pw = block_size_wide[bsize]; const int ph = block_size_high[bsize]; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; +#if CONFIG_COMPOUND_SINGLEREF + const int ref = + has_second_ref(mbmi) ? mbmi->ref_frame[ref_idx] : mbmi->ref_frame[0]; +#else const int ref = mbmi->ref_frame[ref_idx]; +#endif // CONFIG_COMPOUND_SINGLEREF int_mv ref_mv = x->mbmi_ext->ref_mvs[ref][0]; struct macroblockd_plane *const pd = &xd->plane[0]; @@ -6054,9 +7130,16 @@ static void compound_single_motion_search( const YV12_BUFFER_CONFIG *const scaled_ref_frame = av1_get_scaled_ref_frame(cpi, ref); - // Check that this is either an interinter or an interintra block +// Check that this is either an interinter or an interintra block +#if CONFIG_COMPOUND_SINGLEREF assert(has_second_ref(mbmi) || + // or a single ref comp pred mode + is_inter_singleref_comp_mode(mbmi->mode) || (ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME)); +#else + assert(has_second_ref(mbmi) || + (ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME)); +#endif // CONFIG_COMPOUND_SINGLEREF if (scaled_ref_frame) { int i; @@ -6091,7 +7174,12 @@ static void compound_single_motion_search( best_mv->col >>= 3; best_mv->row >>= 3; - av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx); +#if CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) + av1_set_mvcost(x, ref, 0, mbmi->ref_mv_idx); + else +#endif // CONFIG_COMPOUND_SINGLEREF + av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx); // Small-range full-pixel motion search. bestsme = av1_refining_search_8p_c(x, sadpb, search_range, @@ -6112,43 +7200,11 @@ static void compound_single_motion_search( if (bestsme < INT_MAX) { int dis; /* TODO: use dis in distortion calculation later. */ unsigned int sse; - if (cpi->sf.use_upsampled_references) { - // Use up-sampled reference frames. 
- struct buf_2d backup_pred = pd->pre[0]; - const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref); - - // Set pred for Y plane - setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer, - upsampled_ref->y_crop_width, - upsampled_ref->y_crop_height, upsampled_ref->y_stride, - (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x, - pd->subsampling_y); - -// If bsize < BLOCK_8X8, adjust pred pointer for this block -#if !CONFIG_CB4X4 - if (bsize < BLOCK_8X8) - pd->pre[0].buf = - &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block, - pd->pre[0].stride)) - << 3]; -#endif // !CONFIG_CB4X4 - - bestsme = cpi->find_fractional_mv_step( - x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, - &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL, - x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, - mask_stride, ref_idx, pw, ph, 1); - - // Restore the reference frames. - pd->pre[0] = backup_pred; - } else { - (void)block; - bestsme = cpi->find_fractional_mv_step( - x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, - &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL, - x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, - mask_stride, ref_idx, pw, ph, 0); - } + bestsme = cpi->find_fractional_mv_step( + x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, + &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL, + x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, mask_stride, + ref_idx, pw, ph, cpi->sf.use_upsampled_references); } // Restore the pointer to the first (possibly scaled) prediction buffer. @@ -6165,7 +7221,12 @@ static void compound_single_motion_search( xd->plane[i].pre[ref_idx] = backup_yv12[i]; } - av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx); +#if CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) + av1_set_mvcost(x, ref, 0, mbmi->ref_mv_idx); + else +#endif // CONFIG_COMPOUND_SINGLEREF + av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx); *rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); } @@ -6174,13 +7235,23 @@ static void compound_single_motion_search( // where the second prediction is also an inter mode. static void compound_single_motion_search_interinter( const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv, +#if CONFIG_COMPOUND_SINGLEREF + int_mv *frame_comp_mv, +#endif // CONFIG_COMPOUND_SINGLEREF int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv, const int block, int ref_idx) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - // This function should only ever be called for compound modes +// This function should only ever be called for compound modes +#if CONFIG_COMPOUND_SINGLEREF + int is_singleref_comp_mode = + !has_second_ref(mbmi) && is_inter_singleref_comp_mode(mbmi->mode); + assert(has_second_ref(mbmi) || is_singleref_comp_mode); + if (is_singleref_comp_mode && ref_idx) assert(frame_comp_mv); +#else // !CONFIG_COMPOUND_SINGLEREF assert(has_second_ref(mbmi)); +#endif // CONFIG_COMPOUND_SINGLEREF // Prediction buffer from second frame. #if CONFIG_HIGHBITDEPTH @@ -6194,14 +7265,26 @@ static void compound_single_motion_search_interinter( DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]); #endif // CONFIG_HIGHBITDEPTH +#if CONFIG_COMPOUND_SINGLEREF + MV *this_mv = has_second_ref(mbmi) + ? &frame_mv[mbmi->ref_frame[ref_idx]].as_mv + : (ref_idx ? 
&frame_comp_mv[mbmi->ref_frame[0]].as_mv + : &frame_mv[mbmi->ref_frame[0]].as_mv); + const MV *other_mv = + has_second_ref(mbmi) + ? &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv + : (ref_idx ? &frame_mv[mbmi->ref_frame[0]].as_mv + : &frame_comp_mv[mbmi->ref_frame[0]].as_mv); +#else // !CONFIG_COMPOUND_SINGLEREF MV *this_mv = &frame_mv[mbmi->ref_frame[ref_idx]].as_mv; const MV *other_mv = &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv; +#endif // CONFIG_COMPOUND_SINGLEREF build_second_inter_pred(cpi, x, bsize, other_mv, mi_row, mi_col, block, ref_idx, second_pred); compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col, - second_pred, mask, mask_stride, rate_mv, block, + second_pred, mask, mask_stride, rate_mv, ref_idx); } @@ -6220,21 +7303,40 @@ static void do_masked_motion_search_indexed( mask = av1_get_compound_type_mask(comp_data, sb_type); int_mv frame_mv[TOTAL_REFS_PER_FRAME]; +#if CONFIG_COMPOUND_SINGLEREF + int_mv frame_comp_mv[TOTAL_REFS_PER_FRAME]; +#endif // CONFIG_COMPOUND_SINGLEREF MV_REFERENCE_FRAME rf[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] }; assert(bsize >= BLOCK_8X8 || CONFIG_CB4X4); frame_mv[rf[0]].as_int = cur_mv[0].as_int; - frame_mv[rf[1]].as_int = cur_mv[1].as_int; +#if CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) + frame_comp_mv[rf[0]].as_int = cur_mv[1].as_int; + else +#endif // CONFIG_COMPOUND_SINGLEREF + frame_mv[rf[1]].as_int = cur_mv[1].as_int; if (which == 0 || which == 1) { - compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, mi_row, - mi_col, mask, mask_stride, rate_mv, - 0, which); + compound_single_motion_search_interinter( + cpi, x, bsize, frame_mv, +#if CONFIG_COMPOUND_SINGLEREF + has_second_ref(mbmi) ? NULL : frame_comp_mv, +#endif // CONFIG_COMPOUND_SINGLEREF + mi_row, mi_col, mask, mask_stride, rate_mv, 0, which); } else if (which == 2) { - joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, mask, - mask_stride, rate_mv, 0); + joint_motion_search(cpi, x, bsize, frame_mv, +#if CONFIG_COMPOUND_SINGLEREF + has_second_ref(mbmi) ? 
NULL : frame_comp_mv, +#endif // CONFIG_COMPOUND_SINGLEREF + mi_row, mi_col, NULL, mask, mask_stride, rate_mv, 0); } tmp_mv[0].as_int = frame_mv[rf[0]].as_int; - tmp_mv[1].as_int = frame_mv[rf[1]].as_int; +#if CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) + tmp_mv[1].as_int = frame_comp_mv[rf[0]].as_int; + else // comp ref +#endif // CONFIG_COMPOUND_SINGLEREF + tmp_mv[1].as_int = frame_mv[rf[1]].as_int; } #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE #endif // CONFIG_EXT_INTER @@ -6483,7 +7585,7 @@ static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x, sse = ROUND_POWER_OF_TWO(sse, bd_round); model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist); - rd = RDCOST(x->rdmult, x->rddiv, rate, dist); + rd = RDCOST(x->rdmult, rate, dist); if (rd < best_rd) { *best_wedge_index = wedge_index; @@ -6544,7 +7646,7 @@ static int64_t pick_wedge_fixed_sign( sse = ROUND_POWER_OF_TWO(sse, bd_round); model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist); - rd = RDCOST(x->rdmult, x->rddiv, rate, dist); + rd = RDCOST(x->rdmult, rate, dist); if (rd < best_rd) { *best_wedge_index = wedge_index; @@ -6646,7 +7748,7 @@ static int64_t pick_interinter_seg(const AV1_COMP *const cpi, sse = ROUND_POWER_OF_TWO(sse, bd_round); model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist); - rd0 = RDCOST(x->rdmult, x->rddiv, rate, dist); + rd0 = RDCOST(x->rdmult, rate, dist); if (rd0 < best_rd) { best_mask_type = cur_mask_type; @@ -6729,7 +7831,17 @@ static int interinter_compound_motion_search( #endif // CONFIG_COMPOUND_SEGMENT mbmi->interinter_compound_type }; - if (this_mode == NEW_NEWMV) { +#if CONFIG_COMPOUND_SINGLEREF + // NOTE: Mode is needed to identify the compound mode prediction, regardless + // of comp refs or single ref. + mbmi->mode = this_mode; +#endif // CONFIG_COMPOUND_SINGLEREF + + if (this_mode == NEW_NEWMV +#if CONFIG_COMPOUND_SINGLEREF + || this_mode == SR_NEW_NEWMV +#endif // CONFIG_COMPOUND_SINGLEREF + ) { do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize, mi_row, mi_col, tmp_mv, &tmp_rate_mv, 2); mbmi->mv[0].as_int = tmp_mv[0].as_int; @@ -6738,7 +7850,12 @@ static int interinter_compound_motion_search( do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize, mi_row, mi_col, tmp_mv, &tmp_rate_mv, 0); mbmi->mv[0].as_int = tmp_mv[0].as_int; - } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) { + } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV +#if CONFIG_COMPOUND_SINGLEREF + // || this_mode == SR_NEAREST_NEWMV + || this_mode == SR_NEAR_NEWMV || this_mode == SR_ZERO_NEWMV +#endif // CONFIG_COMPOUND_SINGLEREF + ) { do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize, mi_row, mi_col, tmp_mv, &tmp_rate_mv, 1); mbmi->mv[1].as_int = tmp_mv[1].as_int; @@ -6763,7 +7880,7 @@ static int64_t build_and_cost_compound_type( const COMPOUND_TYPE compound_type = mbmi->interinter_compound_type; best_rd_cur = pick_interinter_mask(cpi, x, bsize, *preds0, *preds1); - best_rd_cur += RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv, 0); + best_rd_cur += RDCOST(x->rdmult, rs2 + rate_mv, 0); if (have_newmv_in_inter_mode(this_mode) && use_masked_motion_search(compound_type)) { @@ -6772,7 +7889,7 @@ static int64_t build_and_cost_compound_type( av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb); - rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum); + rd = RDCOST(x->rdmult, rs2 + 
*out_rate_mv + rate_sum, dist_sum); if (rd >= best_rd_cur) { mbmi->mv[0].as_int = cur_mv[0].as_int; mbmi->mv[1].as_int = cur_mv[1].as_int; @@ -6788,7 +7905,7 @@ static int64_t build_and_cost_compound_type( rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX); if (rd != INT64_MAX) - rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum); + rd = RDCOST(x->rdmult, rs2 + *out_rate_mv + rate_sum, dist_sum); best_rd_cur = rd; } else { @@ -6801,7 +7918,7 @@ static int64_t build_and_cost_compound_type( rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX); if (rd != INT64_MAX) - rd = RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum); + rd = RDCOST(x->rdmult, rs2 + rate_mv + rate_sum, dist_sum); best_rd_cur = rd; } return best_rd_cur; @@ -6832,6 +7949,9 @@ typedef struct { static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, const BLOCK_SIZE bsize, int_mv (*const mode_mv)[TOTAL_REFS_PER_FRAME], +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + int_mv (*const mode_comp_mv)[TOTAL_REFS_PER_FRAME], +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF const int mi_row, const int mi_col, int *const rate_mv, int_mv *const single_newmv, HandleInterModeArgs *const args) { @@ -6844,6 +7964,9 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME); #endif // CONFIG_EXT_INTER int_mv *const frame_mv = mode_mv[this_mode]; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + int_mv *const frame_comp_mv = mode_comp_mv[this_mode]; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] < 0 ? 
0 : mbmi->ref_frame[1] }; int i; @@ -6861,8 +7984,11 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { - joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, NULL, - 0, rate_mv, 0); + joint_motion_search(cpi, x, bsize, frame_mv, +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + NULL, // int_mv *frame_comp_mv +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + mi_row, mi_col, NULL, NULL, 0, rate_mv, 0); } else { *rate_mv = 0; for (i = 0; i < 2; ++i) { @@ -6877,8 +8003,12 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { frame_mv[refs[0]].as_int = mode_mv[compound_ref0_mode(this_mode)][refs[0]].as_int; - compound_single_motion_search_interinter( - cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 1); + compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, +#if CONFIG_COMPOUND_SINGLEREF + NULL, +#endif // CONFIG_COMPOUND_SINGLEREF + mi_row, mi_col, NULL, 0, + rate_mv, 0, 1); } else { av1_set_mvcost(x, refs[1], 1, mbmi->ref_mv_idx); *rate_mv = av1_mv_bit_cost(&frame_mv[refs[1]].as_mv, @@ -6891,8 +8021,12 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { frame_mv[refs[1]].as_int = mode_mv[compound_ref1_mode(this_mode)][refs[1]].as_int; - compound_single_motion_search_interinter( - cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 0); + compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, +#if CONFIG_COMPOUND_SINGLEREF + NULL, +#endif // CONFIG_COMPOUND_SINGLEREF + mi_row, mi_col, NULL, 0, + rate_mv, 0, 0); } else { av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx); *rate_mv = av1_mv_bit_cost(&frame_mv[refs[0]].as_mv, @@ -6900,7 +8034,7 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); } } -#else +#else // !CONFIG_EXT_INTER // Initialize mv using single prediction mode result. frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; @@ -6917,6 +8051,41 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, } } #endif // CONFIG_EXT_INTER +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + } else if (is_inter_singleref_comp_mode(this_mode)) { + // Single ref comp mode + const int mode0 = compound_ref0_mode(this_mode); + + single_newmv[refs[0]].as_int = args->single_newmv[refs[0]].as_int; + frame_mv[refs[0]].as_int = (mode0 == NEWMV) + ? 
single_newmv[refs[0]].as_int + : mode_mv[mode0][refs[0]].as_int; + assert(compound_ref1_mode(this_mode) == NEWMV); + frame_comp_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; + + if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { + if (this_mode == SR_NEW_NEWMV) { + joint_motion_search(cpi, x, bsize, frame_mv, frame_comp_mv, mi_row, + mi_col, NULL, NULL, 0, rate_mv, 0); + } else { + assert( // this_mode == SR_NEAREST_NEWMV || + this_mode == SR_NEAR_NEWMV || this_mode == SR_ZERO_NEWMV); + compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, + frame_comp_mv, mi_row, mi_col, + NULL, 0, rate_mv, 0, 1); + } + } else { + *rate_mv = 0; + av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx); + if (mode0 == NEWMV) + *rate_mv += av1_mv_bit_cost(&frame_mv[refs[0]].as_mv, + &mbmi_ext->ref_mvs[refs[0]][0].as_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + *rate_mv += av1_mv_bit_cost(&frame_comp_mv[refs[0]].as_mv, + &mbmi_ext->ref_mvs[refs[0]][0].as_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF } else { #if CONFIG_EXT_INTER if (is_comp_interintra_pred) { @@ -6984,7 +8153,7 @@ int64_t interpolation_filter_search( av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist, skip_txfm_sb, skip_sse_sb); - *rd = RDCOST(x->rdmult, x->rddiv, *switchable_rate + tmp_rate, tmp_dist); + *rd = RDCOST(x->rdmult, *switchable_rate + tmp_rate, tmp_dist); if (assign_filter == SWITCHABLE) { // do interp_filter search @@ -7020,7 +8189,7 @@ int64_t interpolation_filter_search( av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist, &tmp_skip_sb, &tmp_skip_sse); - tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rs + tmp_rate, tmp_dist); + tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist); if (tmp_rd < *rd) { *rd = tmp_rd; @@ -7072,12 +8241,10 @@ static int64_t motion_mode_rd( int mi_col, HandleInterModeArgs *const args, const int64_t ref_best_rd, const int *refs, int rate_mv, #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION + // only used when WARPED_MOTION is on? 
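Throughout these hunks the RD cost calls change from RDCOST(x->rdmult, x->rddiv, rate, dist) to RDCOST(x->rdmult, rate, dist); the distortion scaling previously passed as x->rddiv is now folded into the macro. Conceptually every such call still evaluates the Lagrangian J = D + lambda * R. The fragment below only illustrates that trade-off; it is not the literal macro, whose exact fixed-point shifts live in av1/encoder/rd.h:

/* Conceptual sketch only: what an RDCOST(rdmult, rate, dist) comparison is
 * trading off. rate is in the encoder's fixed-point bit units and rdmult
 * carries the Lagrange multiplier; the real macro applies matching
 * fixed-point shifts to both terms. */
static INLINE int64_t rd_tradeoff_sketch(int rdmult, int rate, int64_t dist) {
  const int64_t rate_term = (int64_t)rate * rdmult; /* lambda * R, scaled */
  return dist + (rate_term >> 9);                   /* illustrative shift only */
}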
int_mv *const single_newmv, #if CONFIG_EXT_INTER - int rate2_bmc_nocoeff, MB_MODE_INFO *best_bmc_mbmi, -#if CONFIG_MOTION_VAR - int rate_mv_bmc, -#endif // CONFIG_MOTION_VAR + int rate2_bmc_nocoeff, MB_MODE_INFO *best_bmc_mbmi, int rate_mv_bmc, #endif // CONFIG_EXT_INTER #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION int rs, int *skip_txfm_sb, int64_t *skip_sse_sb, BUFFER_SET *orig_dst) { @@ -7108,7 +8275,13 @@ static int64_t motion_mode_rd( #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION #if CONFIG_WARPED_MOTION +#if WARPED_MOTION_SORT_SAMPLES + int pts0[SAMPLES_ARRAY_SIZE], pts_inref0[SAMPLES_ARRAY_SIZE]; + int pts_mv0[SAMPLES_ARRAY_SIZE]; + int total_samples; +#else int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE]; +#endif // WARPED_MOTION_SORT_SAMPLES #endif // CONFIG_WARPED_MOTION #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION @@ -7118,18 +8291,39 @@ static int64_t motion_mode_rd( if (cm->interp_filter == SWITCHABLE) rd_stats->rate += rs; #if CONFIG_WARPED_MOTION aom_clear_system_state(); +#if WARPED_MOTION_SORT_SAMPLES + mbmi->num_proj_ref[0] = + findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0, pts_mv0); + total_samples = mbmi->num_proj_ref[0]; +#else mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref); +#endif // WARPED_MOTION_SORT_SAMPLES #if CONFIG_EXT_INTER best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0]; #endif // CONFIG_EXT_INTER #endif // CONFIG_WARPED_MOTION #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION rate2_nocoeff = rd_stats->rate; +#if CONFIG_NCOBMC_ADAPT_WEIGHT + // We cannot estimate the rd cost for the motion mode NCOBMC_ADAPT_WEIGHT + // right now since it requires mvs from all neighboring blocks. We will + // check if this mode is beneficial after all the mv's in the current + // superblock are selected. 
+ last_motion_mode_allowed = motion_mode_allowed_wrapper(1, +#if CONFIG_GLOBAL_MOTION + 0, xd->global_motion, +#endif // CONFIG_GLOBAL_MOTION + mi); +#else last_motion_mode_allowed = motion_mode_allowed( -#if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#if CONFIG_GLOBAL_MOTION 0, xd->global_motion, -#endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#endif // CONFIG_GLOBAL_MOTION +#if CONFIG_WARPED_MOTION + xd, +#endif mi); +#endif // CONFIG_NCOBMC_ADAPT_WEIGHT base_mbmi = *mbmi; #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION @@ -7155,7 +8349,11 @@ static int64_t motion_mode_rd( *mbmi = *best_bmc_mbmi; mbmi->motion_mode = OBMC_CAUSAL; #endif // CONFIG_EXT_INTER - if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) { + if (!is_comp_pred && +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + !is_inter_singleref_comp_mode(this_mode) && +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + have_newmv_in_inter_mode(this_mode)) { int tmp_rate_mv = 0; single_motion_search(cpi, x, bsize, mi_row, mi_col, @@ -7195,6 +8393,9 @@ static int64_t motion_mode_rd( #if CONFIG_WARPED_MOTION if (mbmi->motion_mode == WARPED_CAUSAL) { +#if WARPED_MOTION_SORT_SAMPLES + int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE]; +#endif // WARPED_MOTION_SORT_SAMPLES #if CONFIG_EXT_INTER *mbmi = *best_bmc_mbmi; mbmi->motion_mode = WARPED_CAUSAL; @@ -7210,6 +8411,19 @@ static int64_t motion_mode_rd( : cm->interp_filter; #endif // CONFIG_DUAL_FILTER +#if WARPED_MOTION_SORT_SAMPLES + memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0)); + memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0)); + // Rank the samples by motion vector difference + if (mbmi->num_proj_ref[0] > 1) { + mbmi->num_proj_ref[0] = sortSamples(pts_mv0, &mbmi->mv[0].as_mv, pts, + pts_inref, mbmi->num_proj_ref[0]); +#if CONFIG_EXT_INTER + best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0]; +#endif // CONFIG_EXT_INTER + } +#endif // WARPED_MOTION_SORT_SAMPLES + if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize, mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col, &mbmi->wm_params[0], mi_row, mi_col)) { @@ -7218,9 +8432,16 @@ static int64_t motion_mode_rd( int tmp_rate_mv = 0; const int_mv mv0 = mbmi->mv[0]; WarpedMotionParams wm_params0 = mbmi->wm_params[0]; +#if WARPED_MOTION_SORT_SAMPLES + int num_proj_ref0 = mbmi->num_proj_ref[0]; // Refine MV in a small range. + av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts0, pts_inref0, + pts_mv0, total_samples); +#else + // Refine MV in a small range. av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts, pts_inref); +#endif // WARPED_MOTION_SORT_SAMPLES // Keep the refined MV and WM parameters. if (mv0.as_int != mbmi->mv[0].as_int) { @@ -7241,6 +8462,9 @@ static int64_t motion_mode_rd( tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1); } #if CONFIG_EXT_INTER +#if WARPED_MOTION_SORT_SAMPLES + best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0]; +#endif // WARPED_MOTION_SORT_SAMPLES tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv; #else tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv; @@ -7255,6 +8479,9 @@ static int64_t motion_mode_rd( // Restore the old MV and WM parameters. 
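With WARPED_MOTION_SORT_SAMPLES the projection samples gathered by findSamples() are first ranked by how far each sample's MV deviates from the block's own MV (the sortSamples() call above), so that find_projection() fits the warp model to the most consistent samples, and the refined MV is only kept when it lowers the RD cost (see the restore just below). The fragment here is a rough sketch of that ranking idea; it assumes pts_mv stores one (row, col) MV per sample in the same two-ints-per-sample layout as pts/pts_inref, and it is not the real sortSamples() implementation:

/* Illustrative ranking only: order samples by the L1 distance between their
 * MV and the block MV, closest first, keeping the three parallel arrays in
 * step. Assumes (row, col) packing; the real routine may differ. */
static void rank_samples_by_mvd_sketch(int *pts_mv, int *pts, int *pts_inref,
                                       const MV *block_mv, int n) {
  for (int i = 1; i < n; ++i) {
    for (int j = i; j > 0; --j) {
      const int d_prev = abs(pts_mv[2 * (j - 1)] - block_mv->row) +
                         abs(pts_mv[2 * (j - 1) + 1] - block_mv->col);
      const int d_cur = abs(pts_mv[2 * j] - block_mv->row) +
                        abs(pts_mv[2 * j + 1] - block_mv->col);
      if (d_cur >= d_prev) break;
      for (int k = 0; k < 2; ++k) { /* swap sample j and j - 1 in all arrays */
        int t = pts_mv[2 * j + k];
        pts_mv[2 * j + k] = pts_mv[2 * (j - 1) + k];
        pts_mv[2 * (j - 1) + k] = t;
        t = pts[2 * j + k];
        pts[2 * j + k] = pts[2 * (j - 1) + k];
        pts[2 * (j - 1) + k] = t;
        t = pts_inref[2 * j + k];
        pts_inref[2 * j + k] = pts_inref[2 * (j - 1) + k];
        pts_inref[2 * (j - 1) + k] = t;
      }
    }
  }
}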
mbmi->mv[0] = mv0; mbmi->wm_params[0] = wm_params0; +#if WARPED_MOTION_SORT_SAMPLES + mbmi->num_proj_ref[0] = num_proj_ref0; +#endif // WARPED_MOTION_SORT_SAMPLES } } @@ -7328,8 +8555,8 @@ static int64_t motion_mode_rd( av1_merge_rd_stats(rd_stats, rd_stats_y); - rdcosty = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist); - rdcosty = AOMMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse)); + rdcosty = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); + rdcosty = AOMMIN(rdcosty, RDCOST(x->rdmult, 0, rd_stats->sse)); /* clang-format off */ #if CONFIG_VAR_TX is_cost_valid_uv = @@ -7365,12 +8592,11 @@ static int64_t motion_mode_rd( mbmi->skip = 0; // here mbmi->skip temporarily plays a role as what this_skip2 does } else if (!xd->lossless[mbmi->segment_id] && - (RDCOST(x->rdmult, x->rddiv, + (RDCOST(x->rdmult, rd_stats_y->rate + rd_stats_uv->rate + av1_cost_bit(av1_get_skip_prob(cm, xd), 0), rd_stats->dist) >= - RDCOST(x->rdmult, x->rddiv, - av1_cost_bit(av1_get_skip_prob(cm, xd), 1), + RDCOST(x->rdmult, av1_cost_bit(av1_get_skip_prob(cm, xd), 1), rd_stats->sse))) { rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate; rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 1); @@ -7427,7 +8653,7 @@ static int64_t motion_mode_rd( #endif // CONFIG_GLOBAL_MOTION #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION - tmp_rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist); + tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); if (mbmi->motion_mode == SIMPLE_TRANSLATION || (tmp_rd < best_rd)) { best_mbmi = *mbmi; best_rd = tmp_rd; @@ -7466,11 +8692,17 @@ static int64_t motion_mode_rd( return 0; } -static int64_t handle_inter_mode( - const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, - RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv, - int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row, - int mi_col, HandleInterModeArgs *args, const int64_t ref_best_rd) { +static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x, + BLOCK_SIZE bsize, RD_STATS *rd_stats, + RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv, + int *disable_skip, + int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + int_mv (*mode_comp_mv)[TOTAL_REFS_PER_FRAME], +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + int mi_row, int mi_col, + HandleInterModeArgs *args, + const int64_t ref_best_rd) { const AV1_COMMON *cm = &cpi->common; (void)cm; MACROBLOCKD *xd = &x->e_mbd; @@ -7479,7 +8711,14 @@ static int64_t handle_inter_mode( MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; const int is_comp_pred = has_second_ref(mbmi); const int this_mode = mbmi->mode; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + const int is_singleref_comp_mode = is_inter_singleref_comp_mode(this_mode); +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF int_mv *frame_mv = mode_mv[this_mode]; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // The comp mv for the compound mode in single ref + int_mv *frame_comp_mv = mode_comp_mv[this_mode]; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF int i; int refs[2] = { mbmi->ref_frame[0], (mbmi->ref_frame[1] < 0 ? 
0 : mbmi->ref_frame[1]) }; @@ -7487,7 +8726,7 @@ static int64_t handle_inter_mode( int rate_mv = 0; #if CONFIG_EXT_INTER int pred_exists = 1; -#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT +#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT || CONFIG_INTERINTRA const int bw = block_size_wide[bsize]; #endif // ONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT int_mv single_newmv[TOTAL_REFS_PER_FRAME]; @@ -7511,9 +8750,7 @@ static int64_t handle_inter_mode( #if CONFIG_EXT_INTER int rate2_bmc_nocoeff; MB_MODE_INFO best_bmc_mbmi; -#if CONFIG_MOTION_VAR int rate_mv_bmc; -#endif // CONFIG_MOTION_VAR #endif // CONFIG_EXT_INTER #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION int64_t rd = INT64_MAX; @@ -7523,6 +8760,11 @@ static int64_t handle_inter_mode( int skip_txfm_sb = 0; int64_t skip_sse_sb = INT64_MAX; int16_t mode_ctx; +#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_MOTION_VAR + // dummy fillers + mbmi->ncobmc_mode[0] = NO_OVERLAP; + mbmi->ncobmc_mode[1] = NO_OVERLAP; +#endif #if CONFIG_EXT_INTER #if CONFIG_INTERINTRA @@ -7546,7 +8788,11 @@ static int64_t handle_inter_mode( #endif // CONFIG_EXT_INTER #if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + if (is_comp_pred || is_singleref_comp_mode) +#else // !CONFIG_COMPOUND_SINGLEREF if (is_comp_pred) +#endif // CONFIG_COMPOUND_SINGLEREF mode_ctx = mbmi_ext->compound_mode_context[refs[0]]; else #endif // CONFIG_EXT_INTER @@ -7572,12 +8818,22 @@ static int64_t handle_inter_mode( if (frame_mv[refs[0]].as_int == INVALID_MV || frame_mv[refs[1]].as_int == INVALID_MV) return INT64_MAX; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + } else if (is_singleref_comp_mode) { + if (frame_mv[refs[0]].as_int == INVALID_MV || + frame_comp_mv[refs[0]].as_int == INVALID_MV) + return INT64_MAX; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF } mbmi->motion_mode = SIMPLE_TRANSLATION; if (have_newmv_in_inter_mode(this_mode)) { - const int64_t ret_val = handle_newmv(cpi, x, bsize, mode_mv, mi_row, mi_col, - &rate_mv, single_newmv, args); + const int64_t ret_val = + handle_newmv(cpi, x, bsize, mode_mv, +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + mode_comp_mv, +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + mi_row, mi_col, &rate_mv, single_newmv, args); if (ret_val != 0) return ret_val; else @@ -7591,6 +8847,16 @@ static int64_t handle_inter_mode( mbmi->mv[i].as_int = cur_mv[i].as_int; } +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!is_comp_pred && is_singleref_comp_mode) { + cur_mv[1] = frame_comp_mv[refs[0]]; + // Clip "next_nearest" so that it does not extend to far out of image + if (this_mode != NEWMV) clamp_mv2(&cur_mv[1].as_mv, xd); + if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX; + mbmi->mv[1].as_int = cur_mv[1].as_int; + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + #if CONFIG_EXT_INTER if (this_mode == NEAREST_NEARESTMV) #else @@ -7614,7 +8880,13 @@ static int64_t handle_inter_mode( #if CONFIG_EXT_INTER if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) { - if (this_mode == NEAREST_NEWMV) { +#if CONFIG_COMPOUND_SINGLEREF + if (this_mode == NEAREST_NEWMV || // this_mode == SR_NEAREST_NEWMV || + this_mode == SR_NEAREST_NEARMV) +#else // !CONFIG_COMPOUND_SINGLEREF + if (this_mode == NEAREST_NEWMV) +#endif // CONFIG_COMPOUND_SINGLEREF + { cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv; lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv); @@ -7635,7 +8907,11 @@ static int64_t handle_inter_mode( if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) { int ref_mv_idx = 
mbmi->ref_mv_idx + 1; - if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEARMV) { + if (this_mode == NEAR_NEWMV || +#if CONFIG_COMPOUND_SINGLEREF + this_mode == SR_NEAR_NEWMV || +#endif // CONFIG_COMPOUND_SINGLEREF + this_mode == NEAR_NEARMV) { cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv; lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv); @@ -7644,8 +8920,17 @@ static int64_t handle_inter_mode( mbmi->mv[0].as_int = cur_mv[0].as_int; } - if (this_mode == NEW_NEARMV || this_mode == NEAR_NEARMV) { - cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv; + if (this_mode == NEW_NEARMV || +#if CONFIG_COMPOUND_SINGLEREF + this_mode == SR_NEAREST_NEARMV || +#endif // CONFIG_COMPOUND_SINGLEREF + this_mode == NEAR_NEARMV) { +#if CONFIG_COMPOUND_SINGLEREF + if (this_mode == SR_NEAREST_NEARMV) + cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv; + else +#endif // CONFIG_COMPOUND_SINGLEREF + cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv; lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv); clamp_mv2(&cur_mv[1].as_mv, xd); @@ -7653,7 +8938,7 @@ static int64_t handle_inter_mode( mbmi->mv[1].as_int = cur_mv[1].as_int; } } -#else +#else // !CONFIG_EXT_INTER if (this_mode == NEARMV && is_comp_pred) { uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) { @@ -7706,7 +8991,7 @@ static int64_t handle_inter_mode( rd_stats->rate += cost_mv_ref(cpi, this_mode, mode_ctx); } - if (RDCOST(x->rdmult, x->rddiv, rd_stats->rate, 0) > ref_best_rd && + if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd && #if CONFIG_EXT_INTER mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV #else @@ -7725,13 +9010,16 @@ static int64_t handle_inter_mode( best_bmc_mbmi = *mbmi; rate2_bmc_nocoeff = rd_stats->rate; if (cm->interp_filter == SWITCHABLE) rate2_bmc_nocoeff += rs; -#if CONFIG_MOTION_VAR rate_mv_bmc = rate_mv; -#endif // CONFIG_MOTION_VAR #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT - if (is_comp_pred) { +#if CONFIG_COMPOUND_SINGLEREF + if (is_comp_pred || is_singleref_comp_mode) +#else + if (is_comp_pred) +#endif // CONFIG_COMPOUND_SINGLEREF + { int rate_sum, rs2; int64_t dist_sum; int64_t best_rd_compound = INT64_MAX, best_rd_cur = INT64_MAX; @@ -7741,8 +9029,8 @@ static int64_t handle_inter_mode( int tmp_skip_txfm_sb; int64_t tmp_skip_sse_sb; int compound_type_cost[COMPOUND_TYPES]; - uint8_t pred0[2 * MAX_SB_SQUARE]; - uint8_t pred1[2 * MAX_SB_SQUARE]; + DECLARE_ALIGNED(16, uint8_t, pred0[2 * MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, uint8_t, pred1[2 * MAX_SB_SQUARE]); uint8_t *preds0[1] = { pred0 }; uint8_t *preds1[1] = { pred1 }; int strides[1] = { bw }; @@ -7761,6 +9049,17 @@ static int64_t handle_inter_mode( best_compound_data.seg_mask = tmp_mask_buf; #endif // CONFIG_COMPOUND_SEGMENT +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // TODO(zoeliu): To further check whether the following setups are needed. + // Single ref compound mode: Prepare the 2nd ref frame predictor the same as + // the 1st one. 
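The SR_* cases handled above follow the same naming convention as the two-reference compound modes: the first half of the name says how the first MV is obtained and the second half how the second MV is obtained, except that both predictions come from the same reference frame (NEWMV halves are searched, the other halves come from the ref MV stack). A hypothetical helper spelling out that decomposition; it is not in the patch and simply mirrors how compound_ref0_mode()/compound_ref1_mode() are used in these hunks:

/* Hypothetical, for illustration only: per-MV component of each single-ref
 * compound mode. Both components predict from mbmi->ref_frame[0]. */
static void sr_mode_components_sketch(PREDICTION_MODE m, PREDICTION_MODE *first,
                                      PREDICTION_MODE *second) {
  switch (m) {
    case SR_NEAREST_NEARMV: *first = NEARESTMV; *second = NEARMV; break;
    case SR_NEAR_NEWMV:     *first = NEARMV;    *second = NEWMV;  break;
    case SR_ZERO_NEWMV:     *first = ZEROMV;    *second = NEWMV;  break;
    case SR_NEW_NEWMV:      *first = NEWMV;     *second = NEWMV;  break;
    default: assert(0 && "not a single-ref compound mode");
  }
}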
+ if (!is_comp_pred && is_singleref_comp_mode) { + xd->block_refs[1] = xd->block_refs[0]; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[1] = xd->plane[i].pre[0]; + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (masked_compound_used) { av1_cost_tokens(compound_type_cost, cm->fc->compound_type_prob[bsize], av1_compound_type_tree); @@ -7773,7 +9072,7 @@ static int64_t handle_inter_mode( for (cur_type = COMPOUND_AVERAGE; cur_type < COMPOUND_TYPES; cur_type++) { if (cur_type != COMPOUND_AVERAGE && !masked_compound_used) break; - if (!is_interinter_compound_used(cur_type, bsize)) break; + if (!is_interinter_compound_used(cur_type, bsize)) continue; tmp_rate_mv = rate_mv; best_rd_cur = INT64_MAX; mbmi->interinter_compound_type = cur_type; @@ -7792,8 +9091,7 @@ static int64_t handle_inter_mode( &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX); if (rd != INT64_MAX) - best_rd_cur = - RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum); + best_rd_cur = RDCOST(x->rdmult, rs2 + rate_mv + rate_sum, dist_sum); best_rd_compound = best_rd_cur; break; #if CONFIG_WEDGE @@ -7923,8 +9221,7 @@ static int64_t handle_inter_mode( av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw); model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb); - rd = - RDCOST(x->rdmult, x->rddiv, tmp_rate_mv + rate_sum + rmode, dist_sum); + rd = RDCOST(x->rdmult, tmp_rate_mv + rate_sum + rmode, dist_sum); if (rd < best_interintra_rd) { best_interintra_rd = rd; best_interintra_mode = mbmi->interintra_mode; @@ -7939,7 +9236,7 @@ static int64_t handle_inter_mode( rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX); if (rd != INT64_MAX) - rd = RDCOST(x->rdmult, x->rddiv, rate_mv + rmode + rate_sum, dist_sum); + rd = RDCOST(x->rdmult, rate_mv + rmode + rate_sum, dist_sum); best_interintra_rd = rd; if (ref_best_rd < INT64_MAX && best_interintra_rd > 2 * ref_best_rd) { @@ -7953,8 +9250,7 @@ static int64_t handle_inter_mode( int_mv tmp_mv; int rwedge = av1_cost_bit(cm->fc->wedge_interintra_prob[bsize], 0); if (rd != INT64_MAX) - rd = RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge + rate_sum, - dist_sum); + rd = RDCOST(x->rdmult, rmode + rate_mv + rwedge + rate_sum, dist_sum); best_interintra_rd_nowedge = best_interintra_rd; // Disable wedge search if source variance is small @@ -7968,7 +9264,7 @@ static int64_t handle_inter_mode( pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_); best_interintra_rd_wedge += - RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge, 0); + RDCOST(x->rdmult, rmode + rate_mv + rwedge, 0); // Refine motion vector. 
if (have_newmv_in_inter_mode(this_mode)) { // get negative of mask @@ -7977,14 +9273,14 @@ static int64_t handle_inter_mode( tmp_mv.as_int = x->mbmi_ext->ref_mvs[refs[0]][0].as_int; compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row, mi_col, intrapred, mask, bw, - &tmp_rate_mv, 0, 0); + &tmp_rate_mv, 0); mbmi->mv[0].as_int = tmp_mv.as_int; av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst, bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb); - rd = RDCOST(x->rdmult, x->rddiv, - rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum); + rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum, + dist_sum); if (rd >= best_interintra_rd_wedge) { tmp_mv.as_int = cur_mv[0].as_int; tmp_rate_mv = rate_mv; @@ -8000,8 +9296,8 @@ static int64_t handle_inter_mode( estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX); if (rd != INT64_MAX) - rd = RDCOST(x->rdmult, x->rddiv, - rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum); + rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum, + dist_sum); best_interintra_rd_wedge = rd; if (best_interintra_rd_wedge < best_interintra_rd_nowedge) { mbmi->use_wedge_interintra = 1; @@ -8042,7 +9338,7 @@ static int64_t handle_inter_mode( av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, &orig_dst, bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist, &skip_txfm_sb, &skip_sse_sb); - rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist); + rd = RDCOST(x->rdmult, rs + tmp_rate, tmp_dist); } #endif // CONFIG_EXT_INTER @@ -8097,10 +9393,7 @@ static int64_t handle_inter_mode( #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION single_newmv, #if CONFIG_EXT_INTER - rate2_bmc_nocoeff, &best_bmc_mbmi, -#if CONFIG_MOTION_VAR - rate_mv_bmc, -#endif // CONFIG_MOTION_VAR + rate2_bmc_nocoeff, &best_bmc_mbmi, rate_mv_bmc, #endif // CONFIG_EXT_INTER #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION rs, &skip_txfm_sb, &skip_sse_sb, &orig_dst); @@ -8118,11 +9411,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; const TileInfo *tile = &xd->tile; -#if CONFIG_EC_ADAPT FRAME_CONTEXT *const ec_ctx = xd->tile_ctx; -#else - FRAME_CONTEXT *const ec_ctx = cm->fc; -#endif // CONFIG_EC_ADAPT MODE_INFO *const mi = xd->mi[0]; const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE); const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE); @@ -8222,7 +9511,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, #endif mbmi->use_intrabc = 1; mbmi->mode = DC_PRED; - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; mbmi->mv[0].as_mv = dv; #if CONFIG_DUAL_FILTER for (int idx = 0; idx < 4; ++idx) mbmi->interp_filter[idx] = BILINEAR; @@ -8233,12 +9522,12 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, x->skip = 0; av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); + assert(x->mvcost == x->mv_cost_stack[0]); + // TODO(aconverse@google.com): The full motion field defining discount + // in MV_COST_WEIGHT is too large. Explore other values. 
int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, x->nmvjointcost, - x->mvcost, MV_COST_WEIGHT); - const PREDICTION_MODE A = av1_above_block_mode(mi, xd->above_mi, 0); - const PREDICTION_MODE L = av1_left_block_mode(mi, xd->left_mi, 0); - const int rate_mode = cpi->y_mode_costs[A][L][DC_PRED] + - av1_cost_bit(ec_ctx->intrabc_prob, 1); + x->mvcost, MV_COST_WEIGHT_SUB); + const int rate_mode = av1_cost_bit(ec_ctx->intrabc_prob, 1); RD_STATS rd_stats, rd_stats_uv; av1_subtract_plane(x, bsize, 0); @@ -8267,8 +9556,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, rdc_noskip.rate = rate_mode + rate_mv + rd_stats.rate + av1_cost_bit(skip_prob, 0); rdc_noskip.dist = rd_stats.dist; - rdc_noskip.rdcost = - RDCOST(x->rdmult, x->rddiv, rdc_noskip.rate, rdc_noskip.dist); + rdc_noskip.rdcost = RDCOST(x->rdmult, rdc_noskip.rate, rdc_noskip.dist); if (rdc_noskip.rdcost < best_rd) { best_rd = rdc_noskip.rdcost; best_mbmi = *mbmi; @@ -8282,7 +9570,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, av1_init_rd_stats(&rdc_skip); rdc_skip.rate = rate_mode + rate_mv + av1_cost_bit(skip_prob, 1); rdc_skip.dist = rd_stats.sse; - rdc_skip.rdcost = RDCOST(x->rdmult, x->rddiv, rdc_skip.rate, rdc_skip.dist); + rdc_skip.rdcost = RDCOST(x->rdmult, rdc_skip.rate, rdc_skip.dist); if (rdc_skip.rdcost < best_rd) { best_rd = rdc_skip.rdcost; best_mbmi = *mbmi; @@ -8302,6 +9590,7 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int64_t best_rd) { const AV1_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; struct macroblockd_plane *const pd = xd->plane; int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0; int y_skip = 0, uv_skip = 0; @@ -8310,11 +9599,11 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, const int unify_bsize = CONFIG_CB4X4; ctx->skip = 0; - xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME; - xd->mi[0]->mbmi.ref_frame[1] = NONE_FRAME; + mbmi->ref_frame[0] = INTRA_FRAME; + mbmi->ref_frame[1] = NONE_FRAME; #if CONFIG_INTRABC - xd->mi[0]->mbmi.use_intrabc = 0; - xd->mi[0]->mbmi.mv[0].as_int = 0; + mbmi->use_intrabc = 0; + mbmi->mv[0].as_int = 0; #endif // CONFIG_INTRABC const int64_t intra_yrd = @@ -8325,9 +9614,29 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, &dist_y, &y_skip, best_rd); if (intra_yrd < best_rd) { - max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->mbmi.tx_size] - [pd[1].subsampling_x][pd[1].subsampling_y]; - init_sbuv_mode(&xd->mi[0]->mbmi); +#if CONFIG_CFL + // Perform one extra txfm_rd_in_plane() call, this time with the best value + // so we can store reconstructed luma values + RD_STATS this_rd_stats; + +#if CONFIG_CB4X4 + // Don't store the luma value if no chroma is associated. + // Don't worry, we will store this reconstructed luma in the following + // encode dry-run the chroma plane will never know. 
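Earlier in this hunk the intrabc candidate is scored twice: once without skip (coefficient rate plus reconstruction distortion) and once with skip (no coefficient rate, plain SSE as distortion), and the cheaper alternative is allowed to update best_rd. A compact sketch of that comparison with every component passed in explicitly; the helper itself is illustrative, not part of the patch:

/* Illustrative only: the two RD alternatives evaluated for an intrabc
 * candidate. skip0_cost/skip1_cost stand for av1_cost_bit(skip_prob, 0/1). */
static int64_t intrabc_skip_choice_sketch(int rdmult, int rate_mode,
                                          int rate_mv, int rate_coeffs,
                                          int skip0_cost, int skip1_cost,
                                          int64_t dist, int64_t sse) {
  const int64_t rd_noskip =
      RDCOST(rdmult, rate_mode + rate_mv + rate_coeffs + skip0_cost, dist);
  const int64_t rd_skip =
      RDCOST(rdmult, rate_mode + rate_mv + skip1_cost, sse);
  return AOMMIN(rd_noskip, rd_skip);
}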
+ x->cfl_store_y = !x->skip_chroma_rd; +#else + x->cfl_store_y = 1; +#endif + + txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y, + mbmi->sb_type, mbmi->tx_size, + cpi->sf.use_fast_coef_costing); + + x->cfl_store_y = 0; +#endif + max_uv_tx_size = uv_txsize_lookup[bsize][mbmi->tx_size][pd[1].subsampling_x] + [pd[1].subsampling_y]; + init_sbuv_mode(mbmi); #if CONFIG_CB4X4 if (!x->skip_chroma_rd) rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv, @@ -8346,8 +9655,8 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, rate_y + rate_uv + av1_cost_bit(av1_get_skip_prob(cm, xd), 0); rd_cost->dist = dist_y + dist_uv; } - rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist); +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 rd_cost->dist_y = dist_y; #endif } else { @@ -8360,7 +9669,7 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, if (rd_pick_intrabc_mode_sb(cpi, x, rd_cost, bsize, best_rd) < best_rd) { ctx->skip = x->skip; // FIXME where is the proper place to set this?! assert(rd_cost->rate != INT_MAX); - rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); + rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist); } #endif if (rd_cost->rate == INT_MAX) return; @@ -8494,7 +9803,8 @@ static void pick_filter_intra_interframe( const AV1_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize, int mi_row, int mi_col, int *rate_uv_intra, int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv, - PREDICTION_MODE *mode_uv, FILTER_INTRA_MODE_INFO *filter_intra_mode_info_uv, + UV_PREDICTION_MODE *mode_uv, + FILTER_INTRA_MODE_INFO *filter_intra_mode_info_uv, #if CONFIG_EXT_INTRA int8_t *uv_angle_delta, #endif // CONFIG_EXT_INTRA @@ -8531,7 +9841,7 @@ static void pick_filter_intra_interframe( // TODO(huisu): use skip_mask for further speedup. 
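The CONFIG_CFL hunk above runs one extra luma transform-RD pass purely so the reconstructed luma pixels are cached for chroma-from-luma prediction. Below is a minimal sketch of that flag-guarded pattern; the struct and function names are invented stand-ins for the encoder state (x->cfl_store_y, txfm_rd_in_plane), and the CfL formula in the comment is only a rough description.

#include <stdbool.h>

struct cfl_sketch {
  bool store_y;        /* stands in for x->cfl_store_y      */
  bool skip_chroma_rd; /* stands in for x->skip_chroma_rd   */
};

/* Stand-in for txfm_rd_in_plane(..., AOM_PLANE_Y, ...): when store_y is set,
 * the reconstructed luma would be kept so chroma can later be predicted from
 * it (roughly pred_uv = dc_pred + alpha * subsampled luma AC). */
static void luma_txfm_rd_sketch(struct cfl_sketch *ctx) { (void)ctx; }

static void store_luma_for_cfl_sketch(struct cfl_sketch *ctx) {
  ctx->store_y = !ctx->skip_chroma_rd; /* no cache if no chroma is coded */
  luma_txfm_rd_sketch(ctx);
  ctx->store_y = false;                /* never leave the flag set */
}
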
(void)skip_mask; mbmi->mode = DC_PRED; - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; mbmi->ref_frame[0] = INTRA_FRAME; mbmi->ref_frame[1] = NONE_FRAME; if (!rd_pick_filter_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y, @@ -8600,7 +9910,8 @@ static void pick_filter_intra_interframe( rate2 += write_uniform_cost( FILTER_INTRA_MODES, mbmi->filter_intra_mode_info.filter_intra_mode[0]); #if CONFIG_EXT_INTRA - if (av1_is_directional_mode(mbmi->uv_mode, bsize)) { + if (av1_is_directional_mode(mbmi->uv_mode, bsize) && + av1_use_angle_delta(bsize)) { rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, MAX_ANGLE_DELTA + mbmi->angle_delta[1]); } @@ -8628,7 +9939,7 @@ static void pick_filter_intra_interframe( } else { rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0); } - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + this_rd = RDCOST(x->rdmult, rate2, distortion2); if (this_rd < *best_intra_rd) { *best_intra_rd = this_rd; @@ -8693,6 +10004,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, unsigned char segment_id = mbmi->segment_id; int comp_pred, i, k; int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME]; +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + int_mv frame_comp_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME]; +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE]; int_mv single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } }; #if CONFIG_EXT_INTER @@ -8722,7 +10036,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, int best_mode_skippable = 0; int midx, best_mode_index = -1; unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME]; +#if CONFIG_EXT_COMP_REFS + unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME][TOTAL_REFS_PER_FRAME]; +#else unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME]; +#endif // CONFIG_EXT_COMP_REFS aom_prob comp_mode_p; int64_t best_intra_rd = INT64_MAX; unsigned int best_pred_sse = UINT_MAX; @@ -8730,7 +10048,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, int rate_uv_intra[TX_SIZES_ALL], rate_uv_tokenonly[TX_SIZES_ALL]; int64_t dist_uvs[TX_SIZES_ALL]; int skip_uvs[TX_SIZES_ALL]; - PREDICTION_MODE mode_uv[TX_SIZES_ALL]; + UV_PREDICTION_MODE mode_uv[TX_SIZES_ALL]; #if CONFIG_PALETTE PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL]; #endif // CONFIG_PALETTE @@ -8747,7 +10065,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth); const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]]; int best_skip2 = 0; - uint8_t ref_frame_skip_mask[2] = { 0 }; + uint16_t ref_frame_skip_mask[2] = { 0 }; uint32_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 }; #if CONFIG_EXT_INTER && CONFIG_INTERINTRA MV_REFERENCE_FRAME best_single_inter_ref = LAST_FRAME; @@ -8850,6 +10168,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, *returnrate_nocoef = INT_MAX; #endif // CONFIG_SUPERTX +#if CONFIG_SPEED_REFS + memset(x->mbmi_ext->ref_mvs, 0, sizeof(x->mbmi_ext->ref_mvs)); +#endif // CONFIG_SPEED_REFS + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; x->mbmi_ext->mode_context[ref_frame] = 0; @@ -8873,6 +10195,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, #endif // CONFIG_GLOBAL_MOTION #if CONFIG_EXT_INTER frame_mv[NEW_NEWMV][ref_frame].as_int = INVALID_MV; +#if CONFIG_COMPOUND_SINGLEREF + 
frame_mv[SR_NEW_NEWMV][ref_frame].as_int = INVALID_MV; + frame_comp_mv[SR_NEW_NEWMV][ref_frame].as_int = INVALID_MV; +#endif // CONFIG_COMPOUND_SINGLEREF #if CONFIG_GLOBAL_MOTION frame_mv[ZERO_ZEROMV][ref_frame].as_int = gm_get_motion_vector(&cm->global_motion[ref_frame], @@ -8934,6 +10260,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, // Skip checking missing references in both single and compound reference // modes. Note that a mode will be skipped iff both reference frames // are masked out. +#if CONFIG_EXT_COMP_REFS + ref_frame_skip_mask[0] |= (1 << ref_frame); + ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; +#else // !CONFIG_EXT_COMP_REFS #if CONFIG_EXT_REFS if (ref_frame == BWDREF_FRAME || ref_frame == ALTREF_FRAME) { ref_frame_skip_mask[0] |= (1 << ref_frame); @@ -8945,6 +10275,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, #if CONFIG_EXT_REFS } #endif // CONFIG_EXT_REFS +#endif // CONFIG_EXT_COMP_REFS } else { for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { // Skip fixed mv modes for poor references @@ -9000,6 +10331,12 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV); if (frame_mv[NEAR_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int) mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARMV); +#if CONFIG_COMPOUND_SINGLEREF + if (frame_mv[SR_NEAREST_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int || + frame_comp_mv[SR_NEAREST_NEARMV][ALTREF_FRAME].as_int != + zeromv.as_int) + mode_skip_mask[ALTREF_FRAME] |= (1 << SR_NEAREST_NEARMV); +#endif // CONFIG_COMPOUND_SINGLEREF #endif // CONFIG_EXT_INTER } } @@ -9077,7 +10414,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, int compmode_cost = 0; int rate2 = 0, rate_y = 0, rate_uv = 0; int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 int64_t distortion2_y = 0; int64_t total_sse_y = INT64_MAX; #endif @@ -9106,6 +10443,13 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int; frame_mv[this_mode][second_ref_frame].as_int = frame_mv[compound_ref1_mode(this_mode)][second_ref_frame].as_int; +#if CONFIG_COMPOUND_SINGLEREF + } else if (is_inter_singleref_comp_mode(this_mode)) { + frame_mv[this_mode][ref_frame].as_int = + frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int; + frame_comp_mv[this_mode][ref_frame].as_int = + frame_mv[compound_ref1_mode(this_mode)][ref_frame].as_int; +#endif // CONFIG_COMPOUND_SINGLEREF } #endif // CONFIG_EXT_INTER @@ -9154,6 +10498,34 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, (ref_frame_skip_mask[1] & (1 << AOMMAX(0, second_ref_frame)))) continue; +#if CONFIG_EXT_COMP_REFS +// TODO(zoeliu): Following toggle between #if 0/1 and the bug will manifest +// itself. 
+#if 0 + if (!(cpi->ref_frame_flags & flag_list[ref_frame]) || + (second_ref_frame > INTRA_FRAME && + (!(cpi->ref_frame_flags & flag_list[second_ref_frame])))) + printf("Frame=%d, bsize=%d, (mi_row,mi_col)=(%d,%d), ref_frame=%d, " + "second_ref_frame=%d\n", cm->current_video_frame, bsize, mi_row, + mi_col, ref_frame, second_ref_frame); + + if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; + if (second_ref_frame > INTRA_FRAME && + (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))) + continue; +#endif // 0 + +#if !USE_UNI_COMP_REFS + // NOTE(zoeliu): Temporarily disable uni-directional comp refs + if (second_ref_frame > INTRA_FRAME) { + if (!((ref_frame < BWDREF_FRAME) ^ (second_ref_frame < BWDREF_FRAME))) + continue; + } + assert(second_ref_frame <= INTRA_FRAME || + ((ref_frame < BWDREF_FRAME) ^ (second_ref_frame < BWDREF_FRAME))); +#endif // !USE_UNI_COMP_REFS +#endif // CONFIG_EXT_COMP_REFS + if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue; // Test best rd so far against threshold for trying this mode. @@ -9239,7 +10611,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, } mbmi->mode = this_mode; - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; mbmi->ref_frame[0] = ref_frame; mbmi->ref_frame[1] = second_ref_frame; #if CONFIG_PALETTE @@ -9267,6 +10639,15 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; } +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // Single ref compound mode + if (!comp_pred && is_inter_singleref_comp_mode(mbmi->mode)) { + xd->block_refs[1] = xd->block_refs[0]; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[1] = xd->plane[i].pre[0]; + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + #if CONFIG_EXT_INTER && CONFIG_INTERINTRA mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1); #endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA @@ -9277,7 +10658,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, struct macroblockd_plane *const pd = &xd->plane[1]; #if CONFIG_EXT_INTRA is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize); - if (is_directional_mode) { + if (is_directional_mode && av1_use_angle_delta(bsize)) { int rate_dummy; int64_t model_rd = INT64_MAX; if (!angle_stats_ready) { @@ -9390,10 +10771,13 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, if (av1_is_intra_filter_switchable(p_angle)) rate2 += cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter]; #endif // CONFIG_INTRA_INTERP - rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, - MAX_ANGLE_DELTA + mbmi->angle_delta[0]); + if (av1_use_angle_delta(bsize)) { + rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, + MAX_ANGLE_DELTA + mbmi->angle_delta[0]); + } } - if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) { + if (av1_is_directional_mode(mbmi->uv_mode, bsize) && + av1_use_angle_delta(bsize)) { rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, MAX_ANGLE_DELTA + mbmi->angle_delta[1]); } @@ -9409,7 +10793,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, mbmi->filter_intra_mode_info.filter_intra_mode[0]); } } - if (mbmi->uv_mode == DC_PRED) { + if (mbmi->uv_mode == UV_DC_PRED) { rate2 += av1_cost_bit(cpi->common.fc->filter_intra_probs[1], mbmi->filter_intra_mode_info.use_filter_intra_mode[1]); @@ -9422,7 +10806,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, if (mbmi->mode 
!= DC_PRED && mbmi->mode != TM_PRED) rate2 += intra_cost_penalty; distortion2 = distortion_y + distortion_uv; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (bsize < BLOCK_8X8) distortion2_y = distortion_y; #endif } else { @@ -9481,6 +10865,27 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv; } } +#if CONFIG_COMPOUND_SINGLEREF + } else if (is_inter_singleref_comp_mode(mbmi->mode)) { + if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) { + // TODO(zoeliu): To further investigate which ref_mv_idx should be + // chosen for the mode of SR_NEAR_NEWMV. + int ref_mv_idx = 0; + // Special case: SR_NEAR_NEWMV mode use + // 1 + mbmi->ref_mv_idx (like NEARMV) instead of + // mbmi->ref_mv_idx (like NEWMV) + if (mbmi->mode == SR_NEAR_NEWMV) ref_mv_idx = 1; + + if (compound_ref0_mode(mbmi->mode) == NEWMV || + compound_ref1_mode(mbmi->mode) == NEWMV) { + int_mv this_mv = + mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv; + clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2, + xd->n8_h << MI_SIZE_LOG2, xd); + mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv; + } + } +#endif // CONFIG_COMPOUND_SINGLEREF } else { #endif // CONFIG_EXT_INTER if (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1) { @@ -9500,6 +10905,19 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, { RD_STATS rd_stats, rd_stats_y, rd_stats_uv; av1_init_rd_stats(&rd_stats); +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + // While av1 master uses rd_stats_y.rate through out the codebase, + // which is set when handle_inter_moden is called, the daala-dist code + // in rd_pick_partition() for cb4x4 and sub8x8 blocks need to know + // .dist_y which comes from rd_stats_y.dist and rd_stats_y.sse. + // The problem is rd_stats_y.dist and rd_stats_y.sse are sometimes not + // initialized when rd_stats.skip = 1, + // then instead rd_stats.dist and rd_stats.sse have the + // combined luma and chroma dist and sse. + // This can be seen inside motion_mode_rd(), which is called by + // handle_inter_mode(). + if (bsize < BLOCK_8X8) av1_init_rd_stats(&rd_stats_y); +#endif rd_stats.rate = rate2; // Point to variables that are maintained between loop iterations @@ -9510,6 +10928,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, #endif // CONFIG_EXT_INTER this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &disable_skip, frame_mv, +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + frame_comp_mv, +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF mi_row, mi_col, &args, best_rd); rate2 = rd_stats.rate; @@ -9518,23 +10939,39 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, total_sse = rd_stats.sse; rate_y = rd_stats_y.rate; rate_uv = rd_stats_uv.rate; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 - if (bsize < BLOCK_8X8) distortion2_y = rd_stats_y.dist; +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if (bsize < BLOCK_8X8) { + if (rd_stats_y.rate != INT_MAX) { + assert(rd_stats_y.sse < INT64_MAX); + assert(rd_stats_y.dist < INT64_MAX); + } + total_sse_y = rd_stats_y.sse; + distortion2_y = rd_stats_y.dist; + } #endif } // TODO(jingning): This needs some refactoring to improve code quality // and reduce redundant steps. 
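The CONFIG_DIST_8X8 comment above boils down to an initialisation hazard: when a block ends up coded as skip, only the combined luma+chroma stats are written, so the luma-only stats must be reset before the call or dist_y is later read uninitialised. A stand-alone sketch of that reset (the struct and names are illustrative, not the real RD_STATS):

#include <stdint.h>

typedef struct {
  int rate;
  int64_t dist, sse;
  int skip;
} rd_stats_sketch;

/* Reset to the "nothing coded yet" state before any call that may take the
 * skip path and leave the luma-only fields untouched. */
static void init_rd_stats_sketch(rd_stats_sketch *s) {
  s->rate = 0;
  s->dist = 0;
  s->sse = 0;
  s->skip = 1;
}
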
#if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + if ((have_nearmv_in_inter_mode(mbmi->mode) && + mbmi_ext->ref_mv_count[ref_frame_type] > 2) || + ((mbmi->mode == NEWMV || mbmi->mode == SR_NEW_NEWMV || + mbmi->mode == NEW_NEWMV) && + mbmi_ext->ref_mv_count[ref_frame_type] > 1)) +#else // !CONFIG_COMPOUND_SINGLEREF if ((have_nearmv_in_inter_mode(mbmi->mode) && mbmi_ext->ref_mv_count[ref_frame_type] > 2) || ((mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) && - mbmi_ext->ref_mv_count[ref_frame_type] > 1)) { -#else + mbmi_ext->ref_mv_count[ref_frame_type] > 1)) +#endif // CONFIG_COMPOUND_SINGLEREF +#else // !CONFIG_EXT_INTER if ((mbmi->mode == NEARMV && mbmi_ext->ref_mv_count[ref_frame_type] > 2) || - (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1)) { -#endif + (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1)) +#endif // CONFIG_EXT_INTER + { int_mv backup_mv = frame_mv[NEARMV][ref_frame]; MB_MODE_INFO backup_mbmi = *mbmi; int backup_skip = x->skip; @@ -9560,18 +10997,16 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, rate2 += (rate2 < INT_MAX ? cpi->drl_mode_cost0[drl_ctx][0] : 0); if (this_rd < INT64_MAX) { - if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < - RDCOST(x->rdmult, x->rddiv, 0, total_sse)) - tmp_ref_rd = - RDCOST(x->rdmult, x->rddiv, - rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 0), - distortion2); + if (RDCOST(x->rdmult, rate_y + rate_uv, distortion2) < + RDCOST(x->rdmult, 0, total_sse)) + tmp_ref_rd = RDCOST( + x->rdmult, rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 0), + distortion2); else - tmp_ref_rd = - RDCOST(x->rdmult, x->rddiv, - rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 1) - - rate_y - rate_uv, - total_sse); + tmp_ref_rd = RDCOST( + x->rdmult, rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 1) - + rate_y - rate_uv, + total_sse); } #if CONFIG_VAR_TX for (i = 0; i < MAX_MB_PLANE; ++i) @@ -9587,6 +11022,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, RD_STATS tmp_rd_stats, tmp_rd_stats_y, tmp_rd_stats_uv; av1_invalid_rd_stats(&tmp_rd_stats); + x->skip = 0; mbmi->ref_mv_idx = 1 + ref_idx; @@ -9627,6 +11063,34 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, xd->n8_h << MI_SIZE_LOG2, xd); mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv; } +#if CONFIG_COMPOUND_SINGLEREF + } else if (is_inter_singleref_comp_mode(mbmi->mode)) { + int ref_mv_idx = mbmi->ref_mv_idx; + // Special case: SR_NEAR_NEWMV mode use + // 1 + mbmi->ref_mv_idx (like NEARMV) instead of + // mbmi->ref_mv_idx (like NEWMV) + if (mbmi->mode == SR_NEAR_NEWMV) ref_mv_idx = 1 + mbmi->ref_mv_idx; + + // TODO(zoeliu): For the mode of SR_NEAREST_NEWMV, as it only runs + // the "if", not the "else if", + // mbmi_ext->ref_mvs[mbmi->ref_frame[0]] takes the + // value for "NEWMV", instead of "NEARESTMV". 
+ if (compound_ref0_mode(mbmi->mode) == NEWMV || + compound_ref1_mode(mbmi->mode) == NEWMV) { + int_mv this_mv = + mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv; + clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2, + xd->n8_h << MI_SIZE_LOG2, xd); + mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv; + } else if (compound_ref0_mode(mbmi->mode) == NEARESTMV || + compound_ref1_mode(mbmi->mode) == NEARESTMV) { + int_mv this_mv = + mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv; + clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2, + xd->n8_h << MI_SIZE_LOG2, xd); + mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv; + } +#endif // CONFIG_COMPOUND_SINGLEREF } else { #endif // CONFIG_EXT_INTER for (ref = 0; ref < 1 + comp_pred; ++ref) { @@ -9657,16 +11121,28 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, frame_mv[NEARMV][ref_frame] = cur_mv; av1_init_rd_stats(&tmp_rd_stats); - +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + // With the same reason as 'rd_stats_y' passed to above + // handle_inter_mode(), tmp_rd_stats_y.dist and + // tmp_rd_stats_y.sse are sometimes not initialized, esp. when + // tmp_rd_stats.skip = 1 and tmp_rd_stats.dist and .sse + // represent combined luma and chroma .dist and .sse, + // we should initialized tmp_rd_stats_y. + if (bsize < BLOCK_8X8) av1_init_rd_stats(&tmp_rd_stats_y); +#endif // Point to variables that are not maintained between iterations args.single_newmv = dummy_single_newmv; #if CONFIG_EXT_INTER args.single_newmv_rate = dummy_single_newmv_rate; args.modelled_rd = NULL; #endif // CONFIG_EXT_INTER - tmp_alt_rd = handle_inter_mode( - cpi, x, bsize, &tmp_rd_stats, &tmp_rd_stats_y, &tmp_rd_stats_uv, - &dummy_disable_skip, frame_mv, mi_row, mi_col, &args, best_rd); + tmp_alt_rd = handle_inter_mode(cpi, x, bsize, &tmp_rd_stats, + &tmp_rd_stats_y, &tmp_rd_stats_uv, + &dummy_disable_skip, frame_mv, +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + frame_comp_mv, +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + mi_row, mi_col, &args, best_rd); // Prevent pointers from escaping local scope args.single_newmv = NULL; #if CONFIG_EXT_INTER @@ -9696,25 +11172,22 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, if (tmp_alt_rd < INT64_MAX) { #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION - tmp_alt_rd = RDCOST(x->rdmult, x->rddiv, tmp_rd_stats.rate, - tmp_rd_stats.dist); + tmp_alt_rd = + RDCOST(x->rdmult, tmp_rd_stats.rate, tmp_rd_stats.dist); #else - if (RDCOST(x->rdmult, x->rddiv, - tmp_rd_stats_y.rate + tmp_rd_stats_uv.rate, + if (RDCOST(x->rdmult, tmp_rd_stats_y.rate + tmp_rd_stats_uv.rate, tmp_rd_stats.dist) < - RDCOST(x->rdmult, x->rddiv, 0, tmp_rd_stats.sse)) - tmp_alt_rd = - RDCOST(x->rdmult, x->rddiv, - tmp_rd_stats.rate + - av1_cost_bit(av1_get_skip_prob(cm, xd), 0), - tmp_rd_stats.dist); + RDCOST(x->rdmult, 0, tmp_rd_stats.sse)) + tmp_alt_rd = RDCOST( + x->rdmult, tmp_rd_stats.rate + + av1_cost_bit(av1_get_skip_prob(cm, xd), 0), + tmp_rd_stats.dist); else - tmp_alt_rd = - RDCOST(x->rdmult, x->rddiv, - tmp_rd_stats.rate + - av1_cost_bit(av1_get_skip_prob(cm, xd), 1) - - tmp_rd_stats_y.rate - tmp_rd_stats_uv.rate, - tmp_rd_stats.sse); + tmp_alt_rd = RDCOST( + x->rdmult, tmp_rd_stats.rate + + av1_cost_bit(av1_get_skip_prob(cm, xd), 1) - + tmp_rd_stats_y.rate - tmp_rd_stats_uv.rate, + tmp_rd_stats.sse); #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION } @@ -9730,8 +11203,12 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, tmp_ref_rd = tmp_alt_rd; 
backup_mbmi = *mbmi; backup_skip = x->skip; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (bsize < BLOCK_8X8) { + if (tmp_rd_stats_y.rate != INT_MAX) { + assert(tmp_rd_stats_y.sse < INT64_MAX); + assert(tmp_rd_stats_y.dist < INT64_MAX); + } total_sse_y = tmp_rd_stats_y.sse; distortion2_y = tmp_rd_stats_y.dist; } @@ -9774,19 +11251,33 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, // Estimate the reference frame signaling cost and add it // to the rolling cost variable. if (comp_pred) { +#if CONFIG_EXT_COMP_REFS + rate2 += ref_costs_comp[ref_frame][second_ref_frame]; +#else // !CONFIG_EXT_COMP_REFS rate2 += ref_costs_comp[ref_frame]; #if CONFIG_EXT_REFS rate2 += ref_costs_comp[second_ref_frame]; #endif // CONFIG_EXT_REFS +#endif // CONFIG_EXT_COMP_REFS } else { rate2 += ref_costs_single[ref_frame]; } +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // Add the cost to signal single/comp mode in single ref. + if (!comp_pred && cm->reference_mode != COMPOUND_REFERENCE) { + aom_prob singleref_comp_mode_p = av1_get_inter_mode_prob(cm, xd); + rate2 += av1_cost_bit(singleref_comp_mode_p, + is_inter_singleref_comp_mode(mbmi->mode)); + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION - if (ref_frame == INTRA_FRAME) { + if (ref_frame == INTRA_FRAME) #else - if (!disable_skip) { + if (!disable_skip) #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION + { if (skippable) { // Back out the coefficient coding costs rate2 -= (rate_y + rate_uv); @@ -9795,9 +11286,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, // Cost the skip mb case rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1); } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) { - if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + rate_skip0, - distortion2) < - RDCOST(x->rdmult, x->rddiv, rate_skip1, total_sse)) { + if (RDCOST(x->rdmult, rate_y + rate_uv + rate_skip0, distortion2) < + RDCOST(x->rdmult, rate_skip1, total_sse)) { // Add in the cost of the no skip flag. rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0); } else { @@ -9809,8 +11299,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, this_skip2 = 1; rate_y = 0; rate_uv = 0; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 - if (bsize < BLOCK_8X8) distortion2_y = total_sse_y; +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if (bsize < BLOCK_8X8) { + assert(total_sse_y < INT64_MAX); + distortion2_y = total_sse_y; + } #endif } } else { @@ -9819,11 +11312,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, } // Calculate the final RD estimate for this mode. 
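Before the final RD estimate is formed below, the block above decides whether to signal skip: either pay for the coefficients plus a "no skip" flag, or drop them, take the SSE as the distortion and pay only for the "skip" flag. A self-contained sketch of that choice (the helper names and the lambda shift are placeholders, not the real RDCOST):

#include <stdint.h>

static int64_t rd_sketch(int rdmult, int rate, int64_t dist) {
  return dist + (((int64_t)rate * rdmult) >> 9); /* shift is illustrative */
}

/* Returns 1 when signalling skip is cheaper; the caller would then zero out
 * rate_y/rate_uv and use the SSE as the distortion, as done above. */
static int choose_skip_sketch(int rdmult, int rate_coeffs, int rate_skip0,
                              int rate_skip1, int64_t dist, int64_t sse) {
  const int64_t rd_code = rd_sketch(rdmult, rate_coeffs + rate_skip0, dist);
  const int64_t rd_skip = rd_sketch(rdmult, rate_skip1, sse);
  return rd_skip <= rd_code;
}
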
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + this_rd = RDCOST(x->rdmult, rate2, distortion2); #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION } else { this_skip2 = mbmi->skip; - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + this_rd = RDCOST(x->rdmult, rate2, distortion2); if (this_skip2) { rate_y = 0; rate_uv = 0; @@ -9831,6 +11324,12 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION } +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if ((bsize < BLOCK_8X8) && (rate2 != INT_MAX)) { + assert(distortion2_y < INT64_MAX); + } +#endif + if (ref_frame == INTRA_FRAME) { // Keep record of best intra rd if (this_rd < best_intra_rd) { @@ -9875,12 +11374,18 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, *returnrate_nocoef -= av1_cost_bit(av1_get_intra_inter_prob(cm, xd), mbmi->ref_frame[0] != INTRA_FRAME); #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION +#if CONFIG_WARPED_MOTION + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); +#endif #if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION MODE_INFO *const mi = xd->mi[0]; const MOTION_MODE motion_allowed = motion_mode_allowed( -#if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#if CONFIG_GLOBAL_MOTION 0, xd->global_motion, -#endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#endif // CONFIG_GLOBAL_MOTION +#if CONFIG_WARPED_MOTION + xd, +#endif mi); if (motion_allowed == WARPED_CAUSAL) *returnrate_nocoef -= cpi->motion_mode_cost[bsize][mbmi->motion_mode]; @@ -9901,8 +11406,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, best_rate_y = rate_y + av1_cost_bit(av1_get_skip_prob(cm, xd), this_skip2 || skippable); best_rate_uv = rate_uv; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 - if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2_y; +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if (bsize < BLOCK_8X8) { + assert(distortion2_y < INT64_MAX); + rd_cost->dist_y = distortion2_y; + } #endif #if CONFIG_VAR_TX for (i = 0; i < MAX_MB_PLANE; ++i) @@ -9911,7 +11419,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, #endif // CONFIG_VAR_TX } } - +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if ((bsize < BLOCK_8X8) && (rd_cost->rate != INT_MAX)) { + assert(rd_cost->dist_y < INT64_MAX); + } +#endif /* keep record of best compound/single-only prediction */ if (!disable_skip && ref_frame != INTRA_FRAME) { int64_t single_rd, hybrid_rd, single_rate, hybrid_rate; @@ -9924,8 +11436,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, hybrid_rate = rate2 + compmode_cost; } - single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); - hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); + single_rd = RDCOST(x->rdmult, single_rate, distortion2); + hybrid_rd = RDCOST(x->rdmult, hybrid_rate, distortion2); if (!comp_pred) { if (single_rd < best_pred_rd[SINGLE_REFERENCE]) @@ -9963,6 +11475,15 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i]; } +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // Single ref compound mode + if (!has_second_ref(mbmi) && is_inter_singleref_comp_mode(mbmi->mode)) { + xd->block_refs[1] = xd->block_refs[0]; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[1] = xd->plane[i].pre[0]; + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (is_inter_mode(mbmi->mode)) { av1_build_inter_predictors_sb(cm, xd, mi_row, 
mi_col, NULL, bsize); #if CONFIG_MOTION_VAR @@ -9996,9 +11517,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX); } - if (RDCOST(x->rdmult, x->rddiv, rd_stats_y.rate + rd_stats_uv.rate, + if (RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate, (rd_stats_y.dist + rd_stats_uv.dist)) > - RDCOST(x->rdmult, x->rddiv, 0, (rd_stats_y.sse + rd_stats_uv.sse))) { + RDCOST(x->rdmult, 0, (rd_stats_y.sse + rd_stats_uv.sse))) { skip_blk = 1; rd_stats_y.rate = av1_cost_bit(av1_get_skip_prob(cm, xd), 1); rd_stats_uv.rate = 0; @@ -10009,8 +11530,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, rd_stats_y.rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 0); } - if (RDCOST(x->rdmult, x->rddiv, best_rate_y + best_rate_uv, rd_cost->dist) > - RDCOST(x->rdmult, x->rddiv, rd_stats_y.rate + rd_stats_uv.rate, + if (RDCOST(x->rdmult, best_rate_y + best_rate_uv, rd_cost->dist) > + RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate, (rd_stats_y.dist + rd_stats_uv.dist))) { #if CONFIG_VAR_TX int idx, idy; @@ -10031,15 +11552,24 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, rd_cost->rate += (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv); rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 - if (bsize < BLOCK_8X8) rd_cost->dist_y = rd_stats_y.dist; -#endif - rd_cost->rdcost = - RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); + rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist); best_skip2 = skip_blk; +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if (bsize < BLOCK_8X8) { + assert(rd_cost->rate != INT_MAX); + assert(rd_cost->dist_y < INT64_MAX); + rd_cost->dist_y = rd_stats_y.dist; + } +#endif } } +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if ((bsize < BLOCK_8X8) && (rd_cost->rate != INT_MAX)) { + assert(rd_cost->dist_y < INT64_MAX); + } +#endif + #if CONFIG_PALETTE // Only try palette mode when the best mode so far is an intra mode. if (try_palette && !is_inter_mode(best_mbmode.mode)) { @@ -10058,7 +11588,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, MB_MODE_INFO best_mbmi_palette = best_mbmode; mbmi->mode = DC_PRED; - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; mbmi->ref_frame[0] = INTRA_FRAME; mbmi->ref_frame[1] = NONE_FRAME; rate_overhead_palette = rd_pick_palette_intra_sby( @@ -10119,7 +11649,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, #endif // CONFIG_SUPERTX rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0); } - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + this_rd = RDCOST(x->rdmult, rate2, distortion2); if (this_rd < best_rd) { best_mode_index = 3; mbmi->mv[0].as_int = 0; @@ -10165,10 +11695,14 @@ PALETTE_EXIT: } #endif // CONFIG_FILTER_INTRA - // The inter modes' rate costs are not calculated precisely in some cases. - // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and - // ZEROMV. Here, checks are added for those cases, and the mode decisions - // are corrected. +// The inter modes' rate costs are not calculated precisely in some cases. +// Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and +// ZEROMV. Here, checks are added for those cases, and the mode decisions +// are corrected. 
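The comment above is the rationale for the block that follows: because inter-mode rate costs are only approximate, the search can pick NEWMV even though the winning motion vector is identical to the NEARESTMV, NEARMV or ZEROMV candidate, so the mode label is corrected afterwards to the cheaper signalling. A single-reference sketch of that relabelling (types and names are illustrative):

#include <stdint.h>

typedef struct { int32_t as_int; } mv_sketch;

enum mode_sketch { NEWMV_S, NEARESTMV_S, NEARMV_S, ZEROMV_S };

static enum mode_sketch correct_newmv_sketch(mv_sketch best, mv_sketch nearest,
                                             mv_sketch nearmv, mv_sketch zero) {
  if (best.as_int == nearest.as_int) return NEARESTMV_S;
  if (best.as_int == nearmv.as_int) return NEARMV_S;
  if (best.as_int == zero.as_int) return ZEROMV_S;
  return NEWMV_S;
}
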
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF +// NOTE: For SR_NEW_NEWMV, no need to check as the two mvs from the same ref +// are surely different from each other. +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF if (best_mbmode.mode == NEWMV #if CONFIG_EXT_INTER || best_mbmode.mode == NEW_NEWMV @@ -10248,8 +11782,9 @@ PALETTE_EXIT: } if (nearestmv[0].as_int == best_mbmode.mv[0].as_int && - nearestmv[1].as_int == best_mbmode.mv[1].as_int) { + nearestmv[1].as_int == best_mbmode.mv[1].as_int) #if CONFIG_EXT_INTER + { best_mbmode.mode = NEAREST_NEARESTMV; } else { int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2) @@ -10274,6 +11809,7 @@ PALETTE_EXIT: best_mbmode.mode = ZERO_ZEROMV; } #else + { best_mbmode.mode = NEARESTMV; } else if (best_mbmode.mv[0].as_int == zeromv[0].as_int && best_mbmode.mv[1].as_int == zeromv[1].as_int) { @@ -10287,11 +11823,18 @@ PALETTE_EXIT: // using a mode which can support ref_mv_idx if (best_mbmode.ref_mv_idx != 0 && #if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + !(best_mbmode.mode == NEWMV || best_mbmode.mode == SR_NEW_NEWMV || + best_mbmode.mode == NEW_NEWMV || + have_nearmv_in_inter_mode(best_mbmode.mode))) +#else // !CONFIG_COMPOUND_SINGLEREF !(best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV || - have_nearmv_in_inter_mode(best_mbmode.mode))) { -#else - !(best_mbmode.mode == NEARMV || best_mbmode.mode == NEWMV)) { -#endif + have_nearmv_in_inter_mode(best_mbmode.mode))) +#endif // CONFIG_COMPOUND_SINGLEREF +#else // !CONFIG_EXT_INTER + !(best_mbmode.mode == NEARMV || best_mbmode.mode == NEWMV)) +#endif // CONFIG_EXT_INTER + { best_mbmode.ref_mv_idx = 0; } @@ -10377,11 +11920,12 @@ PALETTE_EXIT: ) { #if CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR // Correct the motion mode for ZEROMV - const MOTION_MODE last_motion_mode_allowed = motion_mode_allowed( -#if SEPARATE_GLOBAL_MOTION - 0, xd->global_motion, -#endif // SEPARATE_GLOBAL_MOTION - xd->mi[0]); + const MOTION_MODE last_motion_mode_allowed = + motion_mode_allowed(0, xd->global_motion, +#if CONFIG_WARPED_MOTION + xd, +#endif + xd->mi[0]); if (mbmi->motion_mode > last_motion_mode_allowed) mbmi->motion_mode = last_motion_mode_allowed; #endif // CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR @@ -10445,7 +11989,11 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi, int i; int64_t best_pred_diff[REFERENCE_MODES]; unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME]; +#if CONFIG_EXT_COMP_REFS + unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME][TOTAL_REFS_PER_FRAME]; +#else unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME]; +#endif // CONFIG_EXT_COMP_REFS aom_prob comp_mode_p; InterpFilter best_filter = SWITCHABLE; int64_t this_rd = INT64_MAX; @@ -10476,7 +12024,7 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi, #endif // CONFIG_FILTER_INTRA mbmi->mode = ZEROMV; mbmi->motion_mode = SIMPLE_TRANSLATION; - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = UV_DC_PRED; mbmi->ref_frame[0] = LAST_FRAME; mbmi->ref_frame[1] = NONE_FRAME; #if CONFIG_GLOBAL_MOTION @@ -10501,7 +12049,17 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi, #if CONFIG_WARPED_MOTION if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) { int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE]; +#if WARPED_MOTION_SORT_SAMPLES + int pts_mv[SAMPLES_ARRAY_SIZE]; + mbmi->num_proj_ref[0] = + findSamples(cm, xd, mi_row, mi_col, pts, pts_inref, pts_mv); + // Rank the samples by motion vector difference + if (mbmi->num_proj_ref[0] > 1) + mbmi->num_proj_ref[0] = sortSamples(pts_mv, 
&mbmi->mv[0].as_mv, pts, + pts_inref, mbmi->num_proj_ref[0]); +#else mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref); +#endif // WARPED_MOTION_SORT_SAMPLES } #endif @@ -10548,12 +12106,12 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi, // Estimate the reference frame signaling cost and add it // to the rolling cost variable. rate2 += ref_costs_single[LAST_FRAME]; - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + this_rd = RDCOST(x->rdmult, rate2, distortion2); rd_cost->rate = rate2; rd_cost->dist = distortion2; rd_cost->rdcost = this_rd; -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2; #endif if (this_rd >= best_rd_so_far) { @@ -10646,7 +12204,8 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x, // handle above row if (xd->up_available) { - const int overlap = num_4x4_blocks_high_lookup[bsize] * 2; + const int overlap = + AOMMIN(block_size_high[bsize] >> 1, block_size_high[BLOCK_64X64] >> 1); const int miw = AOMMIN(xd->n8_w, cm->mi_cols - mi_col); const int mi_row_offset = -1; const uint8_t *const mask1d = av1_get_obmc_mask(overlap); @@ -10666,7 +12225,9 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x, &xd->mi[mi_col_offset + 1 + mi_row_offset * xd->mi_stride]->mbmi; #endif const BLOCK_SIZE a_bsize = AOMMAX(above_mbmi->sb_type, BLOCK_8X8); - const int mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]); + const int above_step = + AOMMIN(mi_size_wide[a_bsize], mi_size_wide[BLOCK_64X64]); + const int mi_step = AOMMIN(xd->n8_w, above_step); const int neighbor_bw = mi_step * MI_SIZE; if (is_neighbor_overlappable(above_mbmi)) { @@ -10725,7 +12286,8 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x, // handle left column if (xd->left_available) { - const int overlap = num_4x4_blocks_wide_lookup[bsize] * 2; + const int overlap = + AOMMIN(block_size_wide[bsize] >> 1, block_size_wide[BLOCK_64X64] >> 1); const int mih = AOMMIN(xd->n8_h, cm->mi_rows - mi_row); const int mi_col_offset = -1; const uint8_t *const mask1d = av1_get_obmc_mask(overlap); @@ -10746,7 +12308,9 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x, &xd->mi[mi_col_offset + (mi_row_offset + 1) * xd->mi_stride]->mbmi; #endif const BLOCK_SIZE l_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8); - const int mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]); + const int left_step = + AOMMIN(mi_size_high[l_bsize], mi_size_high[BLOCK_64X64]); + const int mi_step = AOMMIN(xd->n8_h, left_step); const int neighbor_bh = mi_step * MI_SIZE; if (is_neighbor_overlappable(left_mbmi)) { @@ -10854,8 +12418,23 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x, av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); av1_subtract_plane(x, bsize, 0); +#if CONFIG_VAR_TX + if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) { + select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX); + } else { + int idx, idy; + super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX); + for (idy = 0; idy < xd->n8_h; ++idy) + for (idx = 0; idx < xd->n8_w; ++idx) + mbmi->inter_tx_size[idy][idx] = mbmi->tx_size; + memset(x->blk_skip[0], rd_stats_y.skip, + sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4); + } + inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX); +#else super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX); super_block_uvrd(cpi, x, 
&rd_stats_uv, bsize, INT64_MAX); +#endif assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX); if (rd_stats_y.skip && rd_stats_uv.skip) { rd_stats_y.rate = rate_skip1; @@ -10863,10 +12442,10 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x, rd_stats_y.dist = rd_stats_y.sse; rd_stats_uv.dist = rd_stats_uv.sse; skip_blk = 0; - } else if (RDCOST(x->rdmult, x->rddiv, + } else if (RDCOST(x->rdmult, (rd_stats_y.rate + rd_stats_uv.rate + rate_skip0), (rd_stats_y.dist + rd_stats_uv.dist)) > - RDCOST(x->rdmult, x->rddiv, rate_skip1, + RDCOST(x->rdmult, rate_skip1, (rd_stats_y.sse + rd_stats_uv.sse))) { rd_stats_y.rate = rate_skip1; rd_stats_uv.rate = 0; @@ -10879,18 +12458,33 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x, } backup_skip = skip_blk; backup_mbmi = *mbmi; - rd_causal = RDCOST(x->rdmult, x->rddiv, (rd_stats_y.rate + rd_stats_uv.rate), + rd_causal = RDCOST(x->rdmult, (rd_stats_y.rate + rd_stats_uv.rate), (rd_stats_y.dist + rd_stats_uv.dist)); - rd_causal += RDCOST(x->rdmult, x->rddiv, - av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 0), 0); + rd_causal += + RDCOST(x->rdmult, av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 0), 0); // Check non-causal mode mbmi->motion_mode = OBMC_CAUSAL; av1_build_ncobmc_inter_predictors_sb(cm, xd, mi_row, mi_col); av1_subtract_plane(x, bsize, 0); +#if CONFIG_VAR_TX + if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) { + select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX); + } else { + int idx, idy; + super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX); + for (idy = 0; idy < xd->n8_h; ++idy) + for (idx = 0; idx < xd->n8_w; ++idx) + mbmi->inter_tx_size[idy][idx] = mbmi->tx_size; + memset(x->blk_skip[0], rd_stats_y.skip, + sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4); + } + inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX); +#else super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX); super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX); +#endif assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX); if (rd_stats_y.skip && rd_stats_uv.skip) { rd_stats_y.rate = rate_skip1; @@ -10898,10 +12492,10 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x, rd_stats_y.dist = rd_stats_y.sse; rd_stats_uv.dist = rd_stats_uv.sse; skip_blk = 0; - } else if (RDCOST(x->rdmult, x->rddiv, + } else if (RDCOST(x->rdmult, (rd_stats_y.rate + rd_stats_uv.rate + rate_skip0), (rd_stats_y.dist + rd_stats_uv.dist)) > - RDCOST(x->rdmult, x->rddiv, rate_skip1, + RDCOST(x->rdmult, rate_skip1, (rd_stats_y.sse + rd_stats_uv.sse))) { rd_stats_y.rate = rate_skip1; rd_stats_uv.rate = 0; @@ -10914,9 +12508,8 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x, } if (rd_causal > - RDCOST(x->rdmult, x->rddiv, - rd_stats_y.rate + rd_stats_uv.rate + - av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 1), + RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate + + av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 1), (rd_stats_y.dist + rd_stats_uv.dist))) { x->skip = skip_blk; } else { |