Diffstat (limited to 'third_party/aom/av1/encoder/rdopt.c')
-rw-r--r--  third_party/aom/av1/encoder/rdopt.c  3417
1 file changed, 2505 insertions(+), 912 deletions(-)
diff --git a/third_party/aom/av1/encoder/rdopt.c b/third_party/aom/av1/encoder/rdopt.c
index 2a537a06a..43b00b83b 100644
--- a/third_party/aom/av1/encoder/rdopt.c
+++ b/third_party/aom/av1/encoder/rdopt.c
@@ -63,7 +63,7 @@
#endif // CONFIG_PVQ
#if CONFIG_PVQ || CONFIG_DAALA_DIST
#include "av1/common/pvq.h"
-#endif // CONFIG_PVQ || CONFIG_DAALA_DIST
+#endif // CONFIG_PVQ || CONFIG_DIST_8X8
#if CONFIG_DUAL_FILTER
#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
#if USE_EXTRA_FILTER
@@ -113,8 +113,14 @@ static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
#endif // CONFIG_EXT_REFS
#if CONFIG_EXT_REFS
+#if CONFIG_EXT_COMP_REFS
+#define SECOND_REF_FRAME_MASK \
+ ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | (1 << GOLDEN_FRAME) | \
+ (1 << LAST2_FRAME) | 0x01) // NOLINT
+#else // !CONFIG_EXT_COMP_REFS
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)
-#else
+#endif // CONFIG_EXT_COMP_REFS
+#else // !CONFIG_EXT_REFS
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
#endif // CONFIG_EXT_REFS
@@ -126,6 +132,11 @@ static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
#define FILTER_FAST_SEARCH 1
#endif // CONFIG_EXT_INTRA
+// Setting this to 1 will disable trellis optimization within the
+// transform search. Trellis optimization will still be applied
+// in the final encode.
+#define DISABLE_TRELLISQ_SEARCH 0
+
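/* Illustrative sketch, not part of the patch: how DISABLE_TRELLISQ_SEARCH is
 * consumed further down in block_rd_txfm(). With the flag set, quantization
 * uses plain AV1_XFORM_QUANT_B and av1_optimize_b() is skipped; otherwise the
 * FP quantizer runs and trellis is applied only while a quick transform-domain
 * RD estimate still beats the best RD seen so far:
 *
 *   #if DISABLE_TRELLISQ_SEARCH
 *     av1_xform_quant(cm, x, plane, block, ..., AV1_XFORM_QUANT_B);
 *   #else
 *     av1_xform_quant(cm, x, plane, block, ..., AV1_XFORM_QUANT_FP);
 *     if (RDCOST(x->rdmult, 0, tmp_dist) + args->this_rd < args->best_rd)
 *       av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize,
 *                      tx_size, a, l);
 *     else
 *       args->exit_early = 1;
 *   #endif
 */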
const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671, // vert
-7.7051, -3.2234, -3.6193, 3.4533 }; // horz
@@ -191,6 +202,56 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
// TODO(zoeliu): May need to reconsider the order on the modes to check
#if CONFIG_EXT_INTER
+
+#if CONFIG_COMPOUND_SINGLEREF
+ // Single ref comp mode
+ { SR_NEAREST_NEARMV, { LAST_FRAME, NONE_FRAME } },
+#if CONFIG_EXT_REFS
+ { SR_NEAREST_NEARMV, { LAST2_FRAME, NONE_FRAME } },
+ { SR_NEAREST_NEARMV, { LAST3_FRAME, NONE_FRAME } },
+ { SR_NEAREST_NEARMV, { BWDREF_FRAME, NONE_FRAME } },
+#endif // CONFIG_EXT_REFS
+ { SR_NEAREST_NEARMV, { GOLDEN_FRAME, NONE_FRAME } },
+ { SR_NEAREST_NEARMV, { ALTREF_FRAME, NONE_FRAME } },
+
+ /*
+ { SR_NEAREST_NEWMV, { LAST_FRAME, NONE_FRAME } },
+#if CONFIG_EXT_REFS
+ { SR_NEAREST_NEWMV, { LAST2_FRAME, NONE_FRAME } },
+ { SR_NEAREST_NEWMV, { LAST3_FRAME, NONE_FRAME } },
+ { SR_NEAREST_NEWMV, { BWDREF_FRAME, NONE_FRAME } },
+#endif // CONFIG_EXT_REFS
+ { SR_NEAREST_NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
+ { SR_NEAREST_NEWMV, { ALTREF_FRAME, NONE_FRAME } },*/
+
+ { SR_NEAR_NEWMV, { LAST_FRAME, NONE_FRAME } },
+#if CONFIG_EXT_REFS
+ { SR_NEAR_NEWMV, { LAST2_FRAME, NONE_FRAME } },
+ { SR_NEAR_NEWMV, { LAST3_FRAME, NONE_FRAME } },
+ { SR_NEAR_NEWMV, { BWDREF_FRAME, NONE_FRAME } },
+#endif // CONFIG_EXT_REFS
+ { SR_NEAR_NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
+ { SR_NEAR_NEWMV, { ALTREF_FRAME, NONE_FRAME } },
+
+ { SR_ZERO_NEWMV, { LAST_FRAME, NONE_FRAME } },
+#if CONFIG_EXT_REFS
+ { SR_ZERO_NEWMV, { LAST2_FRAME, NONE_FRAME } },
+ { SR_ZERO_NEWMV, { LAST3_FRAME, NONE_FRAME } },
+ { SR_ZERO_NEWMV, { BWDREF_FRAME, NONE_FRAME } },
+#endif // CONFIG_EXT_REFS
+ { SR_ZERO_NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
+ { SR_ZERO_NEWMV, { ALTREF_FRAME, NONE_FRAME } },
+
+ { SR_NEW_NEWMV, { LAST_FRAME, NONE_FRAME } },
+#if CONFIG_EXT_REFS
+ { SR_NEW_NEWMV, { LAST2_FRAME, NONE_FRAME } },
+ { SR_NEW_NEWMV, { LAST3_FRAME, NONE_FRAME } },
+ { SR_NEW_NEWMV, { BWDREF_FRAME, NONE_FRAME } },
+#endif // CONFIG_EXT_REFS
+ { SR_NEW_NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
+ { SR_NEW_NEWMV, { ALTREF_FRAME, NONE_FRAME } },
+#endif // CONFIG_COMPOUND_SINGLEREF
+
{ NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
{ NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
@@ -202,6 +263,13 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+
+#if CONFIG_EXT_COMP_REFS
+ { NEAREST_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEAREST_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEAREST_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEAREST_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
#else // CONFIG_EXT_INTER
@@ -217,6 +285,13 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+
+#if CONFIG_EXT_COMP_REFS
+ { NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
#endif // CONFIG_EXT_INTER
@@ -297,9 +372,43 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+
+#if CONFIG_EXT_COMP_REFS
+ { NEAR_NEARMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEW_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEAREST_NEWMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEW_NEARMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEAR_NEWMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEW_NEWMV, { LAST_FRAME, LAST2_FRAME } },
+ { ZERO_ZEROMV, { LAST_FRAME, LAST2_FRAME } },
+
+ { NEAR_NEARMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEW_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEAREST_NEWMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEW_NEARMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEAR_NEWMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEW_NEWMV, { LAST_FRAME, LAST3_FRAME } },
+ { ZERO_ZEROMV, { LAST_FRAME, LAST3_FRAME } },
+
+ { NEAR_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEW_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEAREST_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEW_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEAR_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEW_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { ZERO_ZEROMV, { LAST_FRAME, GOLDEN_FRAME } },
+
+ { NEAR_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
+ { NEW_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
+ { NEAREST_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
+ { NEW_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
+ { NEAR_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
+ { NEW_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
+ { ZERO_ZEROMV, { BWDREF_FRAME, ALTREF_FRAME } },
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
-#else // CONFIG_EXT_INTER
+#else // !CONFIG_EXT_INTER
{ NEARMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEWMV, { LAST_FRAME, ALTREF_FRAME } },
@@ -321,6 +430,17 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+
+#if CONFIG_EXT_COMP_REFS
+ { NEARMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEWMV, { LAST_FRAME, LAST2_FRAME } },
+ { NEARMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEWMV, { LAST_FRAME, LAST3_FRAME } },
+ { NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
+ { NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
{ ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
@@ -335,6 +455,13 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
{ ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
{ ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+
+#if CONFIG_EXT_COMP_REFS
+ { ZEROMV, { LAST_FRAME, LAST2_FRAME } },
+ { ZEROMV, { LAST_FRAME, LAST3_FRAME } },
+ { ZEROMV, { LAST_FRAME, GOLDEN_FRAME } },
+ { ZEROMV, { BWDREF_FRAME, ALTREF_FRAME } },
+#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
#endif // CONFIG_EXT_INTER
@@ -385,6 +512,35 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
#endif // CONFIG_EXT_INTER
};
+static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = {
+ DC_PRED, H_PRED, V_PRED,
+#if CONFIG_ALT_INTRA
+ SMOOTH_PRED,
+#endif // CONFIG_ALT_INTRA
+ TM_PRED,
+#if CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV
+ SMOOTH_V_PRED, SMOOTH_H_PRED,
+#endif // CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV
+ D135_PRED, D207_PRED, D153_PRED, D63_PRED, D117_PRED, D45_PRED,
+};
+
+#if CONFIG_CFL
+static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = {
+ UV_DC_PRED, UV_H_PRED, UV_V_PRED,
+#if CONFIG_ALT_INTRA
+ UV_SMOOTH_PRED,
+#endif // CONFIG_ALT_INTRA
+ UV_TM_PRED,
+#if CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV
+ UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED,
+#endif // CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV
+ UV_D135_PRED, UV_D207_PRED, UV_D153_PRED,
+ UV_D63_PRED, UV_D117_PRED, UV_D45_PRED,
+};
+#else
+#define uv_rd_search_mode_order intra_rd_search_mode_order
+#endif // CONFIG_CFL
+
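/* Minimal usage sketch (the loop is illustrative only, not code from this
 * patch): the RD search walks intra modes in the order given by the table
 * above, so DC_PRED and the smooth/TM modes are tried before the directional
 * modes.
 *
 *   for (int mode_idx = 0; mode_idx < INTRA_MODES; ++mode_idx) {
 *     const PREDICTION_MODE this_mode = intra_rd_search_mode_order[mode_idx];
 *     // ... evaluate this_mode, keep the best rate-distortion cost ...
 *   }
 */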
#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
static INLINE int write_uniform_cost(int n, int v) {
const int l = get_unsigned_bits(n);
@@ -404,7 +560,7 @@ static INLINE int write_uniform_cost(int n, int v) {
#define FAST_EXT_TX_EDST_MARGIN 0.3
#if CONFIG_DAALA_DIST
-static int od_compute_var_4x4(od_coeff *x, int stride) {
+static int od_compute_var_4x4(uint16_t *x, int stride) {
int sum;
int s2;
int i;
@@ -420,7 +576,7 @@ static int od_compute_var_4x4(od_coeff *x, int stride) {
s2 += t * t;
}
}
- // TODO(yushin) : Check wheter any changes are required for high bit depth.
+
return (s2 - (sum * sum >> 4)) >> 4;
}
@@ -431,8 +587,8 @@ static int od_compute_var_4x4(od_coeff *x, int stride) {
#define OD_DIST_LP_MID (5)
#define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)
-static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x,
- od_coeff *y, od_coeff *e_lp, int stride) {
+static double od_compute_dist_8x8(int use_activity_masking, uint16_t *x,
+ uint16_t *y, od_coeff *e_lp, int stride) {
double sum;
int min_var;
double mean_var;
@@ -444,8 +600,7 @@ static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x,
double vardist;
vardist = 0;
- OD_ASSERT(qm != OD_FLAT_QM);
- (void)qm;
+
#if 1
min_var = INT_MAX;
mean_var = 0;
@@ -490,22 +645,61 @@ static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x,
}
// Note : Inputs x and y are in a pixel domain
-static double od_compute_dist(int qm, int activity_masking, od_coeff *x,
- od_coeff *y, int bsize_w, int bsize_h,
- int qindex) {
+static double od_compute_dist_common(int activity_masking, uint16_t *x,
+ uint16_t *y, int bsize_w, int bsize_h,
+ int qindex, od_coeff *tmp,
+ od_coeff *e_lp) {
+ int i, j;
+ double sum = 0;
+ const int mid = OD_DIST_LP_MID;
+
+ for (j = 0; j < bsize_w; j++) {
+ e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
+ e_lp[(bsize_h - 1) * bsize_w + j] = mid * tmp[(bsize_h - 1) * bsize_w + j] +
+ 2 * tmp[(bsize_h - 2) * bsize_w + j];
+ }
+ for (i = 1; i < bsize_h - 1; i++) {
+ for (j = 0; j < bsize_w; j++) {
+ e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
+ tmp[(i - 1) * bsize_w + j] +
+ tmp[(i + 1) * bsize_w + j];
+ }
+ }
+ for (i = 0; i < bsize_h; i += 8) {
+ for (j = 0; j < bsize_w; j += 8) {
+ sum += od_compute_dist_8x8(activity_masking, &x[i * bsize_w + j],
+ &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
+ bsize_w);
+ }
+ }
+ /* Scale according to linear regression against SSE, for 8x8 blocks. */
+ if (activity_masking) {
+ sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
+ (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
+ } else {
+ sum *= qindex >= 128
+ ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
+ : qindex <= 43 ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
+ : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
+ }
+
+ return sum;
+}
+
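/* For clarity, the qindex-dependent scaling applied by od_compute_dist_common()
 * above can be restated as a standalone helper (the helper name is ours; the
 * constants are copied verbatim from the code above):
 *
 *   static double daala_dist_scale(double sum, int activity_masking,
 *                                  int qindex) {
 *     if (activity_masking) {
 *       double s = 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99);
 *       if (qindex < 99)
 *         s += 2.5 * (qindex - 99) / 99.0 * (qindex - 99) / 99.0;
 *       return sum * s;
 *     }
 *     if (qindex >= 128)
 *       return sum * (1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128));
 *     if (qindex <= 43)
 *       return sum * (1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43));
 *     return sum * (1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43));
 *   }
 */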
+static double od_compute_dist(uint16_t *x, uint16_t *y, int bsize_w,
+ int bsize_h, int qindex) {
int i;
double sum;
sum = 0;
assert(bsize_w >= 8 && bsize_h >= 8);
- if (qm == OD_FLAT_QM) {
- for (i = 0; i < bsize_w * bsize_h; i++) {
- double tmp;
- tmp = x[i] - y[i];
- sum += tmp * tmp;
- }
- } else {
+#if CONFIG_PVQ
+ int activity_masking = 1;
+#else
+ int activity_masking = 0;
+#endif
+ {
int j;
DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]);
DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
@@ -525,63 +719,242 @@ static double od_compute_dist(int qm, int activity_masking, od_coeff *x,
e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1];
}
}
- for (j = 0; j < bsize_w; j++) {
- e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
- e_lp[(bsize_h - 1) * bsize_w + j] =
- mid * tmp[(bsize_h - 1) * bsize_w + j] +
- 2 * tmp[(bsize_h - 2) * bsize_w + j];
- }
- for (i = 1; i < bsize_h - 1; i++) {
+ sum = od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
+ qindex, tmp, e_lp);
+ }
+ return sum;
+}
+
+static double od_compute_dist_diff(uint16_t *x, int16_t *e, int bsize_w,
+ int bsize_h, int qindex) {
+ int i;
+ double sum;
+ sum = 0;
+
+ assert(bsize_w >= 8 && bsize_h >= 8);
+
+#if CONFIG_PVQ
+ int activity_masking = 1;
+#else
+ int activity_masking = 0;
+#endif
+ {
+ int j;
+ DECLARE_ALIGNED(16, uint16_t, y[MAX_TX_SQUARE]);
+ DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
+ DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
+ int mid = OD_DIST_LP_MID;
+ for (i = 0; i < bsize_h; i++) {
for (j = 0; j < bsize_w; j++) {
- e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
- tmp[(i - 1) * bsize_w + j] +
- tmp[(i + 1) * bsize_w + j];
+ y[i * bsize_w + j] = x[i * bsize_w + j] - e[i * bsize_w + j];
}
}
- for (i = 0; i < bsize_h; i += 8) {
- for (j = 0; j < bsize_w; j += 8) {
- sum += od_compute_dist_8x8(qm, activity_masking, &x[i * bsize_w + j],
- &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
- bsize_w);
+ for (i = 0; i < bsize_h; i++) {
+ tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
+ tmp[i * bsize_w + bsize_w - 1] =
+ mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
+ for (j = 1; j < bsize_w - 1; j++) {
+ tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] +
+ e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1];
}
}
- /* Scale according to linear regression against SSE, for 8x8 blocks. */
- if (activity_masking) {
- sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
- (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
- } else {
- sum *= qindex >= 128
- ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
- : qindex <= 43
- ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
- : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
- }
+ sum = od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
+ qindex, tmp, e_lp);
}
return sum;
}
+#endif // CONFIG_DAALA_DIST
+
+#if CONFIG_DIST_8X8
+#define NEW_FUTURE_DIST 0
+int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
+ const uint8_t *src, int src_stride, const uint8_t *dst,
+ int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
+ int bsh, int visible_w, int visible_h, int qindex) {
+ int64_t d = 0;
-int64_t av1_daala_dist(const uint8_t *src, int src_stride, const uint8_t *dst,
- int dst_stride, int bsw, int bsh, int qm,
- int use_activity_masking, int qindex) {
+#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST
int i, j;
- int64_t d;
- DECLARE_ALIGNED(16, od_coeff, orig[MAX_TX_SQUARE]);
- DECLARE_ALIGNED(16, od_coeff, rec[MAX_TX_SQUARE]);
- assert(qm == OD_HVS_QM);
+ DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]);
+ DECLARE_ALIGNED(16, uint16_t, rec[MAX_TX_SQUARE]);
+ (void)cpi;
+ (void)tx_bsize;
+#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+
+#if !CONFIG_HIGHBITDEPTH
+ (void)xd;
+#endif
+
+#if !CONFIG_DAALA_DIST
+ (void)qindex;
+#endif
+
+#if !CONFIG_DAALA_DIST || !NEW_FUTURE_DIST
+ (void)xd;
+ (void)bsw, (void)bsh;
+ (void)visible_w, (void)visible_h;
+#endif
+
+#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++)
+ orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
+
+ if ((bsw == visible_w) && (bsh == visible_h)) {
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++)
+ rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
+ } else {
+ for (j = 0; j < visible_h; j++)
+ for (i = 0; i < visible_w; i++)
+ rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
+
+ if (visible_w < bsw) {
+ for (j = 0; j < bsh; j++)
+ for (i = visible_w; i < bsw; i++)
+ rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
+ }
+
+ if (visible_h < bsh) {
+ for (j = visible_h; j < bsh; j++)
+ for (i = 0; i < bsw; i++)
+ rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
+ }
+ }
+ } else {
+#endif
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
+
+ if ((bsw == visible_w) && (bsh == visible_h)) {
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
+ } else {
+ for (j = 0; j < visible_h; j++)
+ for (i = 0; i < visible_w; i++)
+ rec[j * bsw + i] = dst[j * dst_stride + i];
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
+ if (visible_w < bsw) {
+ for (j = 0; j < bsh; j++)
+ for (i = visible_w; i < bsw; i++)
+ rec[j * bsw + i] = src[j * src_stride + i];
+ }
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
+ if (visible_h < bsh) {
+ for (j = visible_h; j < bsh; j++)
+ for (i = 0; i < bsw; i++) rec[j * bsw + i] = src[j * src_stride + i];
+ }
+ }
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+
+#if CONFIG_DAALA_DIST
+ d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex);
+#elif NEW_FUTURE_DIST
+ // Call new 8x8-wise distortion function here, for example
+ for (i = 0; i < bsh; i += 8) {
+ for (j = 0; j < bsw; j += 8) {
+ d +=
+ av1_compute_dist_8x8(&orig[i * bsw + j], &rec[i * bsw + j], bsw, bsh);
+ }
+ }
+#else
+ // Otherwise, MSE by default
+ unsigned sse;
+ // TODO(Any): Use even faster function which does not calculate variance
+ cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
+ d = sse;
+#endif // CONFIG_DAALA_DIST
- d = (int64_t)od_compute_dist(qm, use_activity_masking, orig, rec, bsw, bsh,
- qindex);
return d;
}
+
+static int64_t av1_dist_8x8_diff(const MACROBLOCKD *xd, const uint8_t *src,
+ int src_stride, const int16_t *diff,
+ int diff_stride, int bsw, int bsh,
+ int visible_w, int visible_h, int qindex) {
+ int64_t d = 0;
+
+#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+ int i, j;
+
+ DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]);
+ DECLARE_ALIGNED(16, int16_t, diff16[MAX_TX_SQUARE]);
+#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+
+#if !CONFIG_HIGHBITDEPTH
+ (void)xd;
+#endif
+
+#if !CONFIG_DAALA_DIST
+ (void)qindex;
+#endif
+
+#if !CONFIG_DAALA_DIST || !NEW_FUTURE_DIST
+ (void)xd;
+ (void)src, (void)src_stride;
+ (void)bsw, (void)bsh;
+ (void)visible_w, (void)visible_h;
+#endif
+
+#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++)
+ orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
+ } else {
+#endif
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+
+ if ((bsw == visible_w) && (bsh == visible_h)) {
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++) diff16[j * bsw + i] = diff[j * diff_stride + i];
+ } else {
+ for (j = 0; j < visible_h; j++)
+ for (i = 0; i < visible_w; i++)
+ diff16[j * bsw + i] = diff[j * diff_stride + i];
+
+ if (visible_w < bsw) {
+ for (j = 0; j < bsh; j++)
+ for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0;
+ }
+
+ if (visible_h < bsh) {
+ for (j = visible_h; j < bsh; j++)
+ for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0;
+ }
+ }
+#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+
+#if CONFIG_DAALA_DIST
+ d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex);
+#elif NEW_FUTURE_DIST
+ // Call new 8x8-wise distortion function (with diff input) here, for example
+ for (i = 0; i < bsh; i += 8) {
+ for (j = 0; j < bsw; j += 8) {
+ d += av1_compute_dist_8x8_diff(&orig[i * bsw + j], &diff16[i * bsw + j],
+ bsw, bsh);
+ }
+ }
+#else
+ // Otherwise, MSE by default
+ d = aom_sum_squares_2d_i16(diff, diff_stride, bsw, bsh);
#endif // CONFIG_DAALA_DIST
+ return d;
+}
+#endif // CONFIG_DIST_8X8
+
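/* Hypothetical call site, not taken from the patch: for a fully visible 8x8
 * luma transform block, the new dispatcher would be invoked as
 *
 *   int64_t d = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride,
 *                            BLOCK_8X8, 8, 8, 8, 8, x->qindex);
 *
 * With CONFIG_DAALA_DIST it returns the Daala perceptual distortion from
 * od_compute_dist(); with NEW_FUTURE_DIST it would loop over 8x8 sub-blocks
 * calling a future metric; otherwise it falls back to plain SSE through
 * cpi->fn_ptr[tx_bsize].vf().
 */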
static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
@@ -892,11 +1265,11 @@ static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
for (plane = plane_from; plane <= plane_to; ++plane) {
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
-#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
+#if CONFIG_CHROMA_SUB8X8
const BLOCK_SIZE bs = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
#else
const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
-#endif // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
+#endif // CONFIG_CHROMA_SUB8X8
unsigned int sse;
int rate;
@@ -1068,7 +1441,7 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
#if !CONFIG_VAR_TX && !CONFIG_SUPERTX
// Check for consistency of tx_size with mode info
- assert(tx_size == get_tx_size(plane, xd));
+ assert(tx_size == av1_get_tx_size(plane, xd));
#endif // !CONFIG_VAR_TX && !CONFIG_SUPERTX
(void)cm;
@@ -1144,10 +1517,12 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
#endif // !CONFIG_LV_MAP
int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
- int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
- const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
- int use_fast_coef_costing) {
+ int blk_row, int blk_col, int block, TX_SIZE tx_size,
+ const SCAN_ORDER *scan_order, const ENTROPY_CONTEXT *a,
+ const ENTROPY_CONTEXT *l, int use_fast_coef_costing) {
#if !CONFIG_LV_MAP
+ (void)blk_row;
+ (void)blk_col;
const AV1_COMMON *const cm = &cpi->common;
return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l,
use_fast_coef_costing);
@@ -1158,13 +1533,11 @@ int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const struct macroblockd_plane *pd = &xd->plane[plane];
const BLOCK_SIZE bsize = mbmi->sb_type;
-#if CONFIG_CB4X4
-#if CONFIG_CHROMA_2X2
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
-#else
+#if CONFIG_CHROMA_SUB8X8
const BLOCK_SIZE plane_bsize =
AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
-#endif // CONFIG_CHROMA_2X2
+#elif CONFIG_CB4X4
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
#else // CONFIG_CB4X4
const BLOCK_SIZE plane_bsize =
get_plane_block_size(AOMMAX(BLOCK_8X8, bsize), pd);
@@ -1172,7 +1545,8 @@ int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
TXB_CTX txb_ctx;
get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
- return av1_cost_coeffs_txb(cpi, x, plane, block, &txb_ctx);
+ return av1_cost_coeffs_txb(cpi, x, plane, blk_row, blk_col, block, tx_size,
+ &txb_ctx);
#endif // !CONFIG_LV_MAP
}
#endif // !CONFIG_PVQ || CONFIG_VAR_TX
@@ -1182,9 +1556,9 @@ static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
BLOCK_SIZE tx_bsize, int *width, int *height,
int *visible_width, int *visible_height) {
-#if !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT)
+#if !(CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX))
assert(tx_bsize <= plane_bsize);
-#endif // !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT)
+#endif
int txb_height = block_size_high[tx_bsize];
int txb_width = block_size_wide[tx_bsize];
const int block_height = block_size_high[plane_bsize];
@@ -1208,19 +1582,31 @@ static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height);
}
-// Compute the pixel domain sum square error on all visible 4x4s in the
-// transform block.
+// Compute the pixel domain distortion from src and dst on all visible 4x4s
+// in the transform block.
-static unsigned pixel_sse(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
- int plane, const uint8_t *src, const int src_stride,
- const uint8_t *dst, const int dst_stride, int blk_row,
- int blk_col, const BLOCK_SIZE plane_bsize,
- const BLOCK_SIZE tx_bsize) {
+static unsigned pixel_dist(const AV1_COMP *const cpi, const MACROBLOCK *x,
+ int plane, const uint8_t *src, const int src_stride,
+ const uint8_t *dst, const int dst_stride,
+ int blk_row, int blk_col,
+ const BLOCK_SIZE plane_bsize,
+ const BLOCK_SIZE tx_bsize) {
int txb_rows, txb_cols, visible_rows, visible_cols;
+ const MACROBLOCKD *xd = &x->e_mbd;
+
get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize,
&txb_cols, &txb_rows, &visible_cols, &visible_rows);
assert(visible_rows > 0);
assert(visible_cols > 0);
-#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+
+#if CONFIG_DIST_8X8
+ if (plane == 0 && txb_cols >= 8 && txb_rows >= 8)
+ return av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride, tx_bsize,
+ txb_cols, txb_rows, visible_cols, visible_rows,
+ x->qindex);
+#endif // CONFIG_DIST_8X8
+
+#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
if ((txb_rows == visible_rows && txb_cols == visible_cols) &&
tx_bsize < BLOCK_SIZES) {
#else
@@ -1242,36 +1628,86 @@ static unsigned pixel_sse(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
return sse;
}
-// Compute the squares sum squares on all visible 4x4s in the transform block.
-static int64_t sum_squares_visible(const MACROBLOCKD *xd, int plane,
- const int16_t *diff, const int diff_stride,
- int blk_row, int blk_col,
- const BLOCK_SIZE plane_bsize,
- const BLOCK_SIZE tx_bsize) {
+// Compute the pixel domain distortion from diff on all visible 4x4s in the
+// transform block.
+static int64_t pixel_diff_dist(const MACROBLOCK *x, int plane,
+ const int16_t *diff, const int diff_stride,
+ int blk_row, int blk_col,
+ const BLOCK_SIZE plane_bsize,
+ const BLOCK_SIZE tx_bsize) {
int visible_rows, visible_cols;
+ const MACROBLOCKD *xd = &x->e_mbd;
+#if CONFIG_DIST_8X8
+ int txb_height = block_size_high[tx_bsize];
+ int txb_width = block_size_wide[tx_bsize];
+ const int src_stride = x->plane[plane].src.stride;
+ const int src_idx = (blk_row * src_stride + blk_col) << tx_size_wide_log2[0];
+ const uint8_t *src = &x->plane[plane].src.buf[src_idx];
+#endif
+
get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL,
NULL, &visible_cols, &visible_rows);
- return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, visible_rows);
+
+#if CONFIG_DIST_8X8
+ if (plane == 0 && txb_width >= 8 && txb_height >= 8)
+ return av1_dist_8x8_diff(xd, src, src_stride, diff, diff_stride, txb_width,
+ txb_height, visible_cols, visible_rows, x->qindex);
+ else
+#endif
+ return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols,
+ visible_rows);
+}
+
+#if CONFIG_PALETTE || CONFIG_INTRABC
+int av1_count_colors(const uint8_t *src, int stride, int rows, int cols) {
+ int val_count[256];
+ memset(val_count, 0, sizeof(val_count));
+ for (int r = 0; r < rows; ++r) {
+ for (int c = 0; c < cols; ++c) {
+ ++val_count[src[r * stride + c]];
+ }
+ }
+ int n = 0;
+ for (int i = 0; i < 256; ++i) {
+ if (val_count[i]) ++n;
+ }
+ return n;
}
+#if CONFIG_HIGHBITDEPTH
+int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
+ int bit_depth) {
+ assert(bit_depth <= 12);
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ int val_count[1 << 12];
+ memset(val_count, 0, (1 << 12) * sizeof(val_count[0]));
+ for (int r = 0; r < rows; ++r) {
+ for (int c = 0; c < cols; ++c) {
+ ++val_count[src[r * stride + c]];
+ }
+ }
+ int n = 0;
+ for (int i = 0; i < (1 << bit_depth); ++i) {
+ if (val_count[i]) ++n;
+ }
+ return n;
+}
+#endif // CONFIG_HIGHBITDEPTH
+#endif // CONFIG_PALETTE || CONFIG_INTRABC
+
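/* Worked example with illustrative values (not from the patch):
 * av1_count_colors() returns the number of distinct sample values in a block,
 * e.g. for a 4x4 block that only contains the values 10, 20 and 30:
 *
 *   const uint8_t blk[16] = { 10, 10, 20, 30, 10, 20, 20, 30,
 *                             10, 10, 10, 30, 20, 20, 30, 30 };
 *   const int n = av1_count_colors(blk, 4, 4, 4);  // n == 3
 */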
void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col,
TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse,
OUTPUT_STATUS output_status) {
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
-#if CONFIG_DAALA_DIST
- int qm = OD_HVS_QM;
- int use_activity_masking = 0;
-#if CONFIG_PVQ
- use_activity_masking = x->daala_enc.use_activity_masking;
-#endif // CONFIG_PVQ
+#if CONFIG_DIST_8X8
struct macroblockd_plane *const pd = &xd->plane[plane];
-#else // CONFIG_DAALA_DIST
+#else // CONFIG_DIST_8X8
const struct macroblockd_plane *const pd = &xd->plane[plane];
-#endif // CONFIG_DAALA_DIST
+#endif // CONFIG_DIST_8X8
- if (cpi->sf.use_transform_domain_distortion && !CONFIG_DAALA_DIST) {
+ if (cpi->sf.use_transform_domain_distortion && !CONFIG_DIST_8X8) {
// Transform domain distortion computation is more efficient as it does
// not involve an inverse transform, but it is less accurate.
const int buffer_length = tx_size_2d[tx_size];
@@ -1292,19 +1728,21 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
&this_sse) >>
shift;
#endif // CONFIG_HIGHBITDEPTH
-#elif CONFIG_HIGHBITDEPTH
- const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
- *out_dist =
- av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse, bd) >>
- shift;
-#else
- *out_dist =
- av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift;
+#else // !CONFIG_PVQ
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ *out_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length,
+ &this_sse, xd->bd) >>
+ shift;
+ else
+#endif
+ *out_dist =
+ av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift;
#endif // CONFIG_PVQ
*out_sse = this_sse >> shift;
} else {
const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
-#if !CONFIG_PVQ || CONFIG_DAALA_DIST
+#if !CONFIG_PVQ || CONFIG_DIST_8X8
const int bsw = block_size_wide[tx_bsize];
const int bsh = block_size_high[tx_bsize];
#endif
@@ -1323,34 +1761,13 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
assert(cpi != NULL);
assert(tx_size_wide_log2[0] == tx_size_high_log2[0]);
-#if CONFIG_DAALA_DIST
- if (plane == 0 && bsw >= 8 && bsh >= 8) {
- if (output_status == OUTPUT_HAS_DECODED_PIXELS) {
- const int pred_stride = block_size_wide[plane_bsize];
- const int pred_idx = (blk_row * pred_stride + blk_col)
- << tx_size_wide_log2[0];
- const int16_t *pred = &pd->pred[pred_idx];
- int i, j;
- DECLARE_ALIGNED(16, uint8_t, pred8[MAX_TX_SQUARE]);
-
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++)
- pred8[j * bsw + i] = pred[j * pred_stride + i];
- *out_sse = av1_daala_dist(src, src_stride, pred8, bsw, bsw, bsh, qm,
- use_activity_masking, x->qindex);
- } else {
- *out_sse = av1_daala_dist(src, src_stride, dst, dst_stride, bsw, bsh,
- qm, use_activity_masking, x->qindex);
- }
- } else
-#endif // CONFIG_DAALA_DIST
{
const int diff_stride = block_size_wide[plane_bsize];
const int diff_idx = (blk_row * diff_stride + blk_col)
<< tx_size_wide_log2[0];
const int16_t *diff = &p->src_diff[diff_idx];
- *out_sse = sum_squares_visible(xd, plane, diff, diff_stride, blk_row,
- blk_col, plane_bsize, tx_bsize);
+ *out_sse = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
+ plane_bsize, tx_bsize);
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
*out_sse = ROUND_POWER_OF_TWO(*out_sse, (xd->bd - 8) * 2);
@@ -1360,15 +1777,8 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
if (eob) {
if (output_status == OUTPUT_HAS_DECODED_PIXELS) {
-#if CONFIG_DAALA_DIST
- if (plane == 0 && bsw >= 8 && bsh >= 8)
- *out_dist = av1_daala_dist(src, src_stride, dst, dst_stride, bsw, bsh,
- qm, use_activity_masking, x->qindex);
- else
-#endif // CONFIG_DAALA_DIST
- *out_dist =
- pixel_sse(cpi, xd, plane, src, src_stride, dst, dst_stride,
- blk_row, blk_col, plane_bsize, tx_bsize);
+ *out_dist = pixel_dist(cpi, x, plane, src, src_stride, dst, dst_stride,
+ blk_row, blk_col, plane_bsize, tx_bsize);
} else {
#if CONFIG_HIGHBITDEPTH
uint8_t *recon;
@@ -1399,37 +1809,44 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
#endif // !CONFIG_PVQ
const PLANE_TYPE plane_type = get_plane_type(plane);
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
-
- av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, recon,
- MAX_TX_SIZE, eob);
-
-#if CONFIG_DAALA_DIST
- if (plane == 0 && bsw >= 8 && bsh >= 8) {
- *out_dist = av1_daala_dist(src, src_stride, recon, MAX_TX_SIZE, bsw,
- bsh, qm, use_activity_masking, x->qindex);
- } else {
- if (plane == 0) {
- // Save decoded pixels for inter block in pd->pred to avoid
- // block_8x8_rd_txfm_daala_dist() need to produce them
- // by calling av1_inverse_transform_block() again.
- const int pred_stride = block_size_wide[plane_bsize];
- const int pred_idx = (blk_row * pred_stride + blk_col)
- << tx_size_wide_log2[0];
- int16_t *pred = &pd->pred[pred_idx];
- int i, j;
+ TX_TYPE tx_type =
+ av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
+ av1_inverse_transform_block(xd, dqcoeff,
+#if CONFIG_LGT
+ xd->mi[0]->mbmi.mode,
+#endif
+ tx_type, tx_size, recon, MAX_TX_SIZE, eob);
+
+#if CONFIG_DIST_8X8
+ if (plane == 0 && (bsw < 8 || bsh < 8)) {
+ // Save decoded pixels for inter blocks in pd->pred so that
+ // dist_8x8_sub8x8_txfm_rd() does not need to produce them
+ // by calling av1_inverse_transform_block() again.
+ const int pred_stride = block_size_wide[plane_bsize];
+ const int pred_idx = (blk_row * pred_stride + blk_col)
+ << tx_size_wide_log2[0];
+ int16_t *pred = &pd->pred[pred_idx];
+ int i, j;
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++)
+ pred[j * pred_stride + i] =
+ CONVERT_TO_SHORTPTR(recon)[j * MAX_TX_SIZE + i];
+ } else {
+#endif
for (j = 0; j < bsh; j++)
for (i = 0; i < bsw; i++)
pred[j * pred_stride + i] = recon[j * MAX_TX_SIZE + i];
+#if CONFIG_HIGHBITDEPTH
}
-#endif // CONFIG_DAALA_DIST
- *out_dist =
- pixel_sse(cpi, xd, plane, src, src_stride, recon, MAX_TX_SIZE,
- blk_row, blk_col, plane_bsize, tx_bsize);
-#if CONFIG_DAALA_DIST
+#endif // CONFIG_HIGHBITDEPTH
}
-#endif // CONFIG_DAALA_DIST
+#endif // CONFIG_DIST_8X8
+ *out_dist =
+ pixel_dist(cpi, x, plane, src, src_stride, recon, MAX_TX_SIZE,
+ blk_row, blk_col, plane_bsize, tx_bsize);
}
*out_dist *= 16;
} else {
@@ -1453,33 +1870,25 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
int64_t rd1, rd2, rd;
RD_STATS this_rd_stats;
- assert(tx_size == get_tx_size(plane, xd));
+#if !CONFIG_SUPERTX && !CONFIG_VAR_TX
+ assert(tx_size == av1_get_tx_size(plane, xd));
+#endif // !CONFIG_SUPERTX
av1_init_rd_stats(&this_rd_stats);
if (args->exit_early) return;
if (!is_inter_block(mbmi)) {
-#if CONFIG_CFL
-
-#if CONFIG_EC_ADAPT
- FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
-#else
- FRAME_CONTEXT *const ec_ctx = cm->fc;
-#endif // CONFIG_EC_ADAPT
-
- av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col,
- blk_row, tx_size, plane_bsize);
-#else
av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
-#endif
#if CONFIG_DPCM_INTRA
const int block_raster_idx =
av1_block_index_to_raster_order(tx_size, block);
- const PREDICTION_MODE mode =
- (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode;
- TX_TYPE tx_type = get_tx_type((plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV,
- xd, block, tx_size);
+ const PREDICTION_MODE mode = (plane == AOM_PLANE_Y)
+ ? get_y_mode(xd->mi[0], block_raster_idx)
+ : get_uv_mode(mbmi->uv_mode);
+ TX_TYPE tx_type =
+ av1_get_tx_type((plane == AOM_PLANE_Y) ? PLANE_TYPE_Y : PLANE_TYPE_UV,
+ xd, blk_row, blk_col, block, tx_size);
if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) {
int8_t skip;
av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col,
@@ -1496,9 +1905,36 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
#if !CONFIG_TXK_SEL
// full forward transform and quantization
const int coeff_ctx = combine_entropy_contexts(*a, *l);
+#if DISABLE_TRELLISQ_SEARCH
+ av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ coeff_ctx, AV1_XFORM_QUANT_B);
+#else
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
coeff_ctx, AV1_XFORM_QUANT_FP);
- av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
+
+ const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
+ tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
+ tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
+ const int buffer_length = tx_size_2d[tx_size];
+ int64_t tmp_dist;
+ int64_t tmp;
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ tmp_dist =
+ av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd) >>
+ shift;
+ else
+#endif
+ tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp) >> shift;
+
+ if (RDCOST(x->rdmult, 0, tmp_dist) + args->this_rd < args->best_rd) {
+ av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
+ a, l);
+ } else {
+ args->exit_early = 1;
+ return;
+ }
+#endif // DISABLE_TRELLISQ_SEARCH
if (!is_inter_block(mbmi)) {
struct macroblock_plane *const p = &x->plane[plane];
@@ -1518,25 +1954,27 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
const int dst_stride = pd->dst.stride;
uint8_t *dst =
&pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
- cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size);
+ // TODO(ltrudeau): Store sub-8x8 inter blocks when the bottom-right block is
+ // intra predicted.
+ cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size, plane_bsize);
}
#endif
#if CONFIG_DPCM_INTRA
CALCULATE_RD : {}
#endif // CONFIG_DPCM_INTRA
- rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
+ rd = RDCOST(x->rdmult, 0, this_rd_stats.dist);
if (args->this_rd + rd > args->best_rd) {
args->exit_early = 1;
return;
}
#if !CONFIG_PVQ
const PLANE_TYPE plane_type = get_plane_type(plane);
- const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
- const SCAN_ORDER *scan_order =
- get_scan(cm, tx_size, tx_type, is_inter_block(mbmi));
+ const TX_TYPE tx_type =
+ av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
+ const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, mbmi);
this_rd_stats.rate =
- av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order, a, l,
- args->use_fast_coef_costing);
+ av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size,
+ scan_order, a, l, args->use_fast_coef_costing);
#else // !CONFIG_PVQ
this_rd_stats.rate = x->rate;
#endif // !CONFIG_PVQ
@@ -1554,22 +1992,12 @@ CALCULATE_RD : {}
av1_set_txb_context(x, plane, block, tx_size, a, l);
#endif // !CONFIG_PVQ
- rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
- rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse);
+ rd1 = RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist);
+ rd2 = RDCOST(x->rdmult, 0, this_rd_stats.sse);
// TODO(jingning): temporarily enabled only for luma component
rd = AOMMIN(rd1, rd2);
-#if CONFIG_DAALA_DIST
- if (plane == 0 && plane_bsize >= BLOCK_8X8 &&
- (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4)) {
- this_rd_stats.dist = 0;
- this_rd_stats.sse = 0;
- rd = 0;
- x->rate_4x4[block] = this_rd_stats.rate;
- }
-#endif // CONFIG_DAALA_DIST
-
#if !CONFIG_PVQ
this_rd_stats.skip &= !x->plane[plane].eobs[block];
#else
@@ -1579,113 +2007,93 @@ CALCULATE_RD : {}
args->this_rd += rd;
- if (args->this_rd > args->best_rd) {
- args->exit_early = 1;
- return;
+#if CONFIG_DIST_8X8
+ if (!(plane == 0 && plane_bsize >= BLOCK_8X8 &&
+ (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))) {
+#endif
+ if (args->this_rd > args->best_rd) {
+ args->exit_early = 1;
+ return;
+ }
+#if CONFIG_DIST_8X8
}
+#endif
}
-#if CONFIG_DAALA_DIST
-static void block_8x8_rd_txfm_daala_dist(int plane, int block, int blk_row,
- int blk_col, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, void *arg) {
- struct rdcost_block_args *args = arg;
- MACROBLOCK *const x = args->x;
+#if CONFIG_DIST_8X8
+static void dist_8x8_sub8x8_txfm_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize,
+ struct rdcost_block_args *args) {
MACROBLOCKD *const xd = &x->e_mbd;
+ const struct macroblockd_plane *const pd = &xd->plane[0];
+ const struct macroblock_plane *const p = &x->plane[0];
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- int64_t rd, rd1, rd2;
- RD_STATS this_rd_stats;
- int qm = OD_HVS_QM;
- int use_activity_masking = 0;
-
- (void)tx_size;
-
- assert(plane == 0);
- assert(plane_bsize >= BLOCK_8X8);
-#if CONFIG_PVQ
- use_activity_masking = x->daala_enc.use_activity_masking;
-#endif // CONFIG_PVQ
- av1_init_rd_stats(&this_rd_stats);
+ const int src_stride = p->src.stride;
+ const int dst_stride = pd->dst.stride;
+ const uint8_t *src = &p->src.buf[0];
+ const uint8_t *dst = &pd->dst.buf[0];
+ const int16_t *pred = &pd->pred[0];
+ const int bw = block_size_wide[bsize];
+ const int bh = block_size_high[bsize];
- if (args->exit_early) return;
+ int i, j;
+ int64_t rd, rd1, rd2;
+ unsigned int tmp1, tmp2;
+ int qindex = x->qindex;
- {
- const struct macroblock_plane *const p = &x->plane[plane];
- struct macroblockd_plane *const pd = &xd->plane[plane];
+ assert((bw & 0x07) == 0);
+ assert((bh & 0x07) == 0);
- const int src_stride = p->src.stride;
- const int dst_stride = pd->dst.stride;
- const int diff_stride = block_size_wide[plane_bsize];
+#if CONFIG_HIGHBITDEPTH
+ uint8_t *pred8;
+ DECLARE_ALIGNED(16, uint16_t, pred16[MAX_TX_SQUARE]);
- const uint8_t *src =
- &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
- const uint8_t *dst =
- &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ pred8 = CONVERT_TO_BYTEPTR(pred16);
+ else
+ pred8 = (uint8_t *)pred16;
+#else
+ DECLARE_ALIGNED(16, uint8_t, pred8[MAX_TX_SQUARE]);
+#endif // CONFIG_HIGHBITDEPTH
- unsigned int tmp1, tmp2;
- int qindex = x->qindex;
- const int pred_stride = block_size_wide[plane_bsize];
- const int pred_idx = (blk_row * pred_stride + blk_col)
- << tx_size_wide_log2[0];
- int16_t *pred = &pd->pred[pred_idx];
- int i, j;
- const int tx_blk_size = 8;
-
- DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]);
-
- for (j = 0; j < tx_blk_size; j++)
- for (i = 0; i < tx_blk_size; i++)
- pred8[j * tx_blk_size + i] = pred[j * diff_stride + i];
-
- tmp1 = av1_daala_dist(src, src_stride, pred8, tx_blk_size, 8, 8, qm,
- use_activity_masking, qindex);
- tmp2 = av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8, qm,
- use_activity_masking, qindex);
-
- if (!is_inter_block(mbmi)) {
- this_rd_stats.sse = (int64_t)tmp1 * 16;
- this_rd_stats.dist = (int64_t)tmp2 * 16;
- } else {
- // For inter mode, the decoded pixels are provided in pd->pred,
- // while the predicted pixels are in dst.
- this_rd_stats.sse = (int64_t)tmp2 * 16;
- this_rd_stats.dist = (int64_t)tmp1 * 16;
- }
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (j = 0; j < bh; j++)
+ for (i = 0; i < bw; i++)
+ CONVERT_TO_SHORTPTR(pred8)[j * bw + i] = pred[j * bw + i];
+ } else {
+#endif
+ for (j = 0; j < bh; j++)
+ for (i = 0; i < bw; i++) pred8[j * bw + i] = pred[j * bw + i];
+#if CONFIG_HIGHBITDEPTH
}
+#endif // CONFIG_HIGHBITDEPTH
- rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
- if (args->this_rd + rd > args->best_rd) {
- args->exit_early = 1;
- return;
+ tmp1 = av1_dist_8x8(cpi, xd, src, src_stride, pred8, bw, bsize, bw, bh, bw,
+ bh, qindex);
+ tmp2 = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride, bsize, bw, bh,
+ bw, bh, qindex);
+
+ if (!is_inter_block(mbmi)) {
+ args->rd_stats.sse = (int64_t)tmp1 * 16;
+ args->rd_stats.dist = (int64_t)tmp2 * 16;
+ } else {
+ // For inter mode, the decoded pixels are provided in pd->pred,
+ // while the predicted pixels are in dst.
+ args->rd_stats.sse = (int64_t)tmp2 * 16;
+ args->rd_stats.dist = (int64_t)tmp1 * 16;
}
- {
- const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
- const uint8_t txw_unit = tx_size_wide_unit[tx_size];
- const uint8_t txh_unit = tx_size_high_unit[tx_size];
- const int step = txw_unit * txh_unit;
- int offset_h = tx_size_high_unit[TX_4X4];
- // The rate of the current 8x8 block is the sum of four 4x4 blocks in it.
- this_rd_stats.rate =
- x->rate_4x4[block - max_blocks_wide * offset_h - step] +
- x->rate_4x4[block - max_blocks_wide * offset_h] +
- x->rate_4x4[block - step] + x->rate_4x4[block];
- }
- rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
- rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse);
+ rd1 = RDCOST(x->rdmult, args->rd_stats.rate, args->rd_stats.dist);
+ rd2 = RDCOST(x->rdmult, 0, args->rd_stats.sse);
rd = AOMMIN(rd1, rd2);
- args->rd_stats.dist += this_rd_stats.dist;
- args->rd_stats.sse += this_rd_stats.sse;
-
- args->this_rd += rd;
+ args->rd_stats.rdcost = rd;
+ args->this_rd = rd;
- if (args->this_rd > args->best_rd) {
- args->exit_early = 1;
- return;
- }
+ if (args->this_rd > args->best_rd) args->exit_early = 1;
}
-#endif // CONFIG_DAALA_DIST
+#endif // CONFIG_DIST_8X8
static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
RD_STATS *rd_stats, int64_t ref_best_rd, int plane,
@@ -1705,15 +2113,13 @@ static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
-#if CONFIG_DAALA_DIST
- if (plane == 0 && bsize >= BLOCK_8X8 &&
+ av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
+ &args);
+#if CONFIG_DIST_8X8
+ if (!args.exit_early && plane == 0 && bsize >= BLOCK_8X8 &&
(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))
- av1_foreach_8x8_transformed_block_in_yplane(
- xd, bsize, block_rd_txfm, block_8x8_rd_txfm_daala_dist, &args);
- else
-#endif // CONFIG_DAALA_DIST
- av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
- &args);
+ dist_8x8_sub8x8_txfm_rd(cpi, x, bsize, &args);
+#endif
if (args.exit_early) {
av1_invalid_rd_stats(rd_stats);
@@ -1768,8 +2174,14 @@ static int tx_size_cost(const AV1_COMP *const cpi, const MACROBLOCK *const x,
const MACROBLOCKD *const xd = &x->e_mbd;
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- const int tx_select =
- cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8;
+ const int tx_select = cm->tx_mode == TX_MODE_SELECT &&
+#if CONFIG_EXT_PARTITION_TYPES
+ // Currently these block shapes can only use 4x4
+ // transforms
+ mbmi->sb_type != BLOCK_4X16 &&
+ mbmi->sb_type != BLOCK_16X4 &&
+#endif
+ mbmi->sb_type >= BLOCK_8X8;
if (tx_select) {
const int is_inter = is_inter_block(mbmi);
@@ -1779,11 +2191,11 @@ static int tx_size_cost(const AV1_COMP *const cpi, const MACROBLOCK *const x,
const int depth = tx_size_to_depth(coded_tx_size);
const int tx_size_ctx = get_tx_size_context(xd);
int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
-#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size)
r_tx_size += av1_cost_bit(cm->fc->quarter_tx_size_prob,
tx_size == quarter_txsize_lookup[bsize]);
-#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+#endif
return r_tx_size;
} else {
return 0;
@@ -1796,6 +2208,10 @@ int av1_tx_type_cost(const AV1_COMP *cpi, const MACROBLOCKD *xd,
TX_TYPE tx_type) {
if (plane > 0) return 0;
+#if CONFIG_VAR_TX
+ tx_size = get_min_tx_size(tx_size);
+#endif
+
const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const int is_inter = is_inter_block(mbmi);
#if CONFIG_EXT_TX
@@ -1844,6 +2260,9 @@ static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
const int r_tx_size = tx_size_cost(cpi, x, bs, tx_size);
+#if CONFIG_PVQ
+ assert(tx_size >= TX_4X4);
+#endif // CONFIG_PVQ
assert(skip_prob > 0);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs)));
@@ -1864,21 +2283,20 @@ static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
if (rd_stats->skip) {
if (is_inter) {
- rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse);
+ rd = RDCOST(x->rdmult, s1, rd_stats->sse);
} else {
- rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select,
- rd_stats->sse);
+ rd = RDCOST(x->rdmult, s1 + r_tx_size * tx_select, rd_stats->sse);
}
} else {
- rd = RDCOST(x->rdmult, x->rddiv,
- rd_stats->rate + s0 + r_tx_size * tx_select, rd_stats->dist);
+ rd = RDCOST(x->rdmult, rd_stats->rate + s0 + r_tx_size * tx_select,
+ rd_stats->dist);
}
if (tx_select) rd_stats->rate += r_tx_size;
if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
!(rd_stats->skip))
- rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse));
+ rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse));
return rd;
}
@@ -1895,6 +2313,11 @@ static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
// transforms should be considered for pruning
prune = prune_tx_types(cpi, bs, x, xd, -1);
+#if CONFIG_MRC_TX
+ // MRC_DCT only implemented for TX_32X32 so only include this tx in
+ // the search for TX_32X32
+ if (tx_type == MRC_DCT && tx_size != TX_32X32) return 1;
+#endif // CONFIG_MRC_TX
if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) return 1;
if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, tx_size))
return 1;
@@ -1929,7 +2352,8 @@ static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
return 0;
}
-#if CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
+#if CONFIG_EXT_INTER && \
+ (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT || CONFIG_INTERINTRA)
static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
MACROBLOCK *x, int *r, int64_t *d, int *s,
int64_t *sse, int64_t ref_best_rd) {
@@ -2020,14 +2444,13 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
av1_tx_type_cost(cpi, xd, bs, plane, mbmi->tx_size, tx_type);
if (this_rd_stats.skip)
- this_rd = RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse);
+ this_rd = RDCOST(x->rdmult, s1, this_rd_stats.sse);
else
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + s0,
- this_rd_stats.dist);
+ this_rd =
+ RDCOST(x->rdmult, this_rd_stats.rate + s0, this_rd_stats.dist);
if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] &&
!this_rd_stats.skip)
- this_rd =
- AOMMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse));
+ this_rd = AOMMIN(this_rd, RDCOST(x->rdmult, s1, this_rd_stats.sse));
if (this_rd < best_rd) {
best_rd = this_rd;
@@ -2068,13 +2491,12 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
continue;
}
if (this_rd_stats.skip)
- this_rd = RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse);
+ this_rd = RDCOST(x->rdmult, s1, this_rd_stats.sse);
else
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + s0,
- this_rd_stats.dist);
- if (is_inter && !xd->lossless[mbmi->segment_id] && !this_rd_stats.skip)
this_rd =
- AOMMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse));
+ RDCOST(x->rdmult, this_rd_stats.rate + s0, this_rd_stats.dist);
+ if (is_inter && !xd->lossless[mbmi->segment_id] && !this_rd_stats.skip)
+ this_rd = AOMMIN(this_rd, RDCOST(x->rdmult, s1, this_rd_stats.sse));
if (this_rd < best_rd) {
best_rd = this_rd;
@@ -2129,7 +2551,6 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
TX_TYPE best_tx_type = DCT_DCT;
#if CONFIG_TXK_SEL
TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
- const int num_blk = bsize_to_num_blk(bs);
#endif // CONFIG_TXK_SEL
const int tx_select = cm->tx_mode == TX_MODE_SELECT;
const int is_inter = is_inter_block(mbmi);
@@ -2171,8 +2592,7 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
rect_tx_size);
if (rd < best_rd) {
#if CONFIG_TXK_SEL
- memcpy(best_txk_type, mbmi->txk_type,
- sizeof(best_txk_type[0]) * num_blk);
+ memcpy(best_txk_type, mbmi->txk_type, sizeof(best_txk_type[0]) * 256);
#endif
best_tx_type = tx_type;
best_tx_size = rect_tx_size;
@@ -2278,8 +2698,7 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
last_rd = rd;
if (rd < best_rd) {
#if CONFIG_TXK_SEL
- memcpy(best_txk_type, mbmi->txk_type,
- sizeof(best_txk_type[0]) * num_blk);
+ memcpy(best_txk_type, mbmi->txk_type, sizeof(best_txk_type[0]) * 256);
#endif
best_tx_type = tx_type;
best_tx_size = n;
@@ -2295,7 +2714,7 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
mbmi->tx_size = best_tx_size;
mbmi->tx_type = best_tx_type;
#if CONFIG_TXK_SEL
- memcpy(mbmi->txk_type, best_txk_type, sizeof(best_txk_type[0]) * num_blk);
+ memcpy(mbmi->txk_type, best_txk_type, sizeof(best_txk_type[0]) * 256);
#endif
#if CONFIG_VAR_TX
@@ -2366,21 +2785,7 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
int block = 0;
for (row = 0; row < max_blocks_high; row += stepr) {
for (col = 0; col < max_blocks_wide; col += stepc) {
-#if CONFIG_CFL
- const struct macroblockd_plane *const pd = &xd->plane[0];
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
-
-#if CONFIG_EC_ADAPT
- FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
-#else
- FRAME_CONTEXT *const ec_ctx = cpi->common.fc;
-#endif // CONFIG_EC_ADAPT
-
- av1_predict_intra_block_encoder_facade(x, ec_ctx, 0, block, col, row,
- tx_size, plane_bsize);
-#else
av1_predict_intra_block_facade(xd, 0, block, col, row, tx_size);
-#endif
block += step;
}
}
@@ -2388,7 +2793,8 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &this_rd_stats.rate,
&this_rd_stats.dist, &this_rd_stats.skip, &temp_sse);
#if CONFIG_EXT_INTRA
- if (av1_is_directional_mode(mbmi->mode, bsize)) {
+ if (av1_is_directional_mode(mbmi->mode, bsize) &&
+ av1_use_angle_delta(bsize)) {
mode_cost += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
}
@@ -2405,8 +2811,8 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
}
}
#endif // CONFIG_FILTER_INTRA
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + mode_cost,
- this_rd_stats.dist);
+ this_rd =
+ RDCOST(x->rdmult, this_rd_stats.rate + mode_cost, this_rd_stats.dist);
return this_rd;
}
@@ -2620,7 +3026,7 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
if (tokenonly_rd_stats.rate == INT_MAX) continue;
this_rate = tokenonly_rd_stats.rate + palette_mode_cost;
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
+ this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
if (!xd->lossless[mbmi->segment_id] && mbmi->sb_type >= BLOCK_8X8) {
tokenonly_rd_stats.rate -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
}
@@ -2773,15 +3179,17 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
src_stride, dst, dst_stride, xd->bd);
#endif
if (is_lossless) {
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size);
- const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
+ TX_TYPE tx_type =
+ av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size);
+ const SCAN_ORDER *scan_order =
+ get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
const int coeff_ctx =
combine_entropy_contexts(tempa[idx], templ[idy]);
#if !CONFIG_PVQ
av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
- ratey += av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order,
- tempa + idx, templ + idy,
+ ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size,
+ scan_order, tempa + idx, templ + idy,
cpi->sf.use_fast_coef_costing);
skip = (p->eobs[block] == 0);
can_skip &= skip;
@@ -2806,28 +3214,38 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
templ[idy] = !skip;
can_skip &= skip;
#endif
- if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
+ if (RDCOST(x->rdmult, ratey, distortion) >= best_rd)
goto next_highbd;
#if CONFIG_PVQ
if (!skip)
#endif
av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
+#if CONFIG_LGT
+ mode,
+#endif
DCT_DCT, tx_size, dst, dst_stride,
p->eobs[block]);
} else {
int64_t dist;
unsigned int tmp;
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size);
- const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
+ TX_TYPE tx_type =
+ av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size);
+ const SCAN_ORDER *scan_order =
+ get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
const int coeff_ctx =
combine_entropy_contexts(tempa[idx], templ[idy]);
#if !CONFIG_PVQ
+#if DISABLE_TRELLISQ_SEARCH
+ av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
+ tx_size, coeff_ctx, AV1_XFORM_QUANT_B);
+#else
av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
- av1_optimize_b(cm, x, 0, block, BLOCK_8X8, tx_size, tempa + idx,
- templ + idy);
- ratey += av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order,
- tempa + idx, templ + idy,
+ av1_optimize_b(cm, x, 0, 0, 0, block, BLOCK_8X8, tx_size,
+ tempa + idx, templ + idy);
+#endif // DISABLE_TRELLISQ_SEARCH
+ ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size,
+ scan_order, tempa + idx, templ + idy,
cpi->sf.use_fast_coef_costing);
skip = (p->eobs[block] == 0);
can_skip &= skip;
@@ -2855,19 +3273,22 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
if (!skip)
#endif
av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
+#if CONFIG_LGT
+ mode,
+#endif
tx_type, tx_size, dst, dst_stride,
p->eobs[block]);
cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
dist = (int64_t)tmp << 4;
distortion += dist;
- if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
+ if (RDCOST(x->rdmult, ratey, distortion) >= best_rd)
goto next_highbd;
}
}
}
rate += ratey;
- this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+ this_rd = RDCOST(x->rdmult, rate, distortion);
if (this_rd < best_rd) {
*bestrate = rate;
@@ -2966,14 +3387,24 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
aom_subtract_block(tx_height, tx_width, src_diff, 8, src, src_stride,
dst, dst_stride);
#endif // !CONFIG_PVQ
-
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size);
- const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
+ TX_TYPE tx_type =
+ av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size);
+ const SCAN_ORDER *scan_order =
+ get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
const int coeff_ctx = combine_entropy_contexts(tempa[idx], templ[idy]);
#if CONFIG_CB4X4
block = 4 * block;
#endif // CONFIG_CB4X4
#if !CONFIG_PVQ
+#if DISABLE_TRELLISQ_SEARCH
+ av1_xform_quant(cm, x, 0, block,
+#if CONFIG_CB4X4
+ 2 * (row + idy), 2 * (col + idx),
+#else
+ row + idy, col + idx,
+#endif // CONFIG_CB4X4
+ BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_B);
+#else
const AV1_XFORM_QUANT xform_quant =
is_lossless ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP;
av1_xform_quant(cm, x, 0, block,
@@ -2984,12 +3415,12 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
#endif // CONFIG_CB4X4
BLOCK_8X8, tx_size, coeff_ctx, xform_quant);
- av1_optimize_b(cm, x, 0, block, BLOCK_8X8, tx_size, tempa + idx,
+ av1_optimize_b(cm, x, 0, 0, 0, block, BLOCK_8X8, tx_size, tempa + idx,
templ + idy);
-
- ratey +=
- av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order, tempa + idx,
- templ + idy, cpi->sf.use_fast_coef_costing);
+#endif // DISABLE_TRELLISQ_SEARCH
+ ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size, scan_order,
+ tempa + idx, templ + idy,
+ cpi->sf.use_fast_coef_costing);
skip = (p->eobs[block] == 0);
can_skip &= skip;
tempa[idx] = !skip;
@@ -3028,6 +3459,9 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
if (!skip)
#endif // CONFIG_PVQ
av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
+#if CONFIG_LGT
+ mode,
+#endif
tx_type, tx_size, dst, dst_stride,
p->eobs[block]);
unsigned int tmp;
@@ -3036,14 +3470,16 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
distortion += dist;
}
- if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
- goto next;
+ if (RDCOST(x->rdmult, ratey, distortion) >= best_rd) goto next;
if (is_lossless) { // Calculate inverse txfm *after* RD cost.
#if CONFIG_PVQ
if (!skip)
#endif // CONFIG_PVQ
av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
+#if CONFIG_LGT
+ mode,
+#endif
DCT_DCT, tx_size, dst, dst_stride,
p->eobs[block]);
}
@@ -3051,7 +3487,7 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
}
rate += ratey;
- this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+ this_rd = RDCOST(x->rdmult, rate, distortion);
if (this_rd < best_rd) {
*bestrate = rate;
@@ -3153,9 +3589,9 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
cpi, mb, idy, idx, &best_mode, bmode_costs,
xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r,
&ry, &d, bsize, tx_size, y_skip, best_rd - total_rd);
-#if !CONFIG_DAALA_DIST
+#if !CONFIG_DIST_8X8
if (this_rd >= best_rd - total_rd) return INT64_MAX;
-#endif // !CONFIG_DAALA_DIST
+#endif // !CONFIG_DIST_8X8
total_rd += this_rd;
cost += r;
total_distortion += d;
@@ -3172,7 +3608,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
}
mbmi->mode = mic->bmi[3].as_mode;
-#if CONFIG_DAALA_DIST
+#if CONFIG_DIST_8X8
{
const struct macroblock_plane *p = &mb->plane[0];
const struct macroblockd_plane *pd = &xd->plane[0];
@@ -3180,18 +3616,16 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
const int dst_stride = pd->dst.stride;
uint8_t *src = p->src.buf;
uint8_t *dst = pd->dst.buf;
- int use_activity_masking = 0;
- int qm = OD_HVS_QM;
#if CONFIG_PVQ
use_activity_masking = mb->daala_enc.use_activity_masking;
#endif // CONFIG_PVQ
// Daala-defined distortion computed for the block of 8x8 pixels
- total_distortion = av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8,
- qm, use_activity_masking, mb->qindex)
+ total_distortion = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride,
+ BLOCK_8X8, 8, 8, 8, 8, mb->qindex)
<< 4;
}
-#endif // CONFIG_DAALA_DIST
+#endif // CONFIG_DIST_8X8
// Add in the cost of the transform type
if (!is_lossless) {
int rate_tx_type = 0;
@@ -3218,7 +3652,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
*rate_y = tot_rate_y;
*distortion = total_distortion;
- return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
+ return RDCOST(mb->rdmult, cost, total_distortion);
}
#if CONFIG_FILTER_INTRA
@@ -3261,7 +3695,7 @@ static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
this_rate = tokenonly_rd_stats.rate +
av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 1) +
write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost;
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
+ this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
if (this_rd < *best_rd) {
*best_rd = this_rd;
@@ -3321,7 +3755,7 @@ static int64_t calc_rd_given_intra_angle(
this_rate = tokenonly_rd_stats.rate + mode_cost +
write_uniform_cost(2 * max_angle_delta + 1,
mbmi->angle_delta[0] + max_angle_delta);
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
+ this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
if (this_rd < *best_rd) {
*best_rd = this_rd;
@@ -3496,8 +3930,8 @@ static void angle_estimation(const uint8_t *src, int src_stride, int rows,
uint8_t *directional_mode_skip_mask) {
memset(directional_mode_skip_mask, 0,
INTRA_MODES * sizeof(*directional_mode_skip_mask));
- // Sub-8x8 blocks do not use extra directions.
- if (bsize < BLOCK_8X8) return;
+ // Check if angle_delta is used
+ if (!av1_use_angle_delta(bsize)) return;
uint64_t hist[DIRECTIONAL_MODES];
memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
src += src_stride;
@@ -3551,8 +3985,8 @@ static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
uint8_t *directional_mode_skip_mask) {
memset(directional_mode_skip_mask, 0,
INTRA_MODES * sizeof(*directional_mode_skip_mask));
- // Sub-8x8 blocks do not use extra directions.
- if (bsize < BLOCK_8X8) return;
+ // Check if angle_delta is used
+ if (!av1_use_angle_delta(bsize)) return;
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint64_t hist[DIRECTIONAL_MODES];
memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
@@ -3608,7 +4042,6 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize, int64_t best_rd) {
- uint8_t mode_idx;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mic = xd->mi[0];
MB_MODE_INFO *const mbmi = &mic->mbmi;
@@ -3683,7 +4116,7 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
x->use_default_intra_tx_type = 0;
/* Y Search for intra prediction mode */
- for (mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) {
+ for (int mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) {
RD_STATS this_rd_stats;
int this_rate, this_rate_tokenonly, s;
int64_t this_distortion, this_rd, this_model_rd;
@@ -3692,7 +4125,8 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
mbmi->mode = best_mbmi.mode;
x->use_default_intra_tx_type = 0;
} else {
- mbmi->mode = mode_idx;
+ assert(mode_idx < INTRA_MODES);
+ mbmi->mode = intra_rd_search_mode_order[mode_idx];
}
#if CONFIG_PVQ
od_encode_rollback(&x->daala_enc, &pre_buf);
@@ -3708,7 +4142,7 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
#if CONFIG_EXT_INTRA
is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
- if (is_directional_mode) {
+ if (is_directional_mode && av1_use_angle_delta(bsize)) {
this_rd_stats.rate = INT_MAX;
rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rd_stats, bsize,
bmode_costs[mbmi->mode], best_rd, &best_model_rd);
@@ -3754,11 +4188,13 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
this_rate +=
cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
#endif // CONFIG_INTRA_INTERP
- this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
- MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
+ if (av1_use_angle_delta(bsize)) {
+ this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
+ MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
+ }
}
#endif // CONFIG_EXT_INTRA
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ this_rd = RDCOST(x->rdmult, this_rate, this_distortion);
#if CONFIG_FILTER_INTRA
if (best_rd == INT64_MAX || this_rd - best_rd < (best_rd >> 4)) {
filter_intra_mode_skip_mask ^= (1 << mbmi->mode);
@@ -3785,16 +4221,6 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
od_encode_rollback(&x->daala_enc, &post_buf);
#endif // CONFIG_PVQ
-#if CONFIG_CFL
- // Perform one extra txfm_rd_in_plane() call, this time with the best value so
- // we can store reconstructed luma values
- RD_STATS this_rd_stats;
- x->cfl_store_y = 1;
- txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, 0, bsize,
- mic->mbmi.tx_size, cpi->sf.use_fast_coef_costing);
- x->cfl_store_y = 0;
-#endif
-
#if CONFIG_PALETTE
if (try_palette) {
rd_pick_palette_intra_sby(cpi, x, bsize, palette_y_mode_ctx,
@@ -3826,7 +4252,7 @@ static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x,
int64_t ref_best_rd) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
+ const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]);
int plane;
int is_cost_valid = 1;
av1_init_rd_stats(rd_stats);
@@ -3857,9 +4283,8 @@ static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x,
break;
}
av1_merge_rd_stats(rd_stats, &pn_rd_stats);
- if (RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist) >
- ref_best_rd &&
- RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse) > ref_best_rd) {
+ if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) > ref_best_rd &&
+ RDCOST(x->rdmult, 0, rd_stats->sse) > ref_best_rd) {
is_cost_valid = 0;
break;
}
@@ -3875,13 +4300,6 @@ static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x,
}
#if CONFIG_VAR_TX
-// FIXME crop these calls
-static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride,
- TX_SIZE tx_size) {
- return aom_sum_squares_2d_i16(diff, diff_stride, tx_size_wide[tx_size],
- tx_size_high[tx_size]);
-}
-
void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
int blk_row, int blk_col, int plane, int block,
int plane_bsize, const ENTROPY_CONTEXT *a,
@@ -3890,18 +4308,23 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
MACROBLOCKD *xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
+
+#if CONFIG_TXK_SEL
+ av1_search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize,
+ tx_size, a, l, 0, rd_stats);
+ return;
+#endif
+
int64_t tmp;
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
PLANE_TYPE plane_type = get_plane_type(plane);
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ TX_TYPE tx_type =
+ av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
const SCAN_ORDER *const scan_order =
- get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+ get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
int bh = block_size_high[txm_bsize];
int bw = block_size_wide[txm_bsize];
- int txb_h = tx_size_high_unit[tx_size];
- int txb_w = tx_size_wide_unit[tx_size];
-
int src_stride = p->src.stride;
uint8_t *src =
&p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
@@ -3914,30 +4337,15 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
#else
DECLARE_ALIGNED(16, uint8_t, rec_buffer[MAX_TX_SQUARE]);
#endif // CONFIG_HIGHBITDEPTH
- int max_blocks_high = block_size_high[plane_bsize];
- int max_blocks_wide = block_size_wide[plane_bsize];
- const int diff_stride = max_blocks_wide;
+ const int diff_stride = block_size_wide[plane_bsize];
const int16_t *diff =
&p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
int txb_coeff_cost;
assert(tx_size < TX_SIZES_ALL);
- if (xd->mb_to_bottom_edge < 0)
- max_blocks_high += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y);
- if (xd->mb_to_right_edge < 0)
- max_blocks_wide += xd->mb_to_right_edge >> (3 + pd->subsampling_x);
-
- max_blocks_high >>= tx_size_wide_log2[0];
- max_blocks_wide >>= tx_size_wide_log2[0];
-
int coeff_ctx = get_entropy_context(tx_size, a, l);
- av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
- coeff_ctx, AV1_XFORM_QUANT_FP);
-
- av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
-
// TODO(any): Use av1_dist_block to compute distortion
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -3954,21 +4362,35 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
0, bw, bh);
#endif // CONFIG_HIGHBITDEPTH
- if (blk_row + txb_h > max_blocks_high || blk_col + txb_w > max_blocks_wide) {
- int idx, idy;
- int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row);
- int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col);
- tmp = 0;
- for (idy = 0; idy < blocks_height; ++idy) {
- for (idx = 0; idx < blocks_width; ++idx) {
- const int16_t *d =
- diff + ((idy * diff_stride + idx) << tx_size_wide_log2[0]);
- tmp += sum_squares_2d(d, diff_stride, 0);
- }
- }
- } else {
- tmp = sum_squares_2d(diff, diff_stride, tx_size);
+#if DISABLE_TRELLISQ_SEARCH
+ av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ coeff_ctx, AV1_XFORM_QUANT_B);
+
+#else
+ av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ coeff_ctx, AV1_XFORM_QUANT_FP);
+
+ const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
+ tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+ const int buffer_length = tx_size_2d[tx_size];
+ int64_t tmp_dist;
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ tmp_dist =
+ av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd) >>
+ shift;
+ else
+#endif
+ tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp) >> shift;
+
+ if (RDCOST(x->rdmult, 0, tmp_dist) < rd_stats->ref_rdcost) {
+ av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
+ a, l);
}
+#endif // DISABLE_TRELLISQ_SEARCH
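The block above gates the trellis pass: after forward transform and quantization (AV1_XFORM_QUANT_FP), the coefficient-domain error is measured with av1_block_error(), and av1_optimize_b() only runs when that distortion alone still fits under the reference RD cost in rd_stats->ref_rdcost. A small, self-contained sketch of the same decision (all names and constants here are assumptions):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const int64_t ref_rdcost = 1 << 20; /* best RD cost found so far      */
      const int64_t coeff_err = 40000;    /* SSE between coeff and dqcoeff  */
      /* With a rate of 0 the RD cost reduces to the scaled distortion alone
       * (scaling shift assumed); if even that cannot beat the reference
       * cost, the trellis pass cannot help and is skipped. */
      if ((coeff_err << 4) < ref_rdcost)
        printf("run trellis (av1_optimize_b)\n");
      else
        printf("skip trellis, keep plain quantization\n");
      return 0;
    }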
+
+ tmp = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
+ plane_bsize, txm_bsize);
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
@@ -3977,36 +4399,48 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
rd_stats->sse += tmp * 16;
const int eob = p->eobs[block];
+#if CONFIG_LGT
+ PREDICTION_MODE mode = get_prediction_mode(xd->mi[0], plane, tx_size, block);
+ av1_inverse_transform_block(xd, dqcoeff, mode, tx_type, tx_size, rec_buffer,
+ MAX_TX_SIZE, eob);
+#else
av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, rec_buffer,
MAX_TX_SIZE, eob);
+#endif
if (eob > 0) {
- if (txb_w + blk_col > max_blocks_wide ||
- txb_h + blk_row > max_blocks_high) {
- int idx, idy;
- unsigned int this_dist;
- int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row);
- int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col);
- tmp = 0;
- for (idy = 0; idy < blocks_height; ++idy) {
- for (idx = 0; idx < blocks_width; ++idx) {
- uint8_t *const s =
- src + ((idy * src_stride + idx) << tx_size_wide_log2[0]);
- uint8_t *const r =
- rec_buffer + ((idy * MAX_TX_SIZE + idx) << tx_size_wide_log2[0]);
- cpi->fn_ptr[0].vf(s, src_stride, r, MAX_TX_SIZE, &this_dist);
- tmp += this_dist;
- }
+#if CONFIG_DIST_8X8
+ if (plane == 0 && (bw < 8 && bh < 8)) {
+ // Save sub8x8 luma decoded pixels
+ // since 8x8 luma decoded pixels are not available for daala-dist
+ // after recursive split of BLOCK_8x8 is done.
+ const int pred_stride = block_size_wide[plane_bsize];
+ const int pred_idx = (blk_row * pred_stride + blk_col)
+ << tx_size_wide_log2[0];
+ int16_t *decoded = &pd->pred[pred_idx];
+ int i, j;
+
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (j = 0; j < bh; j++)
+ for (i = 0; i < bw; i++)
+ decoded[j * pred_stride + i] =
+ CONVERT_TO_SHORTPTR(rec_buffer)[j * MAX_TX_SIZE + i];
+ } else {
+#endif
+ for (j = 0; j < bh; j++)
+ for (i = 0; i < bw; i++)
+ decoded[j * pred_stride + i] = rec_buffer[j * MAX_TX_SIZE + i];
+#if CONFIG_HIGHBITDEPTH
}
- } else {
- uint32_t this_dist;
- cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, MAX_TX_SIZE,
- &this_dist);
- tmp = this_dist;
+#endif // CONFIG_HIGHBITDEPTH
}
+#endif // CONFIG_DIST_8X8
+ tmp = pixel_dist(cpi, x, plane, src, src_stride, rec_buffer, MAX_TX_SIZE,
+ blk_row, blk_col, plane_bsize, txm_bsize);
}
rd_stats->dist += tmp * 16;
- txb_coeff_cost =
- av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order, a, l, 0);
+ txb_coeff_cost = av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block,
+ tx_size, scan_order, a, l, 0);
rd_stats->rate += txb_coeff_cost;
rd_stats->skip &= (eob == 0);
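Under CONFIG_DIST_8X8, the hunk above also stashes the reconstructed sub-8x8 luma pixels into pd->pred so that, once a recursive BLOCK_8X8 split has been searched, the 8x8 distortion metric can still be evaluated on a complete 8x8 patch. A hypothetical sketch of re-assembling four 4x4 reconstructions into one 8x8 buffer (the function and argument names are illustrative only):

    #include <stdint.h>
    #include <string.h>

    /* Copy four row-major 4x4 sub-blocks (top-left, top-right, bottom-left,
     * bottom-right) into one contiguous 8x8 patch. */
    static void pack_8x8_from_4x4(uint8_t dst8x8[8 * 8], const uint8_t *sub[4]) {
      for (int blk = 0; blk < 4; ++blk) {
        const int row0 = (blk >> 1) * 4;
        const int col0 = (blk & 1) * 4;
        for (int r = 0; r < 4; ++r)
          memcpy(&dst8x8[(row0 + r) * 8 + col0], &sub[blk][r * 4], 4);
      }
    }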
@@ -4038,14 +4472,35 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
int64_t this_rd = INT64_MAX;
ENTROPY_CONTEXT *pta = ta + blk_col;
ENTROPY_CONTEXT *ptl = tl + blk_row;
- int coeff_ctx, i;
+ int i;
int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
mbmi->sb_type, tx_size);
int64_t sum_rd = INT64_MAX;
int tmp_eob = 0;
int zero_blk_rate;
RD_STATS sum_rd_stats;
- const int tx_size_ctx = txsize_sqr_map[tx_size];
+#if CONFIG_TXK_SEL
+ TX_TYPE best_tx_type = TX_TYPES;
+ int txk_idx = (blk_row << 4) + blk_col;
+#endif
+#if CONFIG_RECT_TX_EXT
+ TX_SIZE quarter_txsize = quarter_txsize_lookup[mbmi->sb_type];
+ int check_qttx = is_quarter_tx_allowed(xd, mbmi, is_inter_block(mbmi)) &&
+ tx_size == max_txsize_rect_lookup[mbmi->sb_type] &&
+ quarter_txsize != tx_size;
+ int is_qttx_picked = 0;
+ int eobs_qttx[2] = { 0, 0 };
+ int skip_qttx[2] = { 0, 0 };
+ int block_offset_qttx = check_qttx
+ ? tx_size_wide_unit[quarter_txsize] *
+ tx_size_high_unit[quarter_txsize]
+ : 0;
+ int blk_row_offset, blk_col_offset;
+ int is_wide_qttx =
+ tx_size_wide_unit[quarter_txsize] > tx_size_high_unit[quarter_txsize];
+ blk_row_offset = is_wide_qttx ? tx_size_high_unit[quarter_txsize] : 0;
+ blk_col_offset = is_wide_qttx ? 0 : tx_size_wide_unit[quarter_txsize];
+#endif
av1_init_rd_stats(&sum_rd_stats);
@@ -4056,15 +4511,25 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
return;
}
- coeff_ctx = get_entropy_context(tx_size, pta, ptl);
-
av1_init_rd_stats(rd_stats);
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
+#if CONFIG_LV_MAP
+ TX_SIZE txs_ctx = get_txsize_context(tx_size);
+ TXB_CTX txb_ctx;
+ get_txb_ctx(plane_bsize, tx_size, plane, pta, ptl, &txb_ctx);
+ zero_blk_rate =
+ av1_cost_bit(xd->fc->txb_skip[txs_ctx][txb_ctx.txb_skip_ctx], 1);
+#else
+ int tx_size_ctx = txsize_sqr_map[tx_size];
+ int coeff_ctx = get_entropy_context(tx_size, pta, ptl);
zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0]
[coeff_ctx][EOB_TOKEN];
+#endif
+ rd_stats->ref_rdcost = ref_best_rd;
+ rd_stats->zero_rate = zero_blk_rate;
if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
inter_tx_size[0][0] = tx_size;
@@ -4081,8 +4546,8 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
}
}
- if ((RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist) >=
- RDCOST(x->rdmult, x->rddiv, zero_blk_rate, rd_stats->sse) ||
+ if ((RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
+ RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
rd_stats->skip == 1) &&
!xd->lossless[mbmi->segment_id]) {
#if CONFIG_RD_DEBUG
@@ -4094,6 +4559,9 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
rd_stats->skip = 1;
x->blk_skip[plane][blk_row * bw + blk_col] = 1;
p->eobs[block] = 0;
+#if CONFIG_TXK_SEL
+ mbmi->txk_type[txk_idx] = DCT_DCT;
+#endif
} else {
x->blk_skip[plane][blk_row * bw + blk_col] = 0;
rd_stats->skip = 0;
@@ -4102,23 +4570,143 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
rd_stats->rate +=
av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
- this_rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist);
+#if CONFIG_RECT_TX_EXT
+ if (check_qttx) {
+ assert(blk_row == 0 && blk_col == 0);
+ rd_stats->rate += av1_cost_bit(cpi->common.fc->quarter_tx_size_prob, 0);
+ }
+#endif
+ this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
+#if CONFIG_LV_MAP
+ tmp_eob = p->txb_entropy_ctx[block];
+#else
tmp_eob = p->eobs[block];
+#endif
+
+#if CONFIG_TXK_SEL
+ best_tx_type = mbmi->txk_type[txk_idx];
+#endif
+
+#if CONFIG_RECT_TX_EXT
+ if (check_qttx) {
+ assert(blk_row == 0 && blk_col == 0 && block == 0 && plane == 0);
+
+ RD_STATS rd_stats_tmp, rd_stats_qttx;
+ int64_t rd_qttx;
+
+ av1_init_rd_stats(&rd_stats_qttx);
+ av1_init_rd_stats(&rd_stats_tmp);
+
+ av1_tx_block_rd_b(cpi, x, quarter_txsize, 0, 0, plane, 0, plane_bsize,
+ pta, ptl, &rd_stats_qttx);
+
+ tx_size_ctx = txsize_sqr_map[quarter_txsize];
+ coeff_ctx = get_entropy_context(quarter_txsize, pta, ptl);
+ zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0]
+ [coeff_ctx][EOB_TOKEN];
+ if ((RDCOST(x->rdmult, rd_stats_qttx.rate, rd_stats_qttx.dist) >=
+ RDCOST(x->rdmult, zero_blk_rate, rd_stats_qttx.sse) ||
+ rd_stats_qttx.skip == 1) &&
+ !xd->lossless[mbmi->segment_id]) {
+#if CONFIG_RD_DEBUG
+ av1_update_txb_coeff_cost(&rd_stats_qttx, plane, quarter_txsize, 0, 0,
+ zero_blk_rate - rd_stats_qttx.rate);
+#endif // CONFIG_RD_DEBUG
+ rd_stats_qttx.rate = zero_blk_rate;
+ rd_stats_qttx.dist = rd_stats_qttx.sse;
+ rd_stats_qttx.skip = 1;
+ x->blk_skip[plane][blk_row * bw + blk_col] = 1;
+ skip_qttx[0] = 1;
+ p->eobs[block] = 0;
+ } else {
+ x->blk_skip[plane][blk_row * bw + blk_col] = 0;
+ skip_qttx[0] = 0;
+ rd_stats->skip = 0;
+ }
+
+ // Second tx block
+ av1_tx_block_rd_b(cpi, x, quarter_txsize, blk_row_offset, blk_col_offset,
+ plane, block_offset_qttx, plane_bsize, pta, ptl,
+ &rd_stats_tmp);
+
+ av1_set_txb_context(x, plane, 0, quarter_txsize, pta, ptl);
+ coeff_ctx = get_entropy_context(quarter_txsize, pta + blk_col_offset,
+ ptl + blk_row_offset);
+ zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0]
+ [coeff_ctx][EOB_TOKEN];
+ if ((RDCOST(x->rdmult, rd_stats_tmp.rate, rd_stats_tmp.dist) >=
+ RDCOST(x->rdmult, zero_blk_rate, rd_stats_tmp.sse) ||
+ rd_stats_tmp.skip == 1) &&
+ !xd->lossless[mbmi->segment_id]) {
+#if CONFIG_RD_DEBUG
+ av1_update_txb_coeff_cost(&rd_stats_tmp, plane, quarter_txsize, 0, 0,
+ zero_blk_rate - rd_stats_tmp.rate);
+#endif // CONFIG_RD_DEBUG
+ rd_stats_tmp.rate = zero_blk_rate;
+ rd_stats_tmp.dist = rd_stats_tmp.sse;
+ rd_stats_tmp.skip = 1;
+ x->blk_skip[plane][blk_row_offset * bw + blk_col_offset] = 1;
+ skip_qttx[1] = 1;
+ p->eobs[block_offset_qttx] = 0;
+ } else {
+ x->blk_skip[plane][blk_row_offset * bw + blk_col_offset] = 0;
+ skip_qttx[1] = 0;
+ rd_stats_tmp.skip = 0;
+ }
+
+ av1_merge_rd_stats(&rd_stats_qttx, &rd_stats_tmp);
+
+ if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) {
+ rd_stats_qttx.rate +=
+ av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
+ }
+ rd_stats_qttx.rate +=
+ av1_cost_bit(cpi->common.fc->quarter_tx_size_prob, 1);
+ rd_qttx = RDCOST(x->rdmult, rd_stats_qttx.rate, rd_stats_qttx.dist);
+#if CONFIG_LV_MAP
+ eobs_qttx[0] = p->txb_entropy_ctx[0];
+ eobs_qttx[1] = p->txb_entropy_ctx[block_offset_qttx];
+#else
+ eobs_qttx[0] = p->eobs[0];
+ eobs_qttx[1] = p->eobs[block_offset_qttx];
+#endif
+ if (rd_qttx < this_rd) {
+ is_qttx_picked = 1;
+ this_rd = rd_qttx;
+ rd_stats->rate = rd_stats_qttx.rate;
+ rd_stats->dist = rd_stats_qttx.dist;
+ rd_stats->sse = rd_stats_qttx.sse;
+ rd_stats->skip = rd_stats_qttx.skip;
+ rd_stats->rdcost = rd_stats_qttx.rdcost;
+ }
+ av1_get_entropy_contexts(plane_bsize, 0, pd, ta, tl);
+ }
+#endif
}
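The CONFIG_RECT_TX_EXT branch above evaluates a second candidate for the current block: two quarter-size transforms are coded and, together with the quarter_tx_size_prob signalling bit, their combined RD cost is compared against the single full-size transform; whichever is cheaper is kept. A minimal sketch of that comparison, with hypothetical names and an assumed fixed-point cost:

    #include <stdint.h>

    typedef struct {
      int rate;
      int64_t dist;
    } rd_stats_sketch;

    static int64_t rdcost_sketch(int rdmult, int rate, int64_t dist) {
      return (((int64_t)rate * rdmult) >> 9) + (dist << 4); /* assumed shifts */
    }

    /* Returns 1 when the pair of quarter-size transforms (plus the cost of
     * signalling the quarter-tx flag) beats the single full-size transform. */
    static int quarter_tx_wins(const rd_stats_sketch *full,
                               const rd_stats_sketch *q0,
                               const rd_stats_sketch *q1, int quarter_flag_bits,
                               int rdmult) {
      const int64_t rd_full = rdcost_sketch(rdmult, full->rate, full->dist);
      const int64_t rd_quarter = rdcost_sketch(
          rdmult, q0->rate + q1->rate + quarter_flag_bits, q0->dist + q1->dist);
      return rd_quarter < rd_full;
    }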
+#if CONFIG_MRC_TX
+ // If the tx type we are trying is MRC_DCT, we cannot partition the transform
+ // into anything smaller than TX_32X32
+ if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH && mbmi->tx_type != MRC_DCT) {
+#else
if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) {
+#endif
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
const int bsl = tx_size_wide_unit[sub_txs];
int sub_step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
RD_STATS this_rd_stats;
int this_cost_valid = 1;
int64_t tmp_rd = 0;
-
+#if CONFIG_DIST_8X8
+ int sub8x8_eob[4];
+#endif
sum_rd_stats.rate =
av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1);
assert(tx_size < TX_SIZES_ALL);
+ ref_best_rd = AOMMIN(this_rd, ref_best_rd);
+
for (i = 0; i < 4 && this_cost_valid; ++i) {
int offsetr = blk_row + (i >> 1) * bsl;
int offsetc = blk_col + (i & 0x01) * bsl;
@@ -4129,30 +4717,170 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
depth + 1, plane_bsize, ta, tl, tx_above, tx_left,
&this_rd_stats, ref_best_rd - tmp_rd, &this_cost_valid,
rd_stats_stack);
-
+#if CONFIG_DIST_8X8
+ if (plane == 0 && tx_size == TX_8X8) {
+ sub8x8_eob[i] = p->eobs[block];
+ }
+#endif // CONFIG_DIST_8X8
av1_merge_rd_stats(&sum_rd_stats, &this_rd_stats);
- tmp_rd =
- RDCOST(x->rdmult, x->rddiv, sum_rd_stats.rate, sum_rd_stats.dist);
+ tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist);
+#if !CONFIG_DIST_8X8
if (this_rd < tmp_rd) break;
+#endif
block += sub_step;
}
+#if CONFIG_DIST_8X8
+ if (this_cost_valid && plane == 0 && tx_size == TX_8X8) {
+ const int src_stride = p->src.stride;
+ const int dst_stride = pd->dst.stride;
+
+ const uint8_t *src =
+ &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
+ const uint8_t *dst =
+ &pd->dst
+ .buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
+
+ int64_t dist_8x8;
+ int qindex = x->qindex;
+ const int pred_stride = block_size_wide[plane_bsize];
+ const int pred_idx = (blk_row * pred_stride + blk_col)
+ << tx_size_wide_log2[0];
+ int16_t *pred = &pd->pred[pred_idx];
+ int j;
+ int row, col;
+
+#if CONFIG_HIGHBITDEPTH
+ uint8_t *pred8;
+ DECLARE_ALIGNED(16, uint16_t, pred8_16[8 * 8]);
+#else
+ DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]);
+#endif // CONFIG_HIGHBITDEPTH
+
+ dist_8x8 = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride,
+ BLOCK_8X8, 8, 8, 8, 8, qindex) *
+ 16;
+ sum_rd_stats.sse = dist_8x8;
+
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ pred8 = CONVERT_TO_BYTEPTR(pred8_16);
+ else
+ pred8 = (uint8_t *)pred8_16;
+#endif
+
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (row = 0; row < 2; ++row) {
+ for (col = 0; col < 2; ++col) {
+ int idx = row * 2 + col;
+ int eob = sub8x8_eob[idx];
+
+ if (eob > 0) {
+ for (j = 0; j < 4; j++)
+ for (i = 0; i < 4; i++)
+ CONVERT_TO_SHORTPTR(pred8)
+ [(row * 4 + j) * 8 + 4 * col + i] =
+ pred[(row * 4 + j) * pred_stride + 4 * col + i];
+ } else {
+ for (j = 0; j < 4; j++)
+ for (i = 0; i < 4; i++)
+ CONVERT_TO_SHORTPTR(pred8)
+ [(row * 4 + j) * 8 + 4 * col + i] = CONVERT_TO_SHORTPTR(
+ dst)[(row * 4 + j) * dst_stride + 4 * col + i];
+ }
+ }
+ }
+ } else {
+#endif
+ for (row = 0; row < 2; ++row) {
+ for (col = 0; col < 2; ++col) {
+ int idx = row * 2 + col;
+ int eob = sub8x8_eob[idx];
+
+ if (eob > 0) {
+ for (j = 0; j < 4; j++)
+ for (i = 0; i < 4; i++)
+ pred8[(row * 4 + j) * 8 + 4 * col + i] =
+ pred[(row * 4 + j) * pred_stride + 4 * col + i];
+ } else {
+ for (j = 0; j < 4; j++)
+ for (i = 0; i < 4; i++)
+ pred8[(row * 4 + j) * 8 + 4 * col + i] =
+ dst[(row * 4 + j) * dst_stride + 4 * col + i];
+ }
+ }
+ }
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ dist_8x8 = av1_dist_8x8(cpi, xd, src, src_stride, pred8, 8, BLOCK_8X8, 8,
+ 8, 8, 8, qindex) *
+ 16;
+ sum_rd_stats.dist = dist_8x8;
+ tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist);
+ }
+#endif // CONFIG_DIST_8X8
if (this_cost_valid) sum_rd = tmp_rd;
}
if (this_rd < sum_rd) {
int idx, idy;
- for (i = 0; i < tx_size_wide_unit[tx_size]; ++i) pta[i] = !(tmp_eob == 0);
- for (i = 0; i < tx_size_high_unit[tx_size]; ++i) ptl[i] = !(tmp_eob == 0);
+#if CONFIG_RECT_TX_EXT
+ TX_SIZE tx_size_selected = is_qttx_picked ? quarter_txsize : tx_size;
+#else
+ TX_SIZE tx_size_selected = tx_size;
+#endif
+
+#if CONFIG_RECT_TX_EXT
+ if (is_qttx_picked) {
+ assert(blk_row == 0 && blk_col == 0 && plane == 0);
+#if CONFIG_LV_MAP
+ p->txb_entropy_ctx[0] = eobs_qttx[0];
+ p->txb_entropy_ctx[block_offset_qttx] = eobs_qttx[1];
+#else
+ p->eobs[0] = eobs_qttx[0];
+ p->eobs[block_offset_qttx] = eobs_qttx[1];
+#endif
+ } else {
+#endif
+#if CONFIG_LV_MAP
+ p->txb_entropy_ctx[block] = tmp_eob;
+#else
+ p->eobs[block] = tmp_eob;
+#endif
+#if CONFIG_RECT_TX_EXT
+ }
+#endif
+
+ av1_set_txb_context(x, plane, block, tx_size_selected, pta, ptl);
+#if CONFIG_RECT_TX_EXT
+ if (is_qttx_picked)
+ av1_set_txb_context(x, plane, block_offset_qttx, tx_size_selected,
+ pta + blk_col_offset, ptl + blk_row_offset);
+#endif
+
txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
tx_size);
- inter_tx_size[0][0] = tx_size;
+ inter_tx_size[0][0] = tx_size_selected;
for (idy = 0; idy < tx_size_high_unit[tx_size] / 2; ++idy)
for (idx = 0; idx < tx_size_wide_unit[tx_size] / 2; ++idx)
- inter_tx_size[idy][idx] = tx_size;
- mbmi->tx_size = tx_size;
+ inter_tx_size[idy][idx] = tx_size_selected;
+ mbmi->tx_size = tx_size_selected;
+#if CONFIG_TXK_SEL
+ mbmi->txk_type[txk_idx] = best_tx_type;
+#endif
if (this_rd == INT64_MAX) *is_cost_valid = 0;
- x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
+#if CONFIG_RECT_TX_EXT
+ if (is_qttx_picked) {
+ x->blk_skip[plane][0] = skip_qttx[0];
+ x->blk_skip[plane][blk_row_offset * bw + blk_col_offset] = skip_qttx[1];
+ } else {
+#endif
+ x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
+#if CONFIG_RECT_TX_EXT
+ }
+#endif
} else {
*rd_stats = sum_rd_stats;
if (sum_rd == INT64_MAX) *is_cost_valid = 0;
@@ -4201,17 +4929,16 @@ static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
tx_above, tx_left, &pn_rd_stats, ref_best_rd - this_rd,
&is_cost_valid, rd_stats_stack);
av1_merge_rd_stats(rd_stats, &pn_rd_stats);
- this_rd += AOMMIN(
- RDCOST(x->rdmult, x->rddiv, pn_rd_stats.rate, pn_rd_stats.dist),
- RDCOST(x->rdmult, x->rddiv, 0, pn_rd_stats.sse));
+ this_rd += AOMMIN(RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist),
+ RDCOST(x->rdmult, 0, pn_rd_stats.sse));
block += step;
++block32;
}
}
}
- this_rd = AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist),
- RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
+ this_rd = AOMMIN(RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist),
+ RDCOST(x->rdmult, 0, rd_stats->sse));
if (this_rd > ref_best_rd) is_cost_valid = 0;
if (!is_cost_valid) {
@@ -4247,6 +4974,7 @@ static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
mbmi->min_tx_size = AOMMIN(
mbmi->min_tx_size, get_min_tx_size(mbmi->inter_tx_size[row][col]));
+#if !CONFIG_TXK_SEL
#if CONFIG_EXT_TX
if (get_ext_tx_types(mbmi->min_tx_size, bsize, is_inter,
cm->reduced_tx_set_used) > 1 &&
@@ -4266,20 +4994,21 @@ static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
[mbmi->tx_type];
}
}
-#else // CONFIG_EXT_TX
+#else
if (mbmi->min_tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id])
rd_stats->rate +=
cpi->inter_tx_type_costs[mbmi->min_tx_size][mbmi->tx_type];
#endif // CONFIG_EXT_TX
+#endif // CONFIG_TXK_SEL
if (rd_stats->skip)
- rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse);
+ rd = RDCOST(x->rdmult, s1, rd_stats->sse);
else
- rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate + s0, rd_stats->dist);
+ rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist);
if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
!(rd_stats->skip))
- rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse));
+ rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse));
return rd;
}
@@ -4299,6 +5028,12 @@ static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
TX_SIZE best_tx = max_txsize_lookup[bsize];
TX_SIZE best_min_tx_size = TX_SIZES_ALL;
uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
+ TX_TYPE txk_start = DCT_DCT;
+#if CONFIG_TXK_SEL
+ TX_TYPE txk_end = DCT_DCT + 1;
+#else
+ TX_TYPE txk_end = TX_TYPES;
+#endif
const int n4 = bsize_to_num_blk(bsize);
int idx, idy;
int prune = 0;
@@ -4326,9 +5061,14 @@ static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
for (idx = 0; idx < count32; ++idx)
av1_invalid_rd_stats(&rd_stats_stack[idx]);
- for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+ for (tx_type = txk_start; tx_type < txk_end; ++tx_type) {
RD_STATS this_rd_stats;
av1_init_rd_stats(&this_rd_stats);
+#if CONFIG_MRC_TX
+ // MRC_DCT only implemented for TX_32X32 so only include this tx in
+ // the search for TX_32X32
+ if (tx_type == MRC_DCT && max_tx_size != TX_32X32) continue;
+#endif // CONFIG_MRC_TX
#if CONFIG_EXT_TX
if (is_inter) {
if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
@@ -4384,7 +5124,6 @@ static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
ENTROPY_CONTEXT *left_ctx, RD_STATS *rd_stats) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
const int tx_row = blk_row >> (1 - pd->subsampling_y);
@@ -4402,16 +5141,11 @@ static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
: mbmi->inter_tx_size[tx_row][tx_col];
if (tx_size == plane_tx_size) {
- int i;
ENTROPY_CONTEXT *ta = above_ctx + blk_col;
ENTROPY_CONTEXT *tl = left_ctx + blk_row;
av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
plane_bsize, ta, tl, rd_stats);
-
- for (i = 0; i < tx_size_wide_unit[tx_size]; ++i)
- ta[i] = !(p->eobs[block] == 0);
- for (i = 0; i < tx_size_high_unit[tx_size]; ++i)
- tl[i] = !(p->eobs[block] == 0);
+ av1_set_txb_context(x, plane, block, tx_size, ta, tl);
} else {
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
const int bsl = tx_size_wide_unit[sub_txs];
@@ -4498,9 +5232,8 @@ static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x,
av1_merge_rd_stats(rd_stats, &pn_rd_stats);
- this_rd =
- AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist),
- RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
+ this_rd = AOMMIN(RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist),
+ RDCOST(x->rdmult, 0, rd_stats->sse));
if (this_rd > ref_best_rd) {
is_cost_valid = 0;
@@ -4543,7 +5276,7 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
&plane_block_height, &rows, &cols);
if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return;
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
#if CONFIG_FILTER_INTRA
mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
#endif // CONFIG_FILTER_INTRA
@@ -4689,7 +5422,7 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
}
}
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
+ this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
if (this_rd < *best_rd) {
*best_rd = this_rd;
*best_mbmi = *mbmi;
@@ -4727,7 +5460,7 @@ static int rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
av1_zero(filter_intra_mode_info);
mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 1;
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
#if CONFIG_PALETTE
mbmi->palette_mode_info.palette_size[1] = 0;
#endif // CONFIG_PALETTE
@@ -4741,7 +5474,7 @@ static int rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
av1_cost_bit(cpi->common.fc->filter_intra_probs[1], 1) +
cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] +
write_uniform_cost(FILTER_INTRA_MODES, mode);
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
+ this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
if (this_rd < *best_rd) {
*best_rd = this_rd;
*rate = this_rate;
@@ -4754,7 +5487,7 @@ static int rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
}
if (filter_intra_selected_flag) {
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
filter_intra_mode_info.use_filter_intra_mode[1];
mbmi->filter_intra_mode_info.filter_intra_mode[1] =
@@ -4782,7 +5515,7 @@ static int64_t pick_intra_angle_routine_sbuv(
if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in))
return INT64_MAX;
this_rate = tokenonly_rd_stats.rate + rate_overhead;
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
+ this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
if (this_rd < *best_rd) {
*best_rd = this_rd;
*best_angle_delta = mbmi->angle_delta[1];
@@ -4852,8 +5585,172 @@ static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
}
#endif // CONFIG_EXT_INTRA
+#if CONFIG_CFL
+static int64_t cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
+ const int y_averages_q3[MAX_NUM_TXB],
+ const uint8_t *src, int src_stride, int width,
+ int height, TX_SIZE tx_size, int dc_pred,
+ int alpha_q3, int64_t *dist_neg_out) {
+ int64_t dist = 0;
+ int diff;
+
+ if (alpha_q3 == 0) {
+ for (int j = 0; j < height; j++) {
+ for (int i = 0; i < width; i++) {
+ diff = src[i] - dc_pred;
+ dist += diff * diff;
+ }
+ src += src_stride;
+ }
+
+ if (dist_neg_out) *dist_neg_out = dist;
+
+ return dist;
+ }
+
+ int64_t dist_neg = 0;
+ const int tx_height = tx_size_high[tx_size];
+ const int tx_width = tx_size_wide[tx_size];
+ const int y_block_row_off = y_stride * tx_height;
+ const int src_block_row_off = src_stride * tx_height;
+ const uint8_t *t_y_pix;
+ const uint8_t *t_src;
+ int a = 0;
+ for (int b_j = 0; b_j < height; b_j += tx_height) {
+ const int h = b_j + tx_height;
+ for (int b_i = 0; b_i < width; b_i += tx_width) {
+ const int w = b_i + tx_width;
+ const int tx_avg_q3 = y_averages_q3[a++];
+ t_y_pix = y_pix;
+ t_src = src;
+ for (int t_j = b_j; t_j < h; t_j++) {
+ for (int t_i = b_i; t_i < w; t_i++) {
+ const int uv = t_src[t_i];
+
+ const int scaled_luma =
+ get_scaled_luma_q0(alpha_q3, t_y_pix[t_i], tx_avg_q3);
+
+ // TODO(ltrudeau) add support for HBD.
+ diff = uv - clamp(scaled_luma + dc_pred, 0, 255);
+ dist += diff * diff;
+
+ // TODO(ltrudeau) add support for HBD.
+ diff = uv - clamp(-scaled_luma + dc_pred, 0, 255);
+ dist_neg += diff * diff;
+ }
+ t_y_pix += y_stride;
+ t_src += src_stride;
+ }
+ }
+ y_pix += y_block_row_off;
+ src += src_block_row_off;
+ }
+
+ if (dist_neg_out) *dist_neg_out = dist_neg;
+
+ return dist;
+}
+
+static inline void cfl_update_costs(CFL_CTX *cfl, FRAME_CONTEXT *ec_ctx) {
+ assert(ec_ctx->cfl_alpha_cdf[CFL_ALPHABET_SIZE - 1] ==
+ AOM_ICDF(CDF_PROB_TOP));
+
+ aom_cdf_prob prev_cdf = 0;
+
+ for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
+ const int sign_bit_cost = (cfl_alpha_codes[c][CFL_PRED_U] != 0) +
+ (cfl_alpha_codes[c][CFL_PRED_V] != 0);
+
+ aom_cdf_prob prob = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]) - prev_cdf;
+ prev_cdf = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]);
+
+ cfl->costs[c] = av1_cost_symbol(prob) + av1_cost_literal(sign_bit_cost);
+ }
+}
+
+static int cfl_rd_pick_alpha(MACROBLOCK *const x, TX_SIZE tx_size) {
+ const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U];
+ const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V];
+ const uint8_t *const src_u = p_u->src.buf;
+ const uint8_t *const src_v = p_v->src.buf;
+ const int src_stride_u = p_u->src.stride;
+ const int src_stride_v = p_v->src.stride;
+
+ MACROBLOCKD *const xd = &x->e_mbd;
+ FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+
+ CFL_CTX *const cfl = xd->cfl;
+ cfl_compute_parameters(xd, tx_size);
+ const int width = cfl->uv_width;
+ const int height = cfl->uv_height;
+ const int dc_pred_u = cfl->dc_pred[CFL_PRED_U];
+ const int dc_pred_v = cfl->dc_pred[CFL_PRED_V];
+ const int *y_averages_q3 = cfl->y_averages_q3;
+ const uint8_t *y_pix = cfl->y_down_pix;
+
+ CFL_SIGN_TYPE *signs = mbmi->cfl_alpha_signs;
+
+ cfl_update_costs(cfl, ec_ctx);
+
+ int64_t sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
+ sse[CFL_PRED_U][0] =
+ cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u,
+ width, height, tx_size, dc_pred_u, 0, NULL);
+ sse[CFL_PRED_V][0] =
+ cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v,
+ width, height, tx_size, dc_pred_v, 0, NULL);
+
+ for (int m = 1; m < CFL_MAGS_SIZE; m += 2) {
+ assert(cfl_alpha_mags_q3[m + 1] == -cfl_alpha_mags_q3[m]);
+ sse[CFL_PRED_U][m] = cfl_alpha_dist(
+ y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u, width, height,
+ tx_size, dc_pred_u, cfl_alpha_mags_q3[m], &sse[CFL_PRED_U][m + 1]);
+ sse[CFL_PRED_V][m] = cfl_alpha_dist(
+ y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v, width, height,
+ tx_size, dc_pred_v, cfl_alpha_mags_q3[m], &sse[CFL_PRED_V][m + 1]);
+ }
+
+ int64_t dist;
+ int64_t cost;
+ int64_t best_cost;
+
+ // Compute least squares parameter of the entire block
+ // IMPORTANT: We assume that the first code is 0,0
+ int ind = 0;
+ signs[CFL_PRED_U] = CFL_SIGN_POS;
+ signs[CFL_PRED_V] = CFL_SIGN_POS;
+
+ dist = sse[CFL_PRED_U][0] + sse[CFL_PRED_V][0];
+ dist *= 16;
+ best_cost = RDCOST(x->rdmult, cfl->costs[0], dist);
+
+ for (int c = 1; c < CFL_ALPHABET_SIZE; c++) {
+ const int idx_u = cfl_alpha_codes[c][CFL_PRED_U];
+ const int idx_v = cfl_alpha_codes[c][CFL_PRED_V];
+ for (CFL_SIGN_TYPE sign_u = idx_u == 0; sign_u < CFL_SIGNS; sign_u++) {
+ for (CFL_SIGN_TYPE sign_v = idx_v == 0; sign_v < CFL_SIGNS; sign_v++) {
+ dist = sse[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] +
+ sse[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)];
+ dist *= 16;
+ cost = RDCOST(x->rdmult, cfl->costs[c], dist);
+ if (cost < best_cost) {
+ best_cost = cost;
+ ind = c;
+ signs[CFL_PRED_U] = sign_u;
+ signs[CFL_PRED_V] = sign_v;
+ }
+ }
+ }
+ }
+
+ mbmi->cfl_alpha_idx = ind;
+ return cfl->costs[ind];
+}
+#endif // CONFIG_CFL
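cfl_rd_pick_alpha() above precomputes the chroma SSE for alpha = 0 and for each alpha magnitude with both signs (cfl_alpha_dist() returns the positive-sign SSE and writes the negative-sign SSE through dist_neg_out), then walks the joint (U,V) alphabet keeping the entry with the lowest RD cost; the bit cost of the winning index is returned so the caller can add it to the UV_DC_PRED rate. A simplified single-plane sketch of that selection (names, table size and the cost model are assumptions):

    #include <stdint.h>

    #define N_CODES_SKETCH 16

    static int64_t rdcost_sketch(int rdmult, int rate, int64_t dist) {
      return (((int64_t)rate * rdmult) >> 9) + (dist << 4); /* assumed shifts */
    }

    /* Pick the (code, sign) pair with the lowest RD cost from SSE tables
     * that were precomputed once per candidate magnitude. */
    static int pick_alpha_code(const int64_t sse_pos[N_CODES_SKETCH],
                               const int64_t sse_neg[N_CODES_SKETCH],
                               const int rate[N_CODES_SKETCH], int rdmult,
                               int *best_sign) {
      int best = 0;
      *best_sign = +1;
      int64_t best_cost = rdcost_sketch(rdmult, rate[0], sse_pos[0]);
      for (int c = 1; c < N_CODES_SKETCH; ++c) {
        for (int s = 0; s < 2; ++s) {
          const int64_t d = s ? sse_neg[c] : sse_pos[c];
          const int64_t cost = rdcost_sketch(rdmult, rate[c], d);
          if (cost < best_cost) {
            best_cost = cost;
            best = c;
            *best_sign = s ? -1 : +1;
          }
        }
      }
      return best;
    }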
+
static void init_sbuv_mode(MB_MODE_INFO *const mbmi) {
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
#if CONFIG_PALETTE
mbmi->palette_mode_info.palette_size[1] = 0;
#endif // CONFIG_PALETTE
@@ -4870,20 +5767,19 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
assert(!is_inter_block(mbmi));
MB_MODE_INFO best_mbmi = *mbmi;
- PREDICTION_MODE mode;
int64_t best_rd = INT64_MAX, this_rd;
- int this_rate;
- RD_STATS tokenonly_rd_stats;
#if CONFIG_PVQ
od_rollback_buffer buf;
od_encode_checkpoint(&x->daala_enc, &buf);
#endif // CONFIG_PVQ
#if CONFIG_PALETTE
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
- uint8_t *best_palette_color_map = NULL;
#endif // CONFIG_PALETTE
- for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
+ for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) {
+ int this_rate;
+ RD_STATS tokenonly_rd_stats;
+ UV_PREDICTION_MODE mode = uv_rd_search_mode_order[mode_idx];
#if CONFIG_EXT_INTRA
const int is_directional_mode =
av1_is_directional_mode(mode, mbmi->sb_type);
@@ -4893,9 +5789,16 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
continue;
mbmi->uv_mode = mode;
+#if CONFIG_CFL
+ int cfl_alpha_rate = 0;
+ if (mode == UV_DC_PRED) {
+ const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]);
+ cfl_alpha_rate = cfl_rd_pick_alpha(x, uv_tx_size);
+ }
+#endif
#if CONFIG_EXT_INTRA
mbmi->angle_delta[1] = 0;
- if (is_directional_mode) {
+ if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) {
const int rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode] +
write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, 0);
if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
@@ -4915,8 +5818,13 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
this_rate =
tokenonly_rd_stats.rate + cpi->intra_uv_mode_cost[mbmi->mode][mode];
+#if CONFIG_CFL
+ if (mode == UV_DC_PRED) {
+ this_rate += cfl_alpha_rate;
+ }
+#endif
#if CONFIG_EXT_INTRA
- if (is_directional_mode) {
+ if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) {
this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
}
@@ -4927,7 +5835,7 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
#endif // CONFIG_FILTER_INTRA
#if CONFIG_PALETTE
if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8 &&
- mode == DC_PRED)
+ mode == UV_DC_PRED)
this_rate += av1_cost_bit(
av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 0);
#endif // CONFIG_PALETTE
@@ -4935,7 +5843,7 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
#if CONFIG_PVQ
od_encode_rollback(&x->daala_enc, &buf);
#endif // CONFIG_PVQ
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
+ this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
if (this_rd < best_rd) {
best_mbmi = *mbmi;
@@ -4949,9 +5857,9 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
#if CONFIG_PALETTE
if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8) {
- best_palette_color_map = x->palette_buffer->best_palette_color_map;
+ uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map;
rd_pick_palette_intra_sbuv(cpi, x,
- cpi->intra_uv_mode_cost[mbmi->mode][DC_PRED],
+ cpi->intra_uv_mode_cost[mbmi->mode][UV_DC_PRED],
best_palette_color_map, &best_mbmi, &best_rd,
rate, rate_tokenonly, distortion, skippable);
}
@@ -4975,7 +5883,7 @@ static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
TX_SIZE max_tx_size, int *rate_uv,
int *rate_uv_tokenonly, int64_t *dist_uv,
- int *skip_uv, PREDICTION_MODE *mode_uv) {
+ int *skip_uv, UV_PREDICTION_MODE *mode_uv) {
// Use an estimated rd for uv_intra based on DC_PRED if the
// appropriate speed flag is set.
(void)ctx;
@@ -4990,7 +5898,7 @@ static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
*rate_uv_tokenonly = 0;
*dist_uv = 0;
*skip_uv = 1;
- *mode_uv = DC_PRED;
+ *mode_uv = UV_DC_PRED;
return;
}
BLOCK_SIZE bs = scale_chroma_bsize(bsize, x->e_mbd.plane[1].subsampling_x,
@@ -5011,6 +5919,12 @@ static int cost_mv_ref(const AV1_COMP *const cpi, PREDICTION_MODE mode,
if (is_inter_compound_mode(mode)) {
return cpi
->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
+#if CONFIG_COMPOUND_SINGLEREF
+ } else if (is_inter_singleref_comp_mode(mode)) {
+ return cpi
+ ->inter_singleref_comp_mode_cost[mode_context]
+ [INTER_SINGLEREF_COMP_OFFSET(mode)];
+#endif // CONFIG_COMPOUND_SINGLEREF
}
#endif
@@ -5096,8 +6010,13 @@ typedef struct {
int segment_yrate;
PREDICTION_MODE modes[4];
#if CONFIG_EXT_INTER
+#if CONFIG_COMPOUND_SINGLEREF
+ SEG_RDSTAT rdstat[4][INTER_MODES + INTER_SINGLEREF_COMP_MODES +
+ INTER_COMPOUND_MODES];
+#else // !CONFIG_COMPOUND_SINGLEREF
SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES];
-#else
+#endif // CONFIG_COMPOUND_SINGLEREF
+#else // !CONFIG_EXT_INTER
SEG_RDSTAT rdstat[4][INTER_MODES];
#endif // CONFIG_EXT_INTER
int mvthresh;
@@ -5120,27 +6039,28 @@ static int check_best_zero_mv(
int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME], int this_mode,
const MV_REFERENCE_FRAME ref_frames[2], const BLOCK_SIZE bsize, int block,
int mi_row, int mi_col) {
- int_mv zeromv[2];
+ int_mv zeromv[2] = { {.as_int = 0 } };
+#if CONFIG_GLOBAL_MOTION
int comp_pred_mode = ref_frames[1] > INTRA_FRAME;
- int cur_frm;
+#endif
(void)mi_row;
(void)mi_col;
- for (cur_frm = 0; cur_frm < 1 + comp_pred_mode; cur_frm++) {
#if CONFIG_GLOBAL_MOTION
- if (this_mode == ZEROMV
+ if (this_mode == ZEROMV
#if CONFIG_EXT_INTER
- || this_mode == ZERO_ZEROMV
+ || this_mode == ZERO_ZEROMV
#endif // CONFIG_EXT_INTER
- )
+ ) {
+ for (int cur_frm = 0; cur_frm < 1 + comp_pred_mode; cur_frm++) {
zeromv[cur_frm].as_int =
gm_get_motion_vector(&cpi->common.global_motion[ref_frames[cur_frm]],
cpi->common.allow_high_precision_mv, bsize,
mi_col, mi_row, block)
.as_int;
- else
-#endif // CONFIG_GLOBAL_MOTION
- zeromv[cur_frm].as_int = 0;
+ }
}
+#endif // CONFIG_GLOBAL_MOTION
+
#if !CONFIG_EXT_INTER
assert(ref_frames[1] != INTRA_FRAME); // Just sanity check
#endif // !CONFIG_EXT_INTER
@@ -5201,8 +6121,11 @@ static int check_best_zero_mv(
}
static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row,
- int mi_col,
+ BLOCK_SIZE bsize, int_mv *frame_mv,
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ int_mv *frame_comp_mv,
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ int mi_row, int mi_col,
#if CONFIG_EXT_INTER
int_mv *ref_mv_sub8x8[2], const uint8_t *mask,
int mask_stride,
@@ -5213,35 +6136,47 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
const int ph = block_size_high[bsize];
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- // This function should only ever be called for compound modes
+// This function should only ever be called for compound modes
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi)) {
+ assert(is_inter_singleref_comp_mode(mbmi->mode));
+ assert(frame_comp_mv);
+ }
+ assert(has_second_ref(mbmi) || is_inter_singleref_comp_mode(mbmi->mode));
+ const int refs[2] = { mbmi->ref_frame[0], has_second_ref(mbmi)
+ ? mbmi->ref_frame[1]
+ : mbmi->ref_frame[0] };
+#else
assert(has_second_ref(mbmi));
const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
int_mv ref_mv[2];
int ite, ref;
-#if CONFIG_DUAL_FILTER
- InterpFilter interp_filter[4] = {
- mbmi->interp_filter[0], mbmi->interp_filter[1], mbmi->interp_filter[2],
- mbmi->interp_filter[3],
- };
-#else
- const InterpFilter interp_filter = mbmi->interp_filter;
-#endif // CONFIG_DUAL_FILTER
struct scale_factors sf;
- struct macroblockd_plane *const pd = &xd->plane[0];
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
// ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
const int ic = block & 1;
const int ir = (block - ic) >> 1;
+ struct macroblockd_plane *const pd = &xd->plane[0];
const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
#if CONFIG_GLOBAL_MOTION
int is_global[2];
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
+#else
for (ref = 0; ref < 2; ++ref) {
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
WarpedMotionParams *const wm =
&xd->global_motion[xd->mi[0]->mbmi.ref_frame[ref]];
is_global[ref] = is_global_mv_block(xd->mi[0], block, wm->wmtype);
}
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi)) is_global[1] = is_global[0];
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
#endif // CONFIG_GLOBAL_MOTION
+#else // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+ (void)block;
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
// Do joint motion search in compound mode to get more accurate mv.
@@ -5264,7 +6199,11 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
(void)ref_mv_sub8x8;
#endif // CONFIG_EXT_INTER && CONFIG_CB4X4
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
+#else
for (ref = 0; ref < 2; ++ref) {
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
#if CONFIG_EXT_INTER && !CONFIG_CB4X4
if (bsize < BLOCK_8X8 && ref_mv_sub8x8 != NULL)
ref_mv[ref].as_int = ref_mv_sub8x8[ref]->as_int;
@@ -5284,6 +6223,24 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
}
}
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi)) {
+ assert(is_inter_singleref_comp_mode(mbmi->mode));
+ // NOTE: For single ref comp mode, set up the 2nd set of ref_mv/pre_planes
+ // all from the 1st reference frame, i.e. refs[0].
+ ref_mv[1] = x->mbmi_ext->ref_mvs[refs[0]][0];
+ if (scaled_ref_frame[0]) {
+ int i;
+ // Swap out the reference frame for a version that's been scaled to
+ // match the resolution of the current frame, allowing the existing
+ // motion search code to be used without additional modifications.
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ backup_yv12[1][i] = xd->plane[i].pre[1];
+ av1_setup_pre_planes(xd, 1, scaled_ref_frame[0], mi_row, mi_col, NULL);
+ }
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+
// Since we have scaled the reference frames to match the size of the current
// frame we must use a unit scaling factor during mode selection.
#if CONFIG_HIGHBITDEPTH
@@ -5294,9 +6251,16 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
cm->height);
#endif // CONFIG_HIGHBITDEPTH
- // Allow joint search multiple times iteratively for each reference frame
- // and break out of the search loop if it couldn't find a better mv.
+// Allow joint search multiple times iteratively for each reference frame
+// and break out of the search loop if it couldn't find a better mv.
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ const int num_ites =
+ (has_second_ref(mbmi) || mbmi->mode == SR_NEW_NEWMV) ? 4 : 1;
+ const int start_ite = has_second_ref(mbmi) ? 0 : 1;
+ for (ite = start_ite; ite < (start_ite + num_ites); ite++) {
+#else
for (ite = 0; ite < 4; ite++) {
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
struct buf_2d ref_yv12[2];
int bestsme = INT_MAX;
int sadpb = x->sadperbit16;
@@ -5308,7 +6272,7 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
// odd iterations search in the second. The predictor
// found for the 'other' reference frame is factored in.
const int plane = 0;
- ConvolveParams conv_params = get_conv_params(0, plane);
+ ConvolveParams conv_params = get_conv_params(!id, 0, plane);
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
WarpTypesAllowed warp_types;
#if CONFIG_GLOBAL_MOTION
@@ -5323,21 +6287,24 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
ref_yv12[0] = xd->plane[plane].pre[0];
ref_yv12[1] = xd->plane[plane].pre[1];
-#if CONFIG_DUAL_FILTER
- // reload the filter types
- interp_filter[0] =
- (id == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0];
- interp_filter[1] =
- (id == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1];
-#endif // CONFIG_DUAL_FILTER
-
// Get the prediction block from the 'other' reference frame.
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ MV *const the_other_mv = (has_second_ref(mbmi) || id)
+ ? &frame_mv[refs[!id]].as_mv
+ : &frame_comp_mv[refs[0]].as_mv;
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
av1_highbd_build_inter_predictor(
ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
- &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0, interp_filter,
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ the_other_mv,
+#else // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF)
+ &frame_mv[refs[!id]].as_mv,
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ &sf, pw, ph, 0, mbmi->interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, p_col, p_row,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -5347,7 +6314,12 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
#endif // CONFIG_HIGHBITDEPTH
av1_build_inter_predictor(
ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
- &frame_mv[refs[!id]].as_mv, &sf, pw, ph, &conv_params, interp_filter,
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ the_other_mv,
+#else // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF)
+ &frame_mv[refs[!id]].as_mv,
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ &sf, pw, ph, &conv_params, mbmi->interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, p_col, p_row, plane, !id,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -5360,13 +6332,24 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
if (id) xd->plane[plane].pre[0] = ref_yv12[id];
av1_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv);
- // Use the mv result from the single mode as mv predictor.
- *best_mv = frame_mv[refs[id]].as_mv;
+// Use the mv result from the single mode as mv predictor.
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi) && id)
+ *best_mv = frame_comp_mv[refs[0]].as_mv;
+ else
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ *best_mv = frame_mv[refs[id]].as_mv;
best_mv->col >>= 3;
best_mv->row >>= 3;
- av1_set_mvcost(x, refs[id], id, mbmi->ref_mv_idx);
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi))
+ av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
+ else
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ av1_set_mvcost(x, refs[id], id, mbmi->ref_mv_idx);
// Small-range full-pixel motion search.
bestsme =
@@ -5392,60 +6375,33 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
if (bestsme < INT_MAX) {
int dis; /* TODO: use dis in distortion calculation later. */
unsigned int sse;
- if (cpi->sf.use_upsampled_references) {
- // Use up-sampled reference frames.
- struct buf_2d backup_pred = pd->pre[0];
- const YV12_BUFFER_CONFIG *upsampled_ref =
- get_upsampled_ref(cpi, refs[id]);
-
- // Set pred for Y plane
- setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer,
- upsampled_ref->y_crop_width,
- upsampled_ref->y_crop_height, upsampled_ref->y_stride,
- (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
- pd->subsampling_y);
-
-// If bsize < BLOCK_8X8, adjust pred pointer for this block
-#if !CONFIG_CB4X4
- if (bsize < BLOCK_8X8)
- pd->pre[0].buf =
- &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block,
- pd->pre[0].stride))
- << 3];
-#endif // !CONFIG_CB4X4
-
- bestsme = cpi->find_fractional_mv_step(
- x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
- x->errorperbit, &cpi->fn_ptr[bsize], 0,
- cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
- &dis, &sse, second_pred,
-#if CONFIG_EXT_INTER
- mask, mask_stride, id,
-#endif
- pw, ph, 1);
-
- // Restore the reference frames.
- pd->pre[0] = backup_pred;
- } else {
- (void)block;
- bestsme = cpi->find_fractional_mv_step(
- x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
- x->errorperbit, &cpi->fn_ptr[bsize], 0,
- cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
- &dis, &sse, second_pred,
+ bestsme = cpi->find_fractional_mv_step(
+ x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
+ x->errorperbit, &cpi->fn_ptr[bsize], 0,
+ cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
+ &dis, &sse, second_pred,
#if CONFIG_EXT_INTER
- mask, mask_stride, id,
+ mask, mask_stride, id,
#endif
- pw, ph, 0);
- }
+ pw, ph, cpi->sf.use_upsampled_references);
}
// Restore the pointer to the first (possibly scaled) prediction buffer.
if (id) xd->plane[plane].pre[0] = ref_yv12[0];
if (bestsme < last_besterr[id]) {
- frame_mv[refs[id]].as_mv = *best_mv;
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ // NOTE: For single ref comp mode, frame_mv stores the first mv and
+ // frame_comp_mv stores the second mv.
+ if (!has_second_ref(mbmi) && id)
+ frame_comp_mv[refs[0]].as_mv = *best_mv;
+ else
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ frame_mv[refs[id]].as_mv = *best_mv;
last_besterr[id] = bestsme;
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi)) last_besterr[!id] = last_besterr[id];
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
} else {
break;
}
@@ -5453,40 +6409,92 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
*rate_mv = 0;
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
+#else
for (ref = 0; ref < 2; ++ref) {
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
if (scaled_ref_frame[ref]) {
// Restore the prediction frame pointers to their unscaled versions.
int i;
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[ref] = backup_yv12[ref][i];
}
- av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx);
+
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi))
+ av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
+ else
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx);
+
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi)) {
+ // NOTE: For single ref comp mode, i.e. !has_second_ref(mbmi) is true, the
+ // first mv is stored in frame_mv[] and the second mv is stored in
+ // frame_comp_mv[].
+ if (compound_ref0_mode(mbmi->mode) == NEWMV) // SR_NEW_NEWMV
+ *rate_mv += av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
+ &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ assert(compound_ref1_mode(mbmi->mode) == NEWMV);
+ *rate_mv += av1_mv_bit_cost(&frame_comp_mv[refs[0]].as_mv,
+ &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ } else {
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
#if CONFIG_EXT_INTER && !CONFIG_CB4X4
- if (bsize >= BLOCK_8X8)
+ if (bsize >= BLOCK_8X8)
#endif // CONFIG_EXT_INTER && !CONFIG_CB4X4
- *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
- &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
- x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
+ &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
#if CONFIG_EXT_INTER && !CONFIG_CB4X4
- else
- *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
- &ref_mv_sub8x8[ref]->as_mv, x->nmvjointcost,
- x->mvcost, MV_COST_WEIGHT);
+ else
+ *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
+ &ref_mv_sub8x8[ref]->as_mv, x->nmvjointcost,
+ x->mvcost, MV_COST_WEIGHT);
#endif // CONFIG_EXT_INTER && !CONFIG_CB4X4
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
}
+
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi)) {
+ if (scaled_ref_frame[0]) {
+ // Restore the prediction frame pointers to their unscaled versions.
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[1] = backup_yv12[1][i];
+ }
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
}
-static void estimate_ref_frame_costs(const AV1_COMMON *cm,
- const MACROBLOCKD *xd, int segment_id,
- unsigned int *ref_costs_single,
- unsigned int *ref_costs_comp,
- aom_prob *comp_mode_p) {
+static void estimate_ref_frame_costs(
+ const AV1_COMMON *cm, const MACROBLOCKD *xd, int segment_id,
+ unsigned int *ref_costs_single,
+#if CONFIG_EXT_COMP_REFS
+ unsigned int (*ref_costs_comp)[TOTAL_REFS_PER_FRAME],
+#else
+ unsigned int *ref_costs_comp,
+#endif // CONFIG_EXT_COMP_REFS
+ aom_prob *comp_mode_p) {
int seg_ref_active =
segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
if (seg_ref_active) {
memset(ref_costs_single, 0,
TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_single));
+#if CONFIG_EXT_COMP_REFS
+ int ref_frame;
+ for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
+ memset(ref_costs_comp[ref_frame], 0,
+ TOTAL_REFS_PER_FRAME * sizeof((*ref_costs_comp)[0]));
+#else
memset(ref_costs_comp, 0, TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_comp));
+#endif // CONFIG_EXT_COMP_REFS
+
*comp_mode_p = 128;
} else {
aom_prob intra_inter_p = av1_get_intra_inter_prob(cm, xd);
@@ -5541,7 +6549,7 @@ static void estimate_ref_frame_costs(const AV1_COMMON *cm,
ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p5, 0);
ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p5, 1);
-#else
+#else // !CONFIG_EXT_REFS
ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 1);
ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
@@ -5570,6 +6578,63 @@ static void estimate_ref_frame_costs(const AV1_COMMON *cm,
unsigned int base_cost = av1_cost_bit(intra_inter_p, 1);
+#if CONFIG_EXT_COMP_REFS
+ aom_prob comp_ref_type_p = av1_get_comp_reference_type_prob(cm, xd);
+ unsigned int ref_bicomp_costs[TOTAL_REFS_PER_FRAME] = { 0 };
+
+ ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
+ ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
+#if USE_UNI_COMP_REFS
+ base_cost + av1_cost_bit(comp_ref_type_p, 1);
+#else
+ base_cost;
+#endif // USE_UNI_COMP_REFS
+ ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF_FRAME] = 0;
+
+ ref_bicomp_costs[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
+ ref_bicomp_costs[LAST2_FRAME] += av1_cost_bit(ref_comp_p, 0);
+ ref_bicomp_costs[LAST3_FRAME] += av1_cost_bit(ref_comp_p, 1);
+ ref_bicomp_costs[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);
+
+ ref_bicomp_costs[LAST_FRAME] += av1_cost_bit(ref_comp_p1, 1);
+ ref_bicomp_costs[LAST2_FRAME] += av1_cost_bit(ref_comp_p1, 0);
+
+ ref_bicomp_costs[LAST3_FRAME] += av1_cost_bit(ref_comp_p2, 0);
+ ref_bicomp_costs[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p2, 1);
+
+ ref_bicomp_costs[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
+ ref_bicomp_costs[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);
+
+ int ref0;
+ for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
+ ref_costs_comp[ref0][BWDREF_FRAME] =
+ ref_bicomp_costs[ref0] + ref_bicomp_costs[BWDREF_FRAME];
+ ref_costs_comp[ref0][ALTREF_FRAME] =
+ ref_bicomp_costs[ref0] + ref_bicomp_costs[ALTREF_FRAME];
+ }
+
+ aom_prob uni_comp_ref_p = av1_get_pred_prob_uni_comp_ref_p(cm, xd);
+ aom_prob uni_comp_ref_p1 = av1_get_pred_prob_uni_comp_ref_p1(cm, xd);
+ aom_prob uni_comp_ref_p2 = av1_get_pred_prob_uni_comp_ref_p2(cm, xd);
+
+ ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
+ base_cost + av1_cost_bit(comp_ref_type_p, 0) +
+ av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 0);
+ ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
+ base_cost + av1_cost_bit(comp_ref_type_p, 0) +
+ av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 1) +
+ av1_cost_bit(uni_comp_ref_p2, 0);
+ ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
+ base_cost + av1_cost_bit(comp_ref_type_p, 0) +
+ av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 1) +
+ av1_cost_bit(uni_comp_ref_p2, 1);
+
+ ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
+ base_cost + av1_cost_bit(comp_ref_type_p, 0) +
+ av1_cost_bit(uni_comp_ref_p, 1);
+
+#else // !CONFIG_EXT_COMP_REFS
+
ref_costs_comp[LAST_FRAME] =
#if CONFIG_EXT_REFS
ref_costs_comp[LAST2_FRAME] = ref_costs_comp[LAST3_FRAME] =
@@ -5596,11 +6661,23 @@ static void estimate_ref_frame_costs(const AV1_COMMON *cm,
// more bit.
ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
ref_costs_comp[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);
-#else
+#else // !CONFIG_EXT_REFS
ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);
#endif // CONFIG_EXT_REFS
+#endif // CONFIG_EXT_COMP_REFS
} else {
+#if CONFIG_EXT_COMP_REFS
+ int ref0;
+ for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
+ ref_costs_comp[ref0][BWDREF_FRAME] = 512;
+ ref_costs_comp[ref0][ALTREF_FRAME] = 512;
+ }
+ ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
+ ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
+ ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
+ ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
+#else // !CONFIG_EXT_COMP_REFS
ref_costs_comp[LAST_FRAME] = 512;
#if CONFIG_EXT_REFS
ref_costs_comp[LAST2_FRAME] = 512;
@@ -5609,6 +6686,7 @@ static void estimate_ref_frame_costs(const AV1_COMMON *cm,
ref_costs_comp[ALTREF_FRAME] = 512;
#endif // CONFIG_EXT_REFS
ref_costs_comp[GOLDEN_FRAME] = 512;
+#endif // CONFIG_EXT_COMP_REFS
}
}
}
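Under CONFIG_EXT_COMP_REFS the compound reference cost table becomes two-dimensional, indexed by both frames of the pair, and its bidirectional entries are assembled from per-frame bit costs earlier in estimate_ref_frame_costs(). A small self-contained sketch of that assembly (the numeric costs are made up; in the encoder they come from av1_cost_bit() on the reference probabilities):

#include <stdio.h>

enum { LAST = 0, LAST2, LAST3, GOLDEN, BWDREF, ALTREF, N_REFS };

int main(void) {
  /* Illustrative per-frame signalling costs (not real values). */
  unsigned int bicomp[N_REFS] = { 110, 140, 150, 160, 60, 70 };
  unsigned int pair_cost[N_REFS][N_REFS] = { { 0 } };
  int ref0;
  /* A bidirectional pair's cost is the sum of its two per-frame costs; the
   * shared base/comp-type bits are folded into the forward-frame term. */
  for (ref0 = LAST; ref0 <= GOLDEN; ++ref0) {
    pair_cost[ref0][BWDREF] = bicomp[ref0] + bicomp[BWDREF];
    pair_cost[ref0][ALTREF] = bicomp[ref0] + bicomp[ALTREF];
  }
  printf("{LAST, ALTREF} cost: %u\n", pair_cost[LAST][ALTREF]);
  return 0;
}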
@@ -5693,8 +6771,13 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
int sadpb = x->sadperbit16;
MV mvp_full;
#if CONFIG_EXT_INTER
+#if CONFIG_COMPOUND_SINGLEREF
+ int ref =
+ has_second_ref(mbmi) ? mbmi->ref_frame[ref_idx] : mbmi->ref_frame[0];
+#else // !CONFIG_COMPOUND_SINGLEREF
int ref = mbmi->ref_frame[ref_idx];
-#else
+#endif // CONFIG_COMPOUND_SINGLEREF
+#else // !CONFIG_EXT_INTER
int ref = mbmi->ref_frame[0];
int ref_idx = 0;
#endif // CONFIG_EXT_INTER
@@ -5802,7 +6885,7 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv,
&(x->best_mv.as_mv), 0);
break;
- default: assert("Invalid motion mode!\n");
+ default: assert(0 && "Invalid motion mode!\n");
}
#endif // CONFIG_MOTION_VAR
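The assert() change above fixes a silent no-op: a string literal is always a non-null pointer, so assert("...") can never fire. Anchoring it with 0 && keeps the message in the failing expression. A tiny sketch:

#include <assert.h>

static void check_motion_mode(int mode_is_valid) {
  /* assert("Invalid motion mode!") would always pass, because the literal
   * decays to a non-null pointer; the 0 && form actually aborts when hit. */
  if (!mode_is_valid) assert(0 && "Invalid motion mode!");
}

int main(void) {
  check_motion_mode(1); /* valid mode: no assertion */
  return 0;
}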
@@ -5820,17 +6903,6 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
x->second_best_mv.as_int != x->best_mv.as_int;
const int pw = block_size_wide[bsize];
const int ph = block_size_high[bsize];
- // Use up-sampled reference frames.
- struct macroblockd_plane *const pd = &xd->plane[0];
- struct buf_2d backup_pred = pd->pre[ref_idx];
- const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
-
- // Set pred for Y plane
- setup_pred_plane(
- &pd->pre[ref_idx], bsize, upsampled_ref->y_buffer,
- upsampled_ref->y_crop_width, upsampled_ref->y_crop_height,
- upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), NULL,
- pd->subsampling_x, pd->subsampling_y);
best_mv_var = cpi->find_fractional_mv_step(
x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
@@ -5873,9 +6945,6 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
x->best_mv.as_mv = best_mv;
}
}
-
- // Restore the reference frames.
- pd->pre[ref_idx] = backup_pred;
} else {
cpi->find_fractional_mv_step(
x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
@@ -5891,13 +6960,12 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
break;
case OBMC_CAUSAL:
av1_find_best_obmc_sub_pixel_tree_up(
- cpi, x, mi_row, mi_col, &x->best_mv.as_mv, &ref_mv,
- cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize],
- cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step,
- x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], 0,
- cpi->sf.use_upsampled_references);
+ x, &x->best_mv.as_mv, &ref_mv, cm->allow_high_precision_mv,
+ x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
+ cpi->sf.mv.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &dis,
+ &x->pred_sse[ref], 0, cpi->sf.use_upsampled_references);
break;
- default: assert("Invalid motion mode!\n");
+ default: assert(0 && "Invalid motion mode!\n");
}
#endif // CONFIG_MOTION_VAR
}
@@ -5936,15 +7004,12 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
const int ph = block_size_high[bsize];
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+#if CONFIG_COMPOUND_SINGLEREF
+ const int other_ref =
+ has_second_ref(mbmi) ? mbmi->ref_frame[!ref_idx] : mbmi->ref_frame[0];
+#else // !CONFIG_COMPOUND_SINGLEREF
const int other_ref = mbmi->ref_frame[!ref_idx];
-#if CONFIG_DUAL_FILTER
- InterpFilter interp_filter[2] = {
- (ref_idx == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0],
- (ref_idx == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1]
- };
-#else
- const InterpFilter interp_filter = mbmi->interp_filter;
-#endif // CONFIG_DUAL_FILTER
+#endif // CONFIG_COMPOUND_SINGLEREF
struct scale_factors sf;
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
struct macroblockd_plane *const pd = &xd->plane[0];
@@ -5961,8 +7026,12 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
(void)block;
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- // This function should only ever be called for compound modes
+// This function should only ever be called for compound modes
+#if CONFIG_COMPOUND_SINGLEREF
+ assert(has_second_ref(mbmi) || is_inter_singleref_comp_mode(mbmi->mode));
+#else // !CONFIG_COMPOUND_SINGLEREF
assert(has_second_ref(mbmi));
+#endif // CONFIG_COMPOUND_SINGLEREF
struct buf_2d backup_yv12[MAX_MB_PLANE];
const YV12_BUFFER_CONFIG *const scaled_ref_frame =
@@ -5991,7 +7060,7 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
struct buf_2d ref_yv12;
const int plane = 0;
- ConvolveParams conv_params = get_conv_params(0, plane);
+ ConvolveParams conv_params = get_conv_params(!ref_idx, 0, plane);
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
WarpTypesAllowed warp_types;
#if CONFIG_GLOBAL_MOTION
@@ -6010,7 +7079,7 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
av1_highbd_build_inter_predictor(
ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
- 0, interp_filter,
+ 0, mbmi->interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, p_col, p_row,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -6019,7 +7088,7 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
#endif // CONFIG_HIGHBITDEPTH
av1_build_inter_predictor(
ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
- &conv_params, interp_filter,
+ &conv_params, mbmi->interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, p_col, p_row, plane, !ref_idx,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -6038,15 +7107,22 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
// Search for the best mv for one component of a compound,
// given that the other component is fixed.
-static void compound_single_motion_search(
- const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *this_mv,
- int mi_row, int mi_col, const uint8_t *second_pred, const uint8_t *mask,
- int mask_stride, int *rate_mv, const int block, int ref_idx) {
+static void compound_single_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, MV *this_mv,
+ int mi_row, int mi_col,
+ const uint8_t *second_pred,
+ const uint8_t *mask, int mask_stride,
+ int *rate_mv, int ref_idx) {
const int pw = block_size_wide[bsize];
const int ph = block_size_high[bsize];
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+#if CONFIG_COMPOUND_SINGLEREF
+ const int ref =
+ has_second_ref(mbmi) ? mbmi->ref_frame[ref_idx] : mbmi->ref_frame[0];
+#else
const int ref = mbmi->ref_frame[ref_idx];
+#endif // CONFIG_COMPOUND_SINGLEREF
int_mv ref_mv = x->mbmi_ext->ref_mvs[ref][0];
struct macroblockd_plane *const pd = &xd->plane[0];
@@ -6054,9 +7130,16 @@ static void compound_single_motion_search(
const YV12_BUFFER_CONFIG *const scaled_ref_frame =
av1_get_scaled_ref_frame(cpi, ref);
- // Check that this is either an interinter or an interintra block
+// Check that this is either an interinter or an interintra block
+#if CONFIG_COMPOUND_SINGLEREF
assert(has_second_ref(mbmi) ||
+ // or a single ref comp pred mode
+ is_inter_singleref_comp_mode(mbmi->mode) ||
(ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME));
+#else
+ assert(has_second_ref(mbmi) ||
+ (ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME));
+#endif // CONFIG_COMPOUND_SINGLEREF
if (scaled_ref_frame) {
int i;
@@ -6091,7 +7174,12 @@ static void compound_single_motion_search(
best_mv->col >>= 3;
best_mv->row >>= 3;
- av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
+#if CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi))
+ av1_set_mvcost(x, ref, 0, mbmi->ref_mv_idx);
+ else
+#endif // CONFIG_COMPOUND_SINGLEREF
+ av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
// Small-range full-pixel motion search.
bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
@@ -6112,43 +7200,11 @@ static void compound_single_motion_search(
if (bestsme < INT_MAX) {
int dis; /* TODO: use dis in distortion calculation later. */
unsigned int sse;
- if (cpi->sf.use_upsampled_references) {
- // Use up-sampled reference frames.
- struct buf_2d backup_pred = pd->pre[0];
- const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
-
- // Set pred for Y plane
- setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer,
- upsampled_ref->y_crop_width,
- upsampled_ref->y_crop_height, upsampled_ref->y_stride,
- (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
- pd->subsampling_y);
-
-// If bsize < BLOCK_8X8, adjust pred pointer for this block
-#if !CONFIG_CB4X4
- if (bsize < BLOCK_8X8)
- pd->pre[0].buf =
- &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block,
- pd->pre[0].stride))
- << 3];
-#endif // !CONFIG_CB4X4
-
- bestsme = cpi->find_fractional_mv_step(
- x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
- &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
- x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
- mask_stride, ref_idx, pw, ph, 1);
-
- // Restore the reference frames.
- pd->pre[0] = backup_pred;
- } else {
- (void)block;
- bestsme = cpi->find_fractional_mv_step(
- x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
- &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
- x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
- mask_stride, ref_idx, pw, ph, 0);
- }
+ bestsme = cpi->find_fractional_mv_step(
+ x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
+ &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
+ x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, mask_stride,
+ ref_idx, pw, ph, cpi->sf.use_upsampled_references);
}
// Restore the pointer to the first (possibly scaled) prediction buffer.
@@ -6165,7 +7221,12 @@ static void compound_single_motion_search(
xd->plane[i].pre[ref_idx] = backup_yv12[i];
}
- av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
+#if CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi))
+ av1_set_mvcost(x, ref, 0, mbmi->ref_mv_idx);
+ else
+#endif // CONFIG_COMPOUND_SINGLEREF
+ av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
*rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost,
x->mvcost, MV_COST_WEIGHT);
}
@@ -6174,13 +7235,23 @@ static void compound_single_motion_search(
// where the second prediction is also an inter mode.
static void compound_single_motion_search_interinter(
const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv,
+#if CONFIG_COMPOUND_SINGLEREF
+ int_mv *frame_comp_mv,
+#endif // CONFIG_COMPOUND_SINGLEREF
int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv,
const int block, int ref_idx) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- // This function should only ever be called for compound modes
+// This function should only ever be called for compound modes
+#if CONFIG_COMPOUND_SINGLEREF
+ int is_singleref_comp_mode =
+ !has_second_ref(mbmi) && is_inter_singleref_comp_mode(mbmi->mode);
+ assert(has_second_ref(mbmi) || is_singleref_comp_mode);
+ if (is_singleref_comp_mode && ref_idx) assert(frame_comp_mv);
+#else // !CONFIG_COMPOUND_SINGLEREF
assert(has_second_ref(mbmi));
+#endif // CONFIG_COMPOUND_SINGLEREF
// Prediction buffer from second frame.
#if CONFIG_HIGHBITDEPTH
@@ -6194,14 +7265,26 @@ static void compound_single_motion_search_interinter(
DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
#endif // CONFIG_HIGHBITDEPTH
+#if CONFIG_COMPOUND_SINGLEREF
+ MV *this_mv = has_second_ref(mbmi)
+ ? &frame_mv[mbmi->ref_frame[ref_idx]].as_mv
+ : (ref_idx ? &frame_comp_mv[mbmi->ref_frame[0]].as_mv
+ : &frame_mv[mbmi->ref_frame[0]].as_mv);
+ const MV *other_mv =
+ has_second_ref(mbmi)
+ ? &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv
+ : (ref_idx ? &frame_mv[mbmi->ref_frame[0]].as_mv
+ : &frame_comp_mv[mbmi->ref_frame[0]].as_mv);
+#else // !CONFIG_COMPOUND_SINGLEREF
MV *this_mv = &frame_mv[mbmi->ref_frame[ref_idx]].as_mv;
const MV *other_mv = &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv;
+#endif // CONFIG_COMPOUND_SINGLEREF
build_second_inter_pred(cpi, x, bsize, other_mv, mi_row, mi_col, block,
ref_idx, second_pred);
compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col,
- second_pred, mask, mask_stride, rate_mv, block,
+ second_pred, mask, mask_stride, rate_mv,
ref_idx);
}
@@ -6220,21 +7303,40 @@ static void do_masked_motion_search_indexed(
mask = av1_get_compound_type_mask(comp_data, sb_type);
int_mv frame_mv[TOTAL_REFS_PER_FRAME];
+#if CONFIG_COMPOUND_SINGLEREF
+ int_mv frame_comp_mv[TOTAL_REFS_PER_FRAME];
+#endif // CONFIG_COMPOUND_SINGLEREF
MV_REFERENCE_FRAME rf[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
assert(bsize >= BLOCK_8X8 || CONFIG_CB4X4);
frame_mv[rf[0]].as_int = cur_mv[0].as_int;
- frame_mv[rf[1]].as_int = cur_mv[1].as_int;
+#if CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi))
+ frame_comp_mv[rf[0]].as_int = cur_mv[1].as_int;
+ else
+#endif // CONFIG_COMPOUND_SINGLEREF
+ frame_mv[rf[1]].as_int = cur_mv[1].as_int;
if (which == 0 || which == 1) {
- compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, mi_row,
- mi_col, mask, mask_stride, rate_mv,
- 0, which);
+ compound_single_motion_search_interinter(
+ cpi, x, bsize, frame_mv,
+#if CONFIG_COMPOUND_SINGLEREF
+ has_second_ref(mbmi) ? NULL : frame_comp_mv,
+#endif // CONFIG_COMPOUND_SINGLEREF
+ mi_row, mi_col, mask, mask_stride, rate_mv, 0, which);
} else if (which == 2) {
- joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, mask,
- mask_stride, rate_mv, 0);
+ joint_motion_search(cpi, x, bsize, frame_mv,
+#if CONFIG_COMPOUND_SINGLEREF
+ has_second_ref(mbmi) ? NULL : frame_comp_mv,
+#endif // CONFIG_COMPOUND_SINGLEREF
+ mi_row, mi_col, NULL, mask, mask_stride, rate_mv, 0);
}
tmp_mv[0].as_int = frame_mv[rf[0]].as_int;
- tmp_mv[1].as_int = frame_mv[rf[1]].as_int;
+#if CONFIG_COMPOUND_SINGLEREF
+ if (!has_second_ref(mbmi))
+ tmp_mv[1].as_int = frame_comp_mv[rf[0]].as_int;
+ else // comp ref
+#endif // CONFIG_COMPOUND_SINGLEREF
+ tmp_mv[1].as_int = frame_mv[rf[1]].as_int;
}
#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
#endif // CONFIG_EXT_INTER
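The dispatch above follows a small convention for the `which` argument of do_masked_motion_search_indexed(); a hypothetical wrapper spelling it out (names are illustrative, not from the source):

#include <stdio.h>

/* which == 0 or 1: refine only that MV component, with the prediction from
 * the other component held fixed (compound_single_motion_search_interinter);
 * which == 2: jointly refine both components (joint_motion_search). */
static void describe_masked_search(int which) {
  switch (which) {
    case 0: puts("refine mv[0]; other prediction fixed"); break;
    case 1: puts("refine mv[1]; other prediction fixed"); break;
    default: puts("joint search over both MVs"); break;
  }
}

int main(void) {
  describe_masked_search(2);
  return 0;
}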
@@ -6483,7 +7585,7 @@ static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x,
sse = ROUND_POWER_OF_TWO(sse, bd_round);
model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
- rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
+ rd = RDCOST(x->rdmult, rate, dist);
if (rd < best_rd) {
*best_wedge_index = wedge_index;
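Throughout this file RDCOST() drops the x->rddiv argument while the comparison logic stays the same: a Lagrangian cost of the form rdmult * rate (scaled down) + distortion. A rough standalone sketch with an assumed rate-scaling shift (the real macro is defined in av1/encoder/rd.h and may differ in detail):

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: the shift below is an assumption for the sketch, not the
 * encoder's actual constant. The point is that the cost is lambda * rate plus
 * distortion, with no separate per-block distortion-scaling argument. */
static int64_t rd_cost_sketch(int rdmult, int rate, int64_t dist) {
  const int rate_shift = 9; /* assumed scaling of the rate term */
  return (((int64_t)rate * rdmult + (1 << (rate_shift - 1))) >> rate_shift) +
         dist;
}

int main(void) {
  printf("%lld\n", (long long)rd_cost_sketch(128, 100, 5000));
  return 0;
}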
@@ -6544,7 +7646,7 @@ static int64_t pick_wedge_fixed_sign(
sse = ROUND_POWER_OF_TWO(sse, bd_round);
model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
- rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
+ rd = RDCOST(x->rdmult, rate, dist);
if (rd < best_rd) {
*best_wedge_index = wedge_index;
@@ -6646,7 +7748,7 @@ static int64_t pick_interinter_seg(const AV1_COMP *const cpi,
sse = ROUND_POWER_OF_TWO(sse, bd_round);
model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
- rd0 = RDCOST(x->rdmult, x->rddiv, rate, dist);
+ rd0 = RDCOST(x->rdmult, rate, dist);
if (rd0 < best_rd) {
best_mask_type = cur_mask_type;
@@ -6729,7 +7831,17 @@ static int interinter_compound_motion_search(
#endif // CONFIG_COMPOUND_SEGMENT
mbmi->interinter_compound_type
};
- if (this_mode == NEW_NEWMV) {
+#if CONFIG_COMPOUND_SINGLEREF
+ // NOTE: Mode is needed to identify the compound mode prediction, regardless
+ // of comp refs or single ref.
+ mbmi->mode = this_mode;
+#endif // CONFIG_COMPOUND_SINGLEREF
+
+ if (this_mode == NEW_NEWMV
+#if CONFIG_COMPOUND_SINGLEREF
+ || this_mode == SR_NEW_NEWMV
+#endif // CONFIG_COMPOUND_SINGLEREF
+ ) {
do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
mi_row, mi_col, tmp_mv, &tmp_rate_mv, 2);
mbmi->mv[0].as_int = tmp_mv[0].as_int;
@@ -6738,7 +7850,12 @@ static int interinter_compound_motion_search(
do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
mi_row, mi_col, tmp_mv, &tmp_rate_mv, 0);
mbmi->mv[0].as_int = tmp_mv[0].as_int;
- } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
+ } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV
+#if CONFIG_COMPOUND_SINGLEREF
+ // || this_mode == SR_NEAREST_NEWMV
+ || this_mode == SR_NEAR_NEWMV || this_mode == SR_ZERO_NEWMV
+#endif // CONFIG_COMPOUND_SINGLEREF
+ ) {
do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
mi_row, mi_col, tmp_mv, &tmp_rate_mv, 1);
mbmi->mv[1].as_int = tmp_mv[1].as_int;
@@ -6763,7 +7880,7 @@ static int64_t build_and_cost_compound_type(
const COMPOUND_TYPE compound_type = mbmi->interinter_compound_type;
best_rd_cur = pick_interinter_mask(cpi, x, bsize, *preds0, *preds1);
- best_rd_cur += RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv, 0);
+ best_rd_cur += RDCOST(x->rdmult, rs2 + rate_mv, 0);
if (have_newmv_in_inter_mode(this_mode) &&
use_masked_motion_search(compound_type)) {
@@ -6772,7 +7889,7 @@ static int64_t build_and_cost_compound_type(
av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb);
- rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum);
+ rd = RDCOST(x->rdmult, rs2 + *out_rate_mv + rate_sum, dist_sum);
if (rd >= best_rd_cur) {
mbmi->mv[0].as_int = cur_mv[0].as_int;
mbmi->mv[1].as_int = cur_mv[1].as_int;
@@ -6788,7 +7905,7 @@ static int64_t build_and_cost_compound_type(
rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum);
+ rd = RDCOST(x->rdmult, rs2 + *out_rate_mv + rate_sum, dist_sum);
best_rd_cur = rd;
} else {
@@ -6801,7 +7918,7 @@ static int64_t build_and_cost_compound_type(
rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum);
+ rd = RDCOST(x->rdmult, rs2 + rate_mv + rate_sum, dist_sum);
best_rd_cur = rd;
}
return best_rd_cur;
@@ -6832,6 +7949,9 @@ typedef struct {
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
const BLOCK_SIZE bsize,
int_mv (*const mode_mv)[TOTAL_REFS_PER_FRAME],
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ int_mv (*const mode_comp_mv)[TOTAL_REFS_PER_FRAME],
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
const int mi_row, const int mi_col,
int *const rate_mv, int_mv *const single_newmv,
HandleInterModeArgs *const args) {
@@ -6844,6 +7964,9 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
#endif // CONFIG_EXT_INTER
int_mv *const frame_mv = mode_mv[this_mode];
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ int_mv *const frame_comp_mv = mode_comp_mv[this_mode];
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
const int refs[2] = { mbmi->ref_frame[0],
mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
int i;
@@ -6861,8 +7984,11 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
- joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, NULL,
- 0, rate_mv, 0);
+ joint_motion_search(cpi, x, bsize, frame_mv,
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ NULL, // int_mv *frame_comp_mv
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ mi_row, mi_col, NULL, NULL, 0, rate_mv, 0);
} else {
*rate_mv = 0;
for (i = 0; i < 2; ++i) {
@@ -6877,8 +8003,12 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
frame_mv[refs[0]].as_int =
mode_mv[compound_ref0_mode(this_mode)][refs[0]].as_int;
- compound_single_motion_search_interinter(
- cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 1);
+ compound_single_motion_search_interinter(cpi, x, bsize, frame_mv,
+#if CONFIG_COMPOUND_SINGLEREF
+ NULL,
+#endif // CONFIG_COMPOUND_SINGLEREF
+ mi_row, mi_col, NULL, 0,
+ rate_mv, 0, 1);
} else {
av1_set_mvcost(x, refs[1], 1, mbmi->ref_mv_idx);
*rate_mv = av1_mv_bit_cost(&frame_mv[refs[1]].as_mv,
@@ -6891,8 +8021,12 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
frame_mv[refs[1]].as_int =
mode_mv[compound_ref1_mode(this_mode)][refs[1]].as_int;
- compound_single_motion_search_interinter(
- cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 0);
+ compound_single_motion_search_interinter(cpi, x, bsize, frame_mv,
+#if CONFIG_COMPOUND_SINGLEREF
+ NULL,
+#endif // CONFIG_COMPOUND_SINGLEREF
+ mi_row, mi_col, NULL, 0,
+ rate_mv, 0, 0);
} else {
av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
*rate_mv = av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
@@ -6900,7 +8034,7 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
}
}
-#else
+#else // !CONFIG_EXT_INTER
// Initialize mv using single prediction mode result.
frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
@@ -6917,6 +8051,41 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
}
}
#endif // CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ } else if (is_inter_singleref_comp_mode(this_mode)) {
+ // Single ref comp mode
+ const int mode0 = compound_ref0_mode(this_mode);
+
+ single_newmv[refs[0]].as_int = args->single_newmv[refs[0]].as_int;
+ frame_mv[refs[0]].as_int = (mode0 == NEWMV)
+ ? single_newmv[refs[0]].as_int
+ : mode_mv[mode0][refs[0]].as_int;
+ assert(compound_ref1_mode(this_mode) == NEWMV);
+ frame_comp_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
+
+ if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
+ if (this_mode == SR_NEW_NEWMV) {
+ joint_motion_search(cpi, x, bsize, frame_mv, frame_comp_mv, mi_row,
+ mi_col, NULL, NULL, 0, rate_mv, 0);
+ } else {
+ assert( // this_mode == SR_NEAREST_NEWMV ||
+ this_mode == SR_NEAR_NEWMV || this_mode == SR_ZERO_NEWMV);
+ compound_single_motion_search_interinter(cpi, x, bsize, frame_mv,
+ frame_comp_mv, mi_row, mi_col,
+ NULL, 0, rate_mv, 0, 1);
+ }
+ } else {
+ *rate_mv = 0;
+ av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
+ if (mode0 == NEWMV)
+ *rate_mv += av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
+ &mbmi_ext->ref_mvs[refs[0]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ *rate_mv += av1_mv_bit_cost(&frame_comp_mv[refs[0]].as_mv,
+ &mbmi_ext->ref_mvs[refs[0]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
} else {
#if CONFIG_EXT_INTER
if (is_comp_interintra_pred) {
@@ -6984,7 +8153,7 @@ int64_t interpolation_filter_search(
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist,
skip_txfm_sb, skip_sse_sb);
- *rd = RDCOST(x->rdmult, x->rddiv, *switchable_rate + tmp_rate, tmp_dist);
+ *rd = RDCOST(x->rdmult, *switchable_rate + tmp_rate, tmp_dist);
if (assign_filter == SWITCHABLE) {
// do interp_filter search
@@ -7020,7 +8189,7 @@ int64_t interpolation_filter_search(
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
&tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
- tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rs + tmp_rate, tmp_dist);
+ tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist);
if (tmp_rd < *rd) {
*rd = tmp_rd;
@@ -7072,12 +8241,10 @@ static int64_t motion_mode_rd(
int mi_col, HandleInterModeArgs *const args, const int64_t ref_best_rd,
const int *refs, int rate_mv,
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+ // only used when WARPED_MOTION is on?
int_mv *const single_newmv,
#if CONFIG_EXT_INTER
- int rate2_bmc_nocoeff, MB_MODE_INFO *best_bmc_mbmi,
-#if CONFIG_MOTION_VAR
- int rate_mv_bmc,
-#endif // CONFIG_MOTION_VAR
+ int rate2_bmc_nocoeff, MB_MODE_INFO *best_bmc_mbmi, int rate_mv_bmc,
#endif // CONFIG_EXT_INTER
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
int rs, int *skip_txfm_sb, int64_t *skip_sse_sb, BUFFER_SET *orig_dst) {
@@ -7108,7 +8275,13 @@ static int64_t motion_mode_rd(
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#if CONFIG_WARPED_MOTION
+#if WARPED_MOTION_SORT_SAMPLES
+ int pts0[SAMPLES_ARRAY_SIZE], pts_inref0[SAMPLES_ARRAY_SIZE];
+ int pts_mv0[SAMPLES_ARRAY_SIZE];
+ int total_samples;
+#else
int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
+#endif // WARPED_MOTION_SORT_SAMPLES
#endif // CONFIG_WARPED_MOTION
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
@@ -7118,18 +8291,39 @@ static int64_t motion_mode_rd(
if (cm->interp_filter == SWITCHABLE) rd_stats->rate += rs;
#if CONFIG_WARPED_MOTION
aom_clear_system_state();
+#if WARPED_MOTION_SORT_SAMPLES
+ mbmi->num_proj_ref[0] =
+ findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0, pts_mv0);
+ total_samples = mbmi->num_proj_ref[0];
+#else
mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
+#endif // WARPED_MOTION_SORT_SAMPLES
#if CONFIG_EXT_INTER
best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
#endif // CONFIG_EXT_INTER
#endif // CONFIG_WARPED_MOTION
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
rate2_nocoeff = rd_stats->rate;
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ // We cannot estimate the rd cost for the motion mode NCOBMC_ADAPT_WEIGHT
+ // right now since it requires mvs from all neighboring blocks. We will
+  // check if this mode is beneficial after all the mvs in the current
+ // superblock are selected.
+ last_motion_mode_allowed = motion_mode_allowed_wrapper(1,
+#if CONFIG_GLOBAL_MOTION
+ 0, xd->global_motion,
+#endif // CONFIG_GLOBAL_MOTION
+ mi);
+#else
last_motion_mode_allowed = motion_mode_allowed(
-#if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
+#if CONFIG_GLOBAL_MOTION
0, xd->global_motion,
-#endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
+#endif // CONFIG_GLOBAL_MOTION
+#if CONFIG_WARPED_MOTION
+ xd,
+#endif
mi);
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
base_mbmi = *mbmi;
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
@@ -7155,7 +8349,11 @@ static int64_t motion_mode_rd(
*mbmi = *best_bmc_mbmi;
mbmi->motion_mode = OBMC_CAUSAL;
#endif // CONFIG_EXT_INTER
- if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
+ if (!is_comp_pred &&
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ !is_inter_singleref_comp_mode(this_mode) &&
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ have_newmv_in_inter_mode(this_mode)) {
int tmp_rate_mv = 0;
single_motion_search(cpi, x, bsize, mi_row, mi_col,
@@ -7195,6 +8393,9 @@ static int64_t motion_mode_rd(
#if CONFIG_WARPED_MOTION
if (mbmi->motion_mode == WARPED_CAUSAL) {
+#if WARPED_MOTION_SORT_SAMPLES
+ int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
+#endif // WARPED_MOTION_SORT_SAMPLES
#if CONFIG_EXT_INTER
*mbmi = *best_bmc_mbmi;
mbmi->motion_mode = WARPED_CAUSAL;
@@ -7210,6 +8411,19 @@ static int64_t motion_mode_rd(
: cm->interp_filter;
#endif // CONFIG_DUAL_FILTER
+#if WARPED_MOTION_SORT_SAMPLES
+ memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
+ memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
+ // Rank the samples by motion vector difference
+ if (mbmi->num_proj_ref[0] > 1) {
+ mbmi->num_proj_ref[0] = sortSamples(pts_mv0, &mbmi->mv[0].as_mv, pts,
+ pts_inref, mbmi->num_proj_ref[0]);
+#if CONFIG_EXT_INTER
+ best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
+#endif // CONFIG_EXT_INTER
+ }
+#endif // WARPED_MOTION_SORT_SAMPLES
+
if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize,
mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
&mbmi->wm_params[0], mi_row, mi_col)) {
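Each projection sample above is stored as an (x, y) pair of ints, which is why the copies move total_samples * 2 elements. A minimal sketch of that layout (helper name is hypothetical):

#include <string.h>

/* Projection samples are interleaved (x, y) int pairs, so copying n samples
 * moves n * 2 ints -- the same arithmetic as the memcpy() calls above. */
static void copy_samples(int *dst, const int *src, int n_samples) {
  memcpy(dst, src, (size_t)n_samples * 2 * sizeof(*src));
}

int main(void) {
  int src[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }; /* 4 samples */
  int dst[8];
  copy_samples(dst, src, 4);
  return 0;
}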
@@ -7218,9 +8432,16 @@ static int64_t motion_mode_rd(
int tmp_rate_mv = 0;
const int_mv mv0 = mbmi->mv[0];
WarpedMotionParams wm_params0 = mbmi->wm_params[0];
+#if WARPED_MOTION_SORT_SAMPLES
+ int num_proj_ref0 = mbmi->num_proj_ref[0];
// Refine MV in a small range.
+ av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts0, pts_inref0,
+ pts_mv0, total_samples);
+#else
+ // Refine MV in a small range.
av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts, pts_inref);
+#endif // WARPED_MOTION_SORT_SAMPLES
// Keep the refined MV and WM parameters.
if (mv0.as_int != mbmi->mv[0].as_int) {
@@ -7241,6 +8462,9 @@ static int64_t motion_mode_rd(
tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
}
#if CONFIG_EXT_INTER
+#if WARPED_MOTION_SORT_SAMPLES
+ best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
+#endif // WARPED_MOTION_SORT_SAMPLES
tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
#else
tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
@@ -7255,6 +8479,9 @@ static int64_t motion_mode_rd(
// Restore the old MV and WM parameters.
mbmi->mv[0] = mv0;
mbmi->wm_params[0] = wm_params0;
+#if WARPED_MOTION_SORT_SAMPLES
+ mbmi->num_proj_ref[0] = num_proj_ref0;
+#endif // WARPED_MOTION_SORT_SAMPLES
}
}
@@ -7328,8 +8555,8 @@ static int64_t motion_mode_rd(
av1_merge_rd_stats(rd_stats, rd_stats_y);
- rdcosty = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist);
- rdcosty = AOMMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
+ rdcosty = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
+ rdcosty = AOMMIN(rdcosty, RDCOST(x->rdmult, 0, rd_stats->sse));
/* clang-format off */
#if CONFIG_VAR_TX
is_cost_valid_uv =
@@ -7365,12 +8592,11 @@ static int64_t motion_mode_rd(
mbmi->skip = 0;
// here mbmi->skip temporarily plays a role as what this_skip2 does
} else if (!xd->lossless[mbmi->segment_id] &&
- (RDCOST(x->rdmult, x->rddiv,
+ (RDCOST(x->rdmult,
rd_stats_y->rate + rd_stats_uv->rate +
av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
rd_stats->dist) >=
- RDCOST(x->rdmult, x->rddiv,
- av1_cost_bit(av1_get_skip_prob(cm, xd), 1),
+ RDCOST(x->rdmult, av1_cost_bit(av1_get_skip_prob(cm, xd), 1),
rd_stats->sse))) {
rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
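The comparison above is the usual skip decision: coding the residual pays its coefficient rate plus the not-skipped flag, while skipping pays only the skipped flag but takes the full SSE as distortion. A self-contained sketch of that trade-off, assuming a plain lambda * rate + dist cost (names are illustrative):

#include <stdint.h>
#include <stdio.h>

static int prefer_skip(int64_t rdmult, int rate_coeffs, int rate_noskip_flag,
                       int rate_skip_flag, int64_t dist, int64_t sse) {
  const int64_t cost_coded = rdmult * (rate_coeffs + rate_noskip_flag) + dist;
  const int64_t cost_skip = rdmult * rate_skip_flag + sse;
  /* Mirrors the >= comparison above: skip wins on ties. */
  return cost_coded >= cost_skip;
}

int main(void) {
  printf("prefer skip: %d\n", prefer_skip(50, 400, 10, 2, 1000, 4000));
  return 0;
}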
@@ -7427,7 +8653,7 @@ static int64_t motion_mode_rd(
#endif // CONFIG_GLOBAL_MOTION
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- tmp_rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist);
+ tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
if (mbmi->motion_mode == SIMPLE_TRANSLATION || (tmp_rd < best_rd)) {
best_mbmi = *mbmi;
best_rd = tmp_rd;
@@ -7466,11 +8692,17 @@ static int64_t motion_mode_rd(
return 0;
}
-static int64_t handle_inter_mode(
- const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
- RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
- int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
- int mi_col, HandleInterModeArgs *args, const int64_t ref_best_rd) {
+static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, RD_STATS *rd_stats,
+ RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
+ int *disable_skip,
+ int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME],
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ int_mv (*mode_comp_mv)[TOTAL_REFS_PER_FRAME],
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ int mi_row, int mi_col,
+ HandleInterModeArgs *args,
+ const int64_t ref_best_rd) {
const AV1_COMMON *cm = &cpi->common;
(void)cm;
MACROBLOCKD *xd = &x->e_mbd;
@@ -7479,7 +8711,14 @@ static int64_t handle_inter_mode(
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
const int is_comp_pred = has_second_ref(mbmi);
const int this_mode = mbmi->mode;
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ const int is_singleref_comp_mode = is_inter_singleref_comp_mode(this_mode);
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
int_mv *frame_mv = mode_mv[this_mode];
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ // The comp mv for the compound mode in single ref
+ int_mv *frame_comp_mv = mode_comp_mv[this_mode];
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
int i;
int refs[2] = { mbmi->ref_frame[0],
(mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
@@ -7487,7 +8726,7 @@ static int64_t handle_inter_mode(
int rate_mv = 0;
#if CONFIG_EXT_INTER
int pred_exists = 1;
-#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
+#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT || CONFIG_INTERINTRA
const int bw = block_size_wide[bsize];
#endif // ONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
int_mv single_newmv[TOTAL_REFS_PER_FRAME];
@@ -7511,9 +8750,7 @@ static int64_t handle_inter_mode(
#if CONFIG_EXT_INTER
int rate2_bmc_nocoeff;
MB_MODE_INFO best_bmc_mbmi;
-#if CONFIG_MOTION_VAR
int rate_mv_bmc;
-#endif // CONFIG_MOTION_VAR
#endif // CONFIG_EXT_INTER
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
int64_t rd = INT64_MAX;
@@ -7523,6 +8760,11 @@ static int64_t handle_inter_mode(
int skip_txfm_sb = 0;
int64_t skip_sse_sb = INT64_MAX;
int16_t mode_ctx;
+#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_MOTION_VAR
+ // dummy fillers
+ mbmi->ncobmc_mode[0] = NO_OVERLAP;
+ mbmi->ncobmc_mode[1] = NO_OVERLAP;
+#endif
#if CONFIG_EXT_INTER
#if CONFIG_INTERINTRA
@@ -7546,7 +8788,11 @@ static int64_t handle_inter_mode(
#endif // CONFIG_EXT_INTER
#if CONFIG_EXT_INTER
+#if CONFIG_COMPOUND_SINGLEREF
+ if (is_comp_pred || is_singleref_comp_mode)
+#else // !CONFIG_COMPOUND_SINGLEREF
if (is_comp_pred)
+#endif // CONFIG_COMPOUND_SINGLEREF
mode_ctx = mbmi_ext->compound_mode_context[refs[0]];
else
#endif // CONFIG_EXT_INTER
@@ -7572,12 +8818,22 @@ static int64_t handle_inter_mode(
if (frame_mv[refs[0]].as_int == INVALID_MV ||
frame_mv[refs[1]].as_int == INVALID_MV)
return INT64_MAX;
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ } else if (is_singleref_comp_mode) {
+ if (frame_mv[refs[0]].as_int == INVALID_MV ||
+ frame_comp_mv[refs[0]].as_int == INVALID_MV)
+ return INT64_MAX;
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
}
mbmi->motion_mode = SIMPLE_TRANSLATION;
if (have_newmv_in_inter_mode(this_mode)) {
- const int64_t ret_val = handle_newmv(cpi, x, bsize, mode_mv, mi_row, mi_col,
- &rate_mv, single_newmv, args);
+ const int64_t ret_val =
+ handle_newmv(cpi, x, bsize, mode_mv,
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ mode_comp_mv,
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ mi_row, mi_col, &rate_mv, single_newmv, args);
if (ret_val != 0)
return ret_val;
else
@@ -7591,6 +8847,16 @@ static int64_t handle_inter_mode(
mbmi->mv[i].as_int = cur_mv[i].as_int;
}
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ if (!is_comp_pred && is_singleref_comp_mode) {
+ cur_mv[1] = frame_comp_mv[refs[0]];
+ // Clip "next_nearest" so that it does not extend to far out of image
+ if (this_mode != NEWMV) clamp_mv2(&cur_mv[1].as_mv, xd);
+ if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
+ mbmi->mv[1].as_int = cur_mv[1].as_int;
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+
#if CONFIG_EXT_INTER
if (this_mode == NEAREST_NEARESTMV)
#else
@@ -7614,7 +8880,13 @@ static int64_t handle_inter_mode(
#if CONFIG_EXT_INTER
if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
- if (this_mode == NEAREST_NEWMV) {
+#if CONFIG_COMPOUND_SINGLEREF
+ if (this_mode == NEAREST_NEWMV || // this_mode == SR_NEAREST_NEWMV ||
+ this_mode == SR_NEAREST_NEARMV)
+#else // !CONFIG_COMPOUND_SINGLEREF
+ if (this_mode == NEAREST_NEWMV)
+#endif // CONFIG_COMPOUND_SINGLEREF
+ {
cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
@@ -7635,7 +8907,11 @@ static int64_t handle_inter_mode(
if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
int ref_mv_idx = mbmi->ref_mv_idx + 1;
- if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEARMV) {
+ if (this_mode == NEAR_NEWMV ||
+#if CONFIG_COMPOUND_SINGLEREF
+ this_mode == SR_NEAR_NEWMV ||
+#endif // CONFIG_COMPOUND_SINGLEREF
+ this_mode == NEAR_NEARMV) {
cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
@@ -7644,8 +8920,17 @@ static int64_t handle_inter_mode(
mbmi->mv[0].as_int = cur_mv[0].as_int;
}
- if (this_mode == NEW_NEARMV || this_mode == NEAR_NEARMV) {
- cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
+ if (this_mode == NEW_NEARMV ||
+#if CONFIG_COMPOUND_SINGLEREF
+ this_mode == SR_NEAREST_NEARMV ||
+#endif // CONFIG_COMPOUND_SINGLEREF
+ this_mode == NEAR_NEARMV) {
+#if CONFIG_COMPOUND_SINGLEREF
+ if (this_mode == SR_NEAREST_NEARMV)
+ cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
+ else
+#endif // CONFIG_COMPOUND_SINGLEREF
+ cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
clamp_mv2(&cur_mv[1].as_mv, xd);
@@ -7653,7 +8938,7 @@ static int64_t handle_inter_mode(
mbmi->mv[1].as_int = cur_mv[1].as_int;
}
}
-#else
+#else // !CONFIG_EXT_INTER
if (this_mode == NEARMV && is_comp_pred) {
uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
@@ -7706,7 +8991,7 @@ static int64_t handle_inter_mode(
rd_stats->rate += cost_mv_ref(cpi, this_mode, mode_ctx);
}
- if (RDCOST(x->rdmult, x->rddiv, rd_stats->rate, 0) > ref_best_rd &&
+ if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
#if CONFIG_EXT_INTER
mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV
#else
@@ -7725,13 +9010,16 @@ static int64_t handle_inter_mode(
best_bmc_mbmi = *mbmi;
rate2_bmc_nocoeff = rd_stats->rate;
if (cm->interp_filter == SWITCHABLE) rate2_bmc_nocoeff += rs;
-#if CONFIG_MOTION_VAR
rate_mv_bmc = rate_mv;
-#endif // CONFIG_MOTION_VAR
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
- if (is_comp_pred) {
+#if CONFIG_COMPOUND_SINGLEREF
+ if (is_comp_pred || is_singleref_comp_mode)
+#else
+ if (is_comp_pred)
+#endif // CONFIG_COMPOUND_SINGLEREF
+ {
int rate_sum, rs2;
int64_t dist_sum;
int64_t best_rd_compound = INT64_MAX, best_rd_cur = INT64_MAX;
@@ -7741,8 +9029,8 @@ static int64_t handle_inter_mode(
int tmp_skip_txfm_sb;
int64_t tmp_skip_sse_sb;
int compound_type_cost[COMPOUND_TYPES];
- uint8_t pred0[2 * MAX_SB_SQUARE];
- uint8_t pred1[2 * MAX_SB_SQUARE];
+ DECLARE_ALIGNED(16, uint8_t, pred0[2 * MAX_SB_SQUARE]);
+ DECLARE_ALIGNED(16, uint8_t, pred1[2 * MAX_SB_SQUARE]);
uint8_t *preds0[1] = { pred0 };
uint8_t *preds1[1] = { pred1 };
int strides[1] = { bw };
@@ -7761,6 +9049,17 @@ static int64_t handle_inter_mode(
best_compound_data.seg_mask = tmp_mask_buf;
#endif // CONFIG_COMPOUND_SEGMENT
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ // TODO(zoeliu): To further check whether the following setups are needed.
+ // Single ref compound mode: Prepare the 2nd ref frame predictor the same as
+ // the 1st one.
+ if (!is_comp_pred && is_singleref_comp_mode) {
+ xd->block_refs[1] = xd->block_refs[0];
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[1] = xd->plane[i].pre[0];
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+
if (masked_compound_used) {
av1_cost_tokens(compound_type_cost, cm->fc->compound_type_prob[bsize],
av1_compound_type_tree);
@@ -7773,7 +9072,7 @@ static int64_t handle_inter_mode(
for (cur_type = COMPOUND_AVERAGE; cur_type < COMPOUND_TYPES; cur_type++) {
if (cur_type != COMPOUND_AVERAGE && !masked_compound_used) break;
- if (!is_interinter_compound_used(cur_type, bsize)) break;
+ if (!is_interinter_compound_used(cur_type, bsize)) continue;
tmp_rate_mv = rate_mv;
best_rd_cur = INT64_MAX;
mbmi->interinter_compound_type = cur_type;
@@ -7792,8 +9091,7 @@ static int64_t handle_inter_mode(
&tmp_skip_txfm_sb, &tmp_skip_sse_sb,
INT64_MAX);
if (rd != INT64_MAX)
- best_rd_cur =
- RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum);
+ best_rd_cur = RDCOST(x->rdmult, rs2 + rate_mv + rate_sum, dist_sum);
best_rd_compound = best_rd_cur;
break;
#if CONFIG_WEDGE
@@ -7923,8 +9221,7 @@ static int64_t handle_inter_mode(
av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb);
- rd =
- RDCOST(x->rdmult, x->rddiv, tmp_rate_mv + rate_sum + rmode, dist_sum);
+ rd = RDCOST(x->rdmult, tmp_rate_mv + rate_sum + rmode, dist_sum);
if (rd < best_interintra_rd) {
best_interintra_rd = rd;
best_interintra_mode = mbmi->interintra_mode;
@@ -7939,7 +9236,7 @@ static int64_t handle_inter_mode(
rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, x->rddiv, rate_mv + rmode + rate_sum, dist_sum);
+ rd = RDCOST(x->rdmult, rate_mv + rmode + rate_sum, dist_sum);
best_interintra_rd = rd;
if (ref_best_rd < INT64_MAX && best_interintra_rd > 2 * ref_best_rd) {
@@ -7953,8 +9250,7 @@ static int64_t handle_inter_mode(
int_mv tmp_mv;
int rwedge = av1_cost_bit(cm->fc->wedge_interintra_prob[bsize], 0);
if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge + rate_sum,
- dist_sum);
+ rd = RDCOST(x->rdmult, rmode + rate_mv + rwedge + rate_sum, dist_sum);
best_interintra_rd_nowedge = best_interintra_rd;
// Disable wedge search if source variance is small
@@ -7968,7 +9264,7 @@ static int64_t handle_inter_mode(
pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_);
best_interintra_rd_wedge +=
- RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge, 0);
+ RDCOST(x->rdmult, rmode + rate_mv + rwedge, 0);
// Refine motion vector.
if (have_newmv_in_inter_mode(this_mode)) {
// get negative of mask
@@ -7977,14 +9273,14 @@ static int64_t handle_inter_mode(
tmp_mv.as_int = x->mbmi_ext->ref_mvs[refs[0]][0].as_int;
compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row,
mi_col, intrapred, mask, bw,
- &tmp_rate_mv, 0, 0);
+ &tmp_rate_mv, 0);
mbmi->mv[0].as_int = tmp_mv.as_int;
av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb);
- rd = RDCOST(x->rdmult, x->rddiv,
- rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum);
+ rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum,
+ dist_sum);
if (rd >= best_interintra_rd_wedge) {
tmp_mv.as_int = cur_mv[0].as_int;
tmp_rate_mv = rate_mv;
@@ -8000,8 +9296,8 @@ static int64_t handle_inter_mode(
estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, x->rddiv,
- rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum);
+ rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum,
+ dist_sum);
best_interintra_rd_wedge = rd;
if (best_interintra_rd_wedge < best_interintra_rd_nowedge) {
mbmi->use_wedge_interintra = 1;
@@ -8042,7 +9338,7 @@ static int64_t handle_inter_mode(
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, &orig_dst, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
&tmp_dist, &skip_txfm_sb, &skip_sse_sb);
- rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
+ rd = RDCOST(x->rdmult, rs + tmp_rate, tmp_dist);
}
#endif // CONFIG_EXT_INTER
@@ -8097,10 +9393,7 @@ static int64_t handle_inter_mode(
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
single_newmv,
#if CONFIG_EXT_INTER
- rate2_bmc_nocoeff, &best_bmc_mbmi,
-#if CONFIG_MOTION_VAR
- rate_mv_bmc,
-#endif // CONFIG_MOTION_VAR
+ rate2_bmc_nocoeff, &best_bmc_mbmi, rate_mv_bmc,
#endif // CONFIG_EXT_INTER
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
rs, &skip_txfm_sb, &skip_sse_sb, &orig_dst);
@@ -8118,11 +9411,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
MACROBLOCKD *const xd = &x->e_mbd;
const TileInfo *tile = &xd->tile;
-#if CONFIG_EC_ADAPT
FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
-#else
- FRAME_CONTEXT *const ec_ctx = cm->fc;
-#endif // CONFIG_EC_ADAPT
MODE_INFO *const mi = xd->mi[0];
const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
@@ -8222,7 +9511,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
#endif
mbmi->use_intrabc = 1;
mbmi->mode = DC_PRED;
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
mbmi->mv[0].as_mv = dv;
#if CONFIG_DUAL_FILTER
for (int idx = 0; idx < 4; ++idx) mbmi->interp_filter[idx] = BILINEAR;
@@ -8233,12 +9522,12 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
x->skip = 0;
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
+ assert(x->mvcost == x->mv_cost_stack[0]);
+ // TODO(aconverse@google.com): The full motion field defining discount
+ // in MV_COST_WEIGHT is too large. Explore other values.
int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, x->nmvjointcost,
- x->mvcost, MV_COST_WEIGHT);
- const PREDICTION_MODE A = av1_above_block_mode(mi, xd->above_mi, 0);
- const PREDICTION_MODE L = av1_left_block_mode(mi, xd->left_mi, 0);
- const int rate_mode = cpi->y_mode_costs[A][L][DC_PRED] +
- av1_cost_bit(ec_ctx->intrabc_prob, 1);
+ x->mvcost, MV_COST_WEIGHT_SUB);
+ const int rate_mode = av1_cost_bit(ec_ctx->intrabc_prob, 1);
RD_STATS rd_stats, rd_stats_uv;
av1_subtract_plane(x, bsize, 0);
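The intrabc hunk above switches the MV cost weight from MV_COST_WEIGHT to MV_COST_WEIGHT_SUB and drops the separate y-mode cost. As a rough illustration of how such a weight discounts the raw MV signaling rate (the 7-bit fixed point and the rounding here are assumptions, not the real av1_mv_bit_cost() internals):

/* Hedged sketch (illustrative, not av1_mv_bit_cost itself): scale the raw MV
 * signaling rate by a fixed-point weight so a smaller weight means a smaller
 * charge for sending the vector. */
static inline int example_weighted_mv_cost(int raw_mv_bits, int weight) {
  return (raw_mv_bits * weight + 64) >> 7; /* round-to-nearest, 7-bit weight */
}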
@@ -8267,8 +9556,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
rdc_noskip.rate =
rate_mode + rate_mv + rd_stats.rate + av1_cost_bit(skip_prob, 0);
rdc_noskip.dist = rd_stats.dist;
- rdc_noskip.rdcost =
- RDCOST(x->rdmult, x->rddiv, rdc_noskip.rate, rdc_noskip.dist);
+ rdc_noskip.rdcost = RDCOST(x->rdmult, rdc_noskip.rate, rdc_noskip.dist);
if (rdc_noskip.rdcost < best_rd) {
best_rd = rdc_noskip.rdcost;
best_mbmi = *mbmi;
@@ -8282,7 +9570,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
av1_init_rd_stats(&rdc_skip);
rdc_skip.rate = rate_mode + rate_mv + av1_cost_bit(skip_prob, 1);
rdc_skip.dist = rd_stats.sse;
- rdc_skip.rdcost = RDCOST(x->rdmult, x->rddiv, rdc_skip.rate, rdc_skip.dist);
+ rdc_skip.rdcost = RDCOST(x->rdmult, rdc_skip.rate, rdc_skip.dist);
if (rdc_skip.rdcost < best_rd) {
best_rd = rdc_skip.rdcost;
best_mbmi = *mbmi;
@@ -8302,6 +9590,7 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
struct macroblockd_plane *const pd = xd->plane;
int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
int y_skip = 0, uv_skip = 0;
@@ -8310,11 +9599,11 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
const int unify_bsize = CONFIG_CB4X4;
ctx->skip = 0;
- xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
- xd->mi[0]->mbmi.ref_frame[1] = NONE_FRAME;
+ mbmi->ref_frame[0] = INTRA_FRAME;
+ mbmi->ref_frame[1] = NONE_FRAME;
#if CONFIG_INTRABC
- xd->mi[0]->mbmi.use_intrabc = 0;
- xd->mi[0]->mbmi.mv[0].as_int = 0;
+ mbmi->use_intrabc = 0;
+ mbmi->mv[0].as_int = 0;
#endif // CONFIG_INTRABC
const int64_t intra_yrd =
@@ -8325,9 +9614,29 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
&dist_y, &y_skip, best_rd);
if (intra_yrd < best_rd) {
- max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->mbmi.tx_size]
- [pd[1].subsampling_x][pd[1].subsampling_y];
- init_sbuv_mode(&xd->mi[0]->mbmi);
+#if CONFIG_CFL
+ // Perform one extra txfm_rd_in_plane() call, this time with the best value
+ // so we can store reconstructed luma values
+ RD_STATS this_rd_stats;
+
+#if CONFIG_CB4X4
+ // Don't store the luma value if no chroma is associated.
+ // Don't worry, we will store this reconstructed luma in the following
+ // encode dry-run; the chroma plane will never know.
+ x->cfl_store_y = !x->skip_chroma_rd;
+#else
+ x->cfl_store_y = 1;
+#endif
+
+ txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
+ mbmi->sb_type, mbmi->tx_size,
+ cpi->sf.use_fast_coef_costing);
+
+ x->cfl_store_y = 0;
+#endif
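The CFL block above toggles x->cfl_store_y around one extra txfm_rd_in_plane() call so the reconstructed luma can be captured for chroma-from-luma. A minimal, self-contained sketch of the gated copy it implies (buffer layout and function name are assumptions, not libaom symbols):

#include <string.h>
/* Hedged sketch (not libaom code): copy the reconstructed luma rows into a
 * side buffer only when the store_y-style gate says chroma will consume it. */
static void example_store_luma_for_cfl(int store_y, unsigned char *cfl_buf,
                                       const unsigned char *recon, int stride,
                                       int width, int height) {
  if (!store_y) return;            /* no chroma associated: skip the copy */
  for (int r = 0; r < height; ++r) /* row-by-row copy into the CfL buffer */
    memcpy(cfl_buf + r * width, recon + r * stride, width);
}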
+ max_uv_tx_size = uv_txsize_lookup[bsize][mbmi->tx_size][pd[1].subsampling_x]
+ [pd[1].subsampling_y];
+ init_sbuv_mode(mbmi);
#if CONFIG_CB4X4
if (!x->skip_chroma_rd)
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
@@ -8346,8 +9655,8 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
rate_y + rate_uv + av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
rd_cost->dist = dist_y + dist_uv;
}
- rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
rd_cost->dist_y = dist_y;
#endif
} else {
@@ -8360,7 +9669,7 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
if (rd_pick_intrabc_mode_sb(cpi, x, rd_cost, bsize, best_rd) < best_rd) {
ctx->skip = x->skip; // FIXME where is the proper place to set this?!
assert(rd_cost->rate != INT_MAX);
- rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
+ rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
}
#endif
if (rd_cost->rate == INT_MAX) return;
@@ -8494,7 +9803,8 @@ static void pick_filter_intra_interframe(
const AV1_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
BLOCK_SIZE bsize, int mi_row, int mi_col, int *rate_uv_intra,
int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv,
- PREDICTION_MODE *mode_uv, FILTER_INTRA_MODE_INFO *filter_intra_mode_info_uv,
+ UV_PREDICTION_MODE *mode_uv,
+ FILTER_INTRA_MODE_INFO *filter_intra_mode_info_uv,
#if CONFIG_EXT_INTRA
int8_t *uv_angle_delta,
#endif // CONFIG_EXT_INTRA
@@ -8531,7 +9841,7 @@ static void pick_filter_intra_interframe(
// TODO(huisu): use skip_mask for further speedup.
(void)skip_mask;
mbmi->mode = DC_PRED;
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
mbmi->ref_frame[0] = INTRA_FRAME;
mbmi->ref_frame[1] = NONE_FRAME;
if (!rd_pick_filter_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
@@ -8600,7 +9910,8 @@ static void pick_filter_intra_interframe(
rate2 += write_uniform_cost(
FILTER_INTRA_MODES, mbmi->filter_intra_mode_info.filter_intra_mode[0]);
#if CONFIG_EXT_INTRA
- if (av1_is_directional_mode(mbmi->uv_mode, bsize)) {
+ if (av1_is_directional_mode(mbmi->uv_mode, bsize) &&
+ av1_use_angle_delta(bsize)) {
rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
}
@@ -8628,7 +9939,7 @@ static void pick_filter_intra_interframe(
} else {
rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
}
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ this_rd = RDCOST(x->rdmult, rate2, distortion2);
if (this_rd < *best_intra_rd) {
*best_intra_rd = this_rd;
@@ -8693,6 +10004,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
unsigned char segment_id = mbmi->segment_id;
int comp_pred, i, k;
int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ int_mv frame_comp_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE];
int_mv single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
#if CONFIG_EXT_INTER
@@ -8722,7 +10036,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
int best_mode_skippable = 0;
int midx, best_mode_index = -1;
unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
+#if CONFIG_EXT_COMP_REFS
+ unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME][TOTAL_REFS_PER_FRAME];
+#else
unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
+#endif // CONFIG_EXT_COMP_REFS
aom_prob comp_mode_p;
int64_t best_intra_rd = INT64_MAX;
unsigned int best_pred_sse = UINT_MAX;
@@ -8730,7 +10048,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
int rate_uv_intra[TX_SIZES_ALL], rate_uv_tokenonly[TX_SIZES_ALL];
int64_t dist_uvs[TX_SIZES_ALL];
int skip_uvs[TX_SIZES_ALL];
- PREDICTION_MODE mode_uv[TX_SIZES_ALL];
+ UV_PREDICTION_MODE mode_uv[TX_SIZES_ALL];
#if CONFIG_PALETTE
PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL];
#endif // CONFIG_PALETTE
@@ -8747,7 +10065,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]];
int best_skip2 = 0;
- uint8_t ref_frame_skip_mask[2] = { 0 };
+ uint16_t ref_frame_skip_mask[2] = { 0 };
uint32_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 };
#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
MV_REFERENCE_FRAME best_single_inter_ref = LAST_FRAME;
@@ -8850,6 +10168,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
*returnrate_nocoef = INT_MAX;
#endif // CONFIG_SUPERTX
+#if CONFIG_SPEED_REFS
+ memset(x->mbmi_ext->ref_mvs, 0, sizeof(x->mbmi_ext->ref_mvs));
+#endif // CONFIG_SPEED_REFS
+
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
x->mbmi_ext->mode_context[ref_frame] = 0;
@@ -8873,6 +10195,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_GLOBAL_MOTION
#if CONFIG_EXT_INTER
frame_mv[NEW_NEWMV][ref_frame].as_int = INVALID_MV;
+#if CONFIG_COMPOUND_SINGLEREF
+ frame_mv[SR_NEW_NEWMV][ref_frame].as_int = INVALID_MV;
+ frame_comp_mv[SR_NEW_NEWMV][ref_frame].as_int = INVALID_MV;
+#endif // CONFIG_COMPOUND_SINGLEREF
#if CONFIG_GLOBAL_MOTION
frame_mv[ZERO_ZEROMV][ref_frame].as_int =
gm_get_motion_vector(&cm->global_motion[ref_frame],
@@ -8934,6 +10260,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
// Skip checking missing references in both single and compound reference
// modes. Note that a mode will be skipped iff both reference frames
// are masked out.
+#if CONFIG_EXT_COMP_REFS
+ ref_frame_skip_mask[0] |= (1 << ref_frame);
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+#else // !CONFIG_EXT_COMP_REFS
#if CONFIG_EXT_REFS
if (ref_frame == BWDREF_FRAME || ref_frame == ALTREF_FRAME) {
ref_frame_skip_mask[0] |= (1 << ref_frame);
@@ -8945,6 +10275,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#if CONFIG_EXT_REFS
}
#endif // CONFIG_EXT_REFS
+#endif // CONFIG_EXT_COMP_REFS
} else {
for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
// Skip fixed mv modes for poor references
@@ -9000,6 +10331,12 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV);
if (frame_mv[NEAR_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARMV);
+#if CONFIG_COMPOUND_SINGLEREF
+ if (frame_mv[SR_NEAREST_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int ||
+ frame_comp_mv[SR_NEAREST_NEARMV][ALTREF_FRAME].as_int !=
+ zeromv.as_int)
+ mode_skip_mask[ALTREF_FRAME] |= (1 << SR_NEAREST_NEARMV);
+#endif // CONFIG_COMPOUND_SINGLEREF
#endif // CONFIG_EXT_INTER
}
}
@@ -9077,7 +10414,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
int compmode_cost = 0;
int rate2 = 0, rate_y = 0, rate_uv = 0;
int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
int64_t distortion2_y = 0;
int64_t total_sse_y = INT64_MAX;
#endif
@@ -9106,6 +10443,13 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int;
frame_mv[this_mode][second_ref_frame].as_int =
frame_mv[compound_ref1_mode(this_mode)][second_ref_frame].as_int;
+#if CONFIG_COMPOUND_SINGLEREF
+ } else if (is_inter_singleref_comp_mode(this_mode)) {
+ frame_mv[this_mode][ref_frame].as_int =
+ frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int;
+ frame_comp_mv[this_mode][ref_frame].as_int =
+ frame_mv[compound_ref1_mode(this_mode)][ref_frame].as_int;
+#endif // CONFIG_COMPOUND_SINGLEREF
}
#endif // CONFIG_EXT_INTER
@@ -9154,6 +10498,34 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
(ref_frame_skip_mask[1] & (1 << AOMMAX(0, second_ref_frame))))
continue;
+#if CONFIG_EXT_COMP_REFS
+// TODO(zoeliu): Toggle the following between #if 0 and #if 1 and the bug
+// will manifest itself.
+#if 0
+ if (!(cpi->ref_frame_flags & flag_list[ref_frame]) ||
+ (second_ref_frame > INTRA_FRAME &&
+ (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))))
+ printf("Frame=%d, bsize=%d, (mi_row,mi_col)=(%d,%d), ref_frame=%d, "
+ "second_ref_frame=%d\n", cm->current_video_frame, bsize, mi_row,
+ mi_col, ref_frame, second_ref_frame);
+
+ if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
+ if (second_ref_frame > INTRA_FRAME &&
+ (!(cpi->ref_frame_flags & flag_list[second_ref_frame])))
+ continue;
+#endif // 0
+
+#if !USE_UNI_COMP_REFS
+ // NOTE(zoeliu): Temporarily disable uni-directional comp refs
+ if (second_ref_frame > INTRA_FRAME) {
+ if (!((ref_frame < BWDREF_FRAME) ^ (second_ref_frame < BWDREF_FRAME)))
+ continue;
+ }
+ assert(second_ref_frame <= INTRA_FRAME ||
+ ((ref_frame < BWDREF_FRAME) ^ (second_ref_frame < BWDREF_FRAME)));
+#endif // !USE_UNI_COMP_REFS
+#endif // CONFIG_EXT_COMP_REFS
+
if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue;
// Test best rd so far against threshold for trying this mode.
@@ -9239,7 +10611,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
}
mbmi->mode = this_mode;
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
mbmi->ref_frame[0] = ref_frame;
mbmi->ref_frame[1] = second_ref_frame;
#if CONFIG_PALETTE
@@ -9267,6 +10639,15 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
}
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ // Single ref compound mode
+ if (!comp_pred && is_inter_singleref_comp_mode(mbmi->mode)) {
+ xd->block_refs[1] = xd->block_refs[0];
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[1] = xd->plane[i].pre[0];
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+
#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
@@ -9277,7 +10658,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
struct macroblockd_plane *const pd = &xd->plane[1];
#if CONFIG_EXT_INTRA
is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
- if (is_directional_mode) {
+ if (is_directional_mode && av1_use_angle_delta(bsize)) {
int rate_dummy;
int64_t model_rd = INT64_MAX;
if (!angle_stats_ready) {
@@ -9390,10 +10771,13 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (av1_is_intra_filter_switchable(p_angle))
rate2 += cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
#endif // CONFIG_INTRA_INTERP
- rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
- MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
+ if (av1_use_angle_delta(bsize)) {
+ rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
+ MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
+ }
}
- if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) {
+ if (av1_is_directional_mode(mbmi->uv_mode, bsize) &&
+ av1_use_angle_delta(bsize)) {
rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
}
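The angle-delta signaling above is a near-uniform code over 2 * MAX_ANGLE_DELTA + 1 values, now gated on av1_use_angle_delta(bsize). A hedged sketch of what write_uniform_cost() plausibly charges for such a code, assuming 512 cost units per bit:

/* Hedged sketch (not the real helper): rate of signaling one of n equally
 * likely values with a truncated binary code; the 512-units-per-bit scale is
 * an assumption about the encoder's cost units. */
static int example_uniform_cost(int n, int v) {
  int l = 0;
  while ((1 << l) < n) ++l;         /* bits needed for n symbols, rounded up */
  const int m = (1 << l) - n;       /* symbols that get the shorter codeword */
  if (l == 0) return 0;
  return (v < m ? l - 1 : l) * 512; /* first m values use one bit fewer */
}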
@@ -9409,7 +10793,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
mbmi->filter_intra_mode_info.filter_intra_mode[0]);
}
}
- if (mbmi->uv_mode == DC_PRED) {
+ if (mbmi->uv_mode == UV_DC_PRED) {
rate2 +=
av1_cost_bit(cpi->common.fc->filter_intra_probs[1],
mbmi->filter_intra_mode_info.use_filter_intra_mode[1]);
@@ -9422,7 +10806,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
rate2 += intra_cost_penalty;
distortion2 = distortion_y + distortion_uv;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize < BLOCK_8X8) distortion2_y = distortion_y;
#endif
} else {
@@ -9481,6 +10865,27 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
}
}
+#if CONFIG_COMPOUND_SINGLEREF
+ } else if (is_inter_singleref_comp_mode(mbmi->mode)) {
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
+ // TODO(zoeliu): To further investigate which ref_mv_idx should be
+ // chosen for the mode of SR_NEAR_NEWMV.
+ int ref_mv_idx = 0;
+ // Special case: SR_NEAR_NEWMV mode uses
+ // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
+ // mbmi->ref_mv_idx (like NEWMV)
+ if (mbmi->mode == SR_NEAR_NEWMV) ref_mv_idx = 1;
+
+ if (compound_ref0_mode(mbmi->mode) == NEWMV ||
+ compound_ref1_mode(mbmi->mode) == NEWMV) {
+ int_mv this_mv =
+ mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
+ clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
+ xd->n8_h << MI_SIZE_LOG2, xd);
+ mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
+ }
+ }
+#endif // CONFIG_COMPOUND_SINGLEREF
} else {
#endif // CONFIG_EXT_INTER
if (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
@@ -9500,6 +10905,19 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
{
RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
av1_init_rd_stats(&rd_stats);
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ // While av1 master uses rd_stats_y.rate throughout the codebase,
+ // which is set when handle_inter_mode() is called, the daala-dist code
+ // in rd_pick_partition() for cb4x4 and sub8x8 blocks needs to know
+ // .dist_y, which comes from rd_stats_y.dist and rd_stats_y.sse.
+ // The problem is that rd_stats_y.dist and rd_stats_y.sse are sometimes
+ // not initialized when rd_stats.skip = 1; in that case rd_stats.dist and
+ // rd_stats.sse hold the combined luma and chroma dist and sse.
+ // This can be seen inside motion_mode_rd(), which is called by
+ // handle_inter_mode().
+ if (bsize < BLOCK_8X8) av1_init_rd_stats(&rd_stats_y);
+#endif
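The comment above motivates resetting rd_stats_y before handle_inter_mode() so stale .dist/.sse can never leak into the DIST_8X8 bookkeeping. A self-contained sketch of such a reset (illustrative struct and values, not the real RD_STATS/av1_init_rd_stats()):

#include <stdint.h>
/* Hedged sketch: zero the luma stats so later code never reads leftovers
 * from a pass that coded the block as skip. */
typedef struct { int rate; int64_t dist; int64_t sse; int skip; } ExampleRdStats;
static void example_init_rd_stats(ExampleRdStats *s) {
  s->rate = 0;
  s->dist = 0;
  s->sse = 0;
  s->skip = 1; /* assumption: skip defaults to true until coefficients are costed */
}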
rd_stats.rate = rate2;
// Point to variables that are maintained between loop iterations
@@ -9510,6 +10928,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_EXT_INTER
this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y,
&rd_stats_uv, &disable_skip, frame_mv,
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ frame_comp_mv,
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
mi_row, mi_col, &args, best_rd);
rate2 = rd_stats.rate;
@@ -9518,23 +10939,39 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
total_sse = rd_stats.sse;
rate_y = rd_stats_y.rate;
rate_uv = rd_stats_uv.rate;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) distortion2_y = rd_stats_y.dist;
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ if (bsize < BLOCK_8X8) {
+ if (rd_stats_y.rate != INT_MAX) {
+ assert(rd_stats_y.sse < INT64_MAX);
+ assert(rd_stats_y.dist < INT64_MAX);
+ }
+ total_sse_y = rd_stats_y.sse;
+ distortion2_y = rd_stats_y.dist;
+ }
#endif
}
// TODO(jingning): This needs some refactoring to improve code quality
// and reduce redundant steps.
#if CONFIG_EXT_INTER
+#if CONFIG_COMPOUND_SINGLEREF
+ if ((have_nearmv_in_inter_mode(mbmi->mode) &&
+ mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
+ ((mbmi->mode == NEWMV || mbmi->mode == SR_NEW_NEWMV ||
+ mbmi->mode == NEW_NEWMV) &&
+ mbmi_ext->ref_mv_count[ref_frame_type] > 1))
+#else // !CONFIG_COMPOUND_SINGLEREF
if ((have_nearmv_in_inter_mode(mbmi->mode) &&
mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
((mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) &&
- mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
-#else
+ mbmi_ext->ref_mv_count[ref_frame_type] > 1))
+#endif // CONFIG_COMPOUND_SINGLEREF
+#else // !CONFIG_EXT_INTER
if ((mbmi->mode == NEARMV &&
mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
- (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
-#endif
+ (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1))
+#endif // CONFIG_EXT_INTER
+ {
int_mv backup_mv = frame_mv[NEARMV][ref_frame];
MB_MODE_INFO backup_mbmi = *mbmi;
int backup_skip = x->skip;
@@ -9560,18 +10997,16 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
rate2 += (rate2 < INT_MAX ? cpi->drl_mode_cost0[drl_ctx][0] : 0);
if (this_rd < INT64_MAX) {
- if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
- RDCOST(x->rdmult, x->rddiv, 0, total_sse))
- tmp_ref_rd =
- RDCOST(x->rdmult, x->rddiv,
- rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
- distortion2);
+ if (RDCOST(x->rdmult, rate_y + rate_uv, distortion2) <
+ RDCOST(x->rdmult, 0, total_sse))
+ tmp_ref_rd = RDCOST(
+ x->rdmult, rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
+ distortion2);
else
- tmp_ref_rd =
- RDCOST(x->rdmult, x->rddiv,
- rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
- rate_y - rate_uv,
- total_sse);
+ tmp_ref_rd = RDCOST(
+ x->rdmult, rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
+ rate_y - rate_uv,
+ total_sse);
}
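The branch above decides between keeping the coefficient coding (rate_y + rate_uv against distortion2) and signaling skip (no coefficient rate, distortion taken from total_sse). A compact sketch of that decision with illustrative cost scaling:

#include <stdint.h>
/* Hedged sketch of the branch above: keep the coefficient rate if coding the
 * residual beats pure skip, otherwise charge the skip flag, drop the
 * coefficient rate and use sse as the distortion. The scaling in example_rd()
 * is an assumption. */
static int64_t example_rd(int rdmult, int rate, int64_t dist) {
  return (((int64_t)rate * rdmult) >> 9) + (dist << 4);
}
static int64_t example_skip_decision(int rdmult, int total_rate, int txfm_rate,
                                     int64_t dist, int64_t sse,
                                     int skip0_bits, int skip1_bits) {
  if (example_rd(rdmult, txfm_rate, dist) < example_rd(rdmult, 0, sse))
    return example_rd(rdmult, total_rate + skip0_bits, dist);
  return example_rd(rdmult, total_rate + skip1_bits - txfm_rate, sse);
}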
#if CONFIG_VAR_TX
for (i = 0; i < MAX_MB_PLANE; ++i)
@@ -9587,6 +11022,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
RD_STATS tmp_rd_stats, tmp_rd_stats_y, tmp_rd_stats_uv;
av1_invalid_rd_stats(&tmp_rd_stats);
+
x->skip = 0;
mbmi->ref_mv_idx = 1 + ref_idx;
@@ -9627,6 +11063,34 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
xd->n8_h << MI_SIZE_LOG2, xd);
mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
}
+#if CONFIG_COMPOUND_SINGLEREF
+ } else if (is_inter_singleref_comp_mode(mbmi->mode)) {
+ int ref_mv_idx = mbmi->ref_mv_idx;
+ // Special case: SR_NEAR_NEWMV mode uses
+ // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
+ // mbmi->ref_mv_idx (like NEWMV)
+ if (mbmi->mode == SR_NEAR_NEWMV) ref_mv_idx = 1 + mbmi->ref_mv_idx;
+
+ // TODO(zoeliu): For the SR_NEAREST_NEWMV mode, since only the "if"
+ // branch runs (not the "else if"),
+ // mbmi_ext->ref_mvs[mbmi->ref_frame[0]] takes the value for "NEWMV"
+ // instead of "NEARESTMV".
+ if (compound_ref0_mode(mbmi->mode) == NEWMV ||
+ compound_ref1_mode(mbmi->mode) == NEWMV) {
+ int_mv this_mv =
+ mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
+ clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
+ xd->n8_h << MI_SIZE_LOG2, xd);
+ mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
+ } else if (compound_ref0_mode(mbmi->mode) == NEARESTMV ||
+ compound_ref1_mode(mbmi->mode) == NEARESTMV) {
+ int_mv this_mv =
+ mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
+ clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
+ xd->n8_h << MI_SIZE_LOG2, xd);
+ mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
+ }
+#endif // CONFIG_COMPOUND_SINGLEREF
} else {
#endif // CONFIG_EXT_INTER
for (ref = 0; ref < 1 + comp_pred; ++ref) {
@@ -9657,16 +11121,28 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
frame_mv[NEARMV][ref_frame] = cur_mv;
av1_init_rd_stats(&tmp_rd_stats);
-
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ // For the same reason as with 'rd_stats_y' passed to
+ // handle_inter_mode() above, tmp_rd_stats_y.dist and
+ // tmp_rd_stats_y.sse are sometimes not initialized, esp. when
+ // tmp_rd_stats.skip = 1 and tmp_rd_stats.dist and .sse
+ // represent the combined luma and chroma .dist and .sse,
+ // so we should initialize tmp_rd_stats_y here.
+ if (bsize < BLOCK_8X8) av1_init_rd_stats(&tmp_rd_stats_y);
+#endif
// Point to variables that are not maintained between iterations
args.single_newmv = dummy_single_newmv;
#if CONFIG_EXT_INTER
args.single_newmv_rate = dummy_single_newmv_rate;
args.modelled_rd = NULL;
#endif // CONFIG_EXT_INTER
- tmp_alt_rd = handle_inter_mode(
- cpi, x, bsize, &tmp_rd_stats, &tmp_rd_stats_y, &tmp_rd_stats_uv,
- &dummy_disable_skip, frame_mv, mi_row, mi_col, &args, best_rd);
+ tmp_alt_rd = handle_inter_mode(cpi, x, bsize, &tmp_rd_stats,
+ &tmp_rd_stats_y, &tmp_rd_stats_uv,
+ &dummy_disable_skip, frame_mv,
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ frame_comp_mv,
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ mi_row, mi_col, &args, best_rd);
// Prevent pointers from escaping local scope
args.single_newmv = NULL;
#if CONFIG_EXT_INTER
@@ -9696,25 +11172,22 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (tmp_alt_rd < INT64_MAX) {
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- tmp_alt_rd = RDCOST(x->rdmult, x->rddiv, tmp_rd_stats.rate,
- tmp_rd_stats.dist);
+ tmp_alt_rd =
+ RDCOST(x->rdmult, tmp_rd_stats.rate, tmp_rd_stats.dist);
#else
- if (RDCOST(x->rdmult, x->rddiv,
- tmp_rd_stats_y.rate + tmp_rd_stats_uv.rate,
+ if (RDCOST(x->rdmult, tmp_rd_stats_y.rate + tmp_rd_stats_uv.rate,
tmp_rd_stats.dist) <
- RDCOST(x->rdmult, x->rddiv, 0, tmp_rd_stats.sse))
- tmp_alt_rd =
- RDCOST(x->rdmult, x->rddiv,
- tmp_rd_stats.rate +
- av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
- tmp_rd_stats.dist);
+ RDCOST(x->rdmult, 0, tmp_rd_stats.sse))
+ tmp_alt_rd = RDCOST(
+ x->rdmult, tmp_rd_stats.rate +
+ av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
+ tmp_rd_stats.dist);
else
- tmp_alt_rd =
- RDCOST(x->rdmult, x->rddiv,
- tmp_rd_stats.rate +
- av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
- tmp_rd_stats_y.rate - tmp_rd_stats_uv.rate,
- tmp_rd_stats.sse);
+ tmp_alt_rd = RDCOST(
+ x->rdmult, tmp_rd_stats.rate +
+ av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
+ tmp_rd_stats_y.rate - tmp_rd_stats_uv.rate,
+ tmp_rd_stats.sse);
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
}
@@ -9730,8 +11203,12 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
tmp_ref_rd = tmp_alt_rd;
backup_mbmi = *mbmi;
backup_skip = x->skip;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize < BLOCK_8X8) {
+ if (tmp_rd_stats_y.rate != INT_MAX) {
+ assert(tmp_rd_stats_y.sse < INT64_MAX);
+ assert(tmp_rd_stats_y.dist < INT64_MAX);
+ }
total_sse_y = tmp_rd_stats_y.sse;
distortion2_y = tmp_rd_stats_y.dist;
}
@@ -9774,19 +11251,33 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
// Estimate the reference frame signaling cost and add it
// to the rolling cost variable.
if (comp_pred) {
+#if CONFIG_EXT_COMP_REFS
+ rate2 += ref_costs_comp[ref_frame][second_ref_frame];
+#else // !CONFIG_EXT_COMP_REFS
rate2 += ref_costs_comp[ref_frame];
#if CONFIG_EXT_REFS
rate2 += ref_costs_comp[second_ref_frame];
#endif // CONFIG_EXT_REFS
+#endif // CONFIG_EXT_COMP_REFS
} else {
rate2 += ref_costs_single[ref_frame];
}
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ // Add the cost to signal single/comp mode in single ref.
+ if (!comp_pred && cm->reference_mode != COMPOUND_REFERENCE) {
+ aom_prob singleref_comp_mode_p = av1_get_inter_mode_prob(cm, xd);
+ rate2 += av1_cost_bit(singleref_comp_mode_p,
+ is_inter_singleref_comp_mode(mbmi->mode));
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- if (ref_frame == INTRA_FRAME) {
+ if (ref_frame == INTRA_FRAME)
#else
- if (!disable_skip) {
+ if (!disable_skip)
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+ {
if (skippable) {
// Back out the coefficient coding costs
rate2 -= (rate_y + rate_uv);
@@ -9795,9 +11286,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
// Cost the skip mb case
rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
} else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
- if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + rate_skip0,
- distortion2) <
- RDCOST(x->rdmult, x->rddiv, rate_skip1, total_sse)) {
+ if (RDCOST(x->rdmult, rate_y + rate_uv + rate_skip0, distortion2) <
+ RDCOST(x->rdmult, rate_skip1, total_sse)) {
// Add in the cost of the no skip flag.
rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
} else {
@@ -9809,8 +11299,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
this_skip2 = 1;
rate_y = 0;
rate_uv = 0;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) distortion2_y = total_sse_y;
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ if (bsize < BLOCK_8X8) {
+ assert(total_sse_y < INT64_MAX);
+ distortion2_y = total_sse_y;
+ }
#endif
}
} else {
@@ -9819,11 +11312,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
}
// Calculate the final RD estimate for this mode.
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ this_rd = RDCOST(x->rdmult, rate2, distortion2);
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
} else {
this_skip2 = mbmi->skip;
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ this_rd = RDCOST(x->rdmult, rate2, distortion2);
if (this_skip2) {
rate_y = 0;
rate_uv = 0;
@@ -9831,6 +11324,12 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
}
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ if ((bsize < BLOCK_8X8) && (rate2 != INT_MAX)) {
+ assert(distortion2_y < INT64_MAX);
+ }
+#endif
+
if (ref_frame == INTRA_FRAME) {
// Keep record of best intra rd
if (this_rd < best_intra_rd) {
@@ -9875,12 +11374,18 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
*returnrate_nocoef -= av1_cost_bit(av1_get_intra_inter_prob(cm, xd),
mbmi->ref_frame[0] != INTRA_FRAME);
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+#if CONFIG_WARPED_MOTION
+ set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+#endif
#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
MODE_INFO *const mi = xd->mi[0];
const MOTION_MODE motion_allowed = motion_mode_allowed(
-#if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
+#if CONFIG_GLOBAL_MOTION
0, xd->global_motion,
-#endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
+#endif // CONFIG_GLOBAL_MOTION
+#if CONFIG_WARPED_MOTION
+ xd,
+#endif
mi);
if (motion_allowed == WARPED_CAUSAL)
*returnrate_nocoef -= cpi->motion_mode_cost[bsize][mbmi->motion_mode];
@@ -9901,8 +11406,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
best_rate_y = rate_y + av1_cost_bit(av1_get_skip_prob(cm, xd),
this_skip2 || skippable);
best_rate_uv = rate_uv;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2_y;
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ if (bsize < BLOCK_8X8) {
+ assert(distortion2_y < INT64_MAX);
+ rd_cost->dist_y = distortion2_y;
+ }
#endif
#if CONFIG_VAR_TX
for (i = 0; i < MAX_MB_PLANE; ++i)
@@ -9911,7 +11419,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_VAR_TX
}
}
-
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ if ((bsize < BLOCK_8X8) && (rd_cost->rate != INT_MAX)) {
+ assert(rd_cost->dist_y < INT64_MAX);
+ }
+#endif
/* keep record of best compound/single-only prediction */
if (!disable_skip && ref_frame != INTRA_FRAME) {
int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
@@ -9924,8 +11436,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
hybrid_rate = rate2 + compmode_cost;
}
- single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
- hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
+ single_rd = RDCOST(x->rdmult, single_rate, distortion2);
+ hybrid_rd = RDCOST(x->rdmult, hybrid_rate, distortion2);
if (!comp_pred) {
if (single_rd < best_pred_rd[SINGLE_REFERENCE])
@@ -9963,6 +11475,15 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
}
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ // Single ref compound mode
+ if (!has_second_ref(mbmi) && is_inter_singleref_comp_mode(mbmi->mode)) {
+ xd->block_refs[1] = xd->block_refs[0];
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[1] = xd->plane[i].pre[0];
+ }
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+
if (is_inter_mode(mbmi->mode)) {
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
#if CONFIG_MOTION_VAR
@@ -9996,9 +11517,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
}
- if (RDCOST(x->rdmult, x->rddiv, rd_stats_y.rate + rd_stats_uv.rate,
+ if (RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
(rd_stats_y.dist + rd_stats_uv.dist)) >
- RDCOST(x->rdmult, x->rddiv, 0, (rd_stats_y.sse + rd_stats_uv.sse))) {
+ RDCOST(x->rdmult, 0, (rd_stats_y.sse + rd_stats_uv.sse))) {
skip_blk = 1;
rd_stats_y.rate = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
rd_stats_uv.rate = 0;
@@ -10009,8 +11530,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
rd_stats_y.rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
}
- if (RDCOST(x->rdmult, x->rddiv, best_rate_y + best_rate_uv, rd_cost->dist) >
- RDCOST(x->rdmult, x->rddiv, rd_stats_y.rate + rd_stats_uv.rate,
+ if (RDCOST(x->rdmult, best_rate_y + best_rate_uv, rd_cost->dist) >
+ RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
(rd_stats_y.dist + rd_stats_uv.dist))) {
#if CONFIG_VAR_TX
int idx, idy;
@@ -10031,15 +11552,24 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
rd_cost->rate +=
(rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv);
rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) rd_cost->dist_y = rd_stats_y.dist;
-#endif
- rd_cost->rdcost =
- RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
+ rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
best_skip2 = skip_blk;
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ if (bsize < BLOCK_8X8) {
+ assert(rd_cost->rate != INT_MAX);
+ assert(rd_cost->dist_y < INT64_MAX);
+ rd_cost->dist_y = rd_stats_y.dist;
+ }
+#endif
}
}
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+ if ((bsize < BLOCK_8X8) && (rd_cost->rate != INT_MAX)) {
+ assert(rd_cost->dist_y < INT64_MAX);
+ }
+#endif
+
#if CONFIG_PALETTE
// Only try palette mode when the best mode so far is an intra mode.
if (try_palette && !is_inter_mode(best_mbmode.mode)) {
@@ -10058,7 +11588,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
MB_MODE_INFO best_mbmi_palette = best_mbmode;
mbmi->mode = DC_PRED;
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
mbmi->ref_frame[0] = INTRA_FRAME;
mbmi->ref_frame[1] = NONE_FRAME;
rate_overhead_palette = rd_pick_palette_intra_sby(
@@ -10119,7 +11649,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_SUPERTX
rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
}
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ this_rd = RDCOST(x->rdmult, rate2, distortion2);
if (this_rd < best_rd) {
best_mode_index = 3;
mbmi->mv[0].as_int = 0;
@@ -10165,10 +11695,14 @@ PALETTE_EXIT:
}
#endif // CONFIG_FILTER_INTRA
- // The inter modes' rate costs are not calculated precisely in some cases.
- // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
- // ZEROMV. Here, checks are added for those cases, and the mode decisions
- // are corrected.
+// The inter modes' rate costs are not calculated precisely in some cases.
+// Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
+// ZEROMV. Here, checks are added for those cases, and the mode decisions
+// are corrected.
+#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+// NOTE: For SR_NEW_NEWMV, no need to check as the two mvs from the same ref
+// are surely different from each other.
+#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
if (best_mbmode.mode == NEWMV
#if CONFIG_EXT_INTER
|| best_mbmode.mode == NEW_NEWMV
@@ -10248,8 +11782,9 @@ PALETTE_EXIT:
}
if (nearestmv[0].as_int == best_mbmode.mv[0].as_int &&
- nearestmv[1].as_int == best_mbmode.mv[1].as_int) {
+ nearestmv[1].as_int == best_mbmode.mv[1].as_int)
#if CONFIG_EXT_INTER
+ {
best_mbmode.mode = NEAREST_NEARESTMV;
} else {
int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
@@ -10274,6 +11809,7 @@ PALETTE_EXIT:
best_mbmode.mode = ZERO_ZEROMV;
}
#else
+ {
best_mbmode.mode = NEARESTMV;
} else if (best_mbmode.mv[0].as_int == zeromv[0].as_int &&
best_mbmode.mv[1].as_int == zeromv[1].as_int) {
@@ -10287,11 +11823,18 @@ PALETTE_EXIT:
// using a mode which can support ref_mv_idx
if (best_mbmode.ref_mv_idx != 0 &&
#if CONFIG_EXT_INTER
+#if CONFIG_COMPOUND_SINGLEREF
+ !(best_mbmode.mode == NEWMV || best_mbmode.mode == SR_NEW_NEWMV ||
+ best_mbmode.mode == NEW_NEWMV ||
+ have_nearmv_in_inter_mode(best_mbmode.mode)))
+#else // !CONFIG_COMPOUND_SINGLEREF
!(best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV ||
- have_nearmv_in_inter_mode(best_mbmode.mode))) {
-#else
- !(best_mbmode.mode == NEARMV || best_mbmode.mode == NEWMV)) {
-#endif
+ have_nearmv_in_inter_mode(best_mbmode.mode)))
+#endif // CONFIG_COMPOUND_SINGLEREF
+#else // !CONFIG_EXT_INTER
+ !(best_mbmode.mode == NEARMV || best_mbmode.mode == NEWMV))
+#endif // CONFIG_EXT_INTER
+ {
best_mbmode.ref_mv_idx = 0;
}
@@ -10377,11 +11920,12 @@ PALETTE_EXIT:
) {
#if CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR
// Correct the motion mode for ZEROMV
- const MOTION_MODE last_motion_mode_allowed = motion_mode_allowed(
-#if SEPARATE_GLOBAL_MOTION
- 0, xd->global_motion,
-#endif // SEPARATE_GLOBAL_MOTION
- xd->mi[0]);
+ const MOTION_MODE last_motion_mode_allowed =
+ motion_mode_allowed(0, xd->global_motion,
+#if CONFIG_WARPED_MOTION
+ xd,
+#endif
+ xd->mi[0]);
if (mbmi->motion_mode > last_motion_mode_allowed)
mbmi->motion_mode = last_motion_mode_allowed;
#endif // CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR
@@ -10445,7 +11989,11 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
int i;
int64_t best_pred_diff[REFERENCE_MODES];
unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
+#if CONFIG_EXT_COMP_REFS
+ unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME][TOTAL_REFS_PER_FRAME];
+#else
unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
+#endif // CONFIG_EXT_COMP_REFS
aom_prob comp_mode_p;
InterpFilter best_filter = SWITCHABLE;
int64_t this_rd = INT64_MAX;
@@ -10476,7 +12024,7 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
#endif // CONFIG_FILTER_INTRA
mbmi->mode = ZEROMV;
mbmi->motion_mode = SIMPLE_TRANSLATION;
- mbmi->uv_mode = DC_PRED;
+ mbmi->uv_mode = UV_DC_PRED;
mbmi->ref_frame[0] = LAST_FRAME;
mbmi->ref_frame[1] = NONE_FRAME;
#if CONFIG_GLOBAL_MOTION
@@ -10501,7 +12049,17 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
#if CONFIG_WARPED_MOTION
if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
+#if WARPED_MOTION_SORT_SAMPLES
+ int pts_mv[SAMPLES_ARRAY_SIZE];
+ mbmi->num_proj_ref[0] =
+ findSamples(cm, xd, mi_row, mi_col, pts, pts_inref, pts_mv);
+ // Rank the samples by motion vector difference
+ if (mbmi->num_proj_ref[0] > 1)
+ mbmi->num_proj_ref[0] = sortSamples(pts_mv, &mbmi->mv[0].as_mv, pts,
+ pts_inref, mbmi->num_proj_ref[0]);
+#else
mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
+#endif // WARPED_MOTION_SORT_SAMPLES
}
#endif
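With WARPED_MOTION_SORT_SAMPLES, the candidate projection samples are ranked by how close their motion vectors are to the block's own MV before being handed to the warp model. A self-contained sketch of that ranking idea (the L1 metric and qsort-based ordering are assumptions, not sortSamples() itself):

#include <stdlib.h>
/* Hedged sketch (not libaom code): order candidate samples by the L1 distance
 * between their MV and the current block's MV, smallest first. */
typedef struct { int mv_row, mv_col; } ExampleMv;
static ExampleMv g_cur_mv; /* illustrative file-scope context for qsort */
static int cmp_by_mv_diff(const void *a, const void *b) {
  const ExampleMv *pa = (const ExampleMv *)a, *pb = (const ExampleMv *)b;
  const int da = abs(pa->mv_row - g_cur_mv.mv_row) + abs(pa->mv_col - g_cur_mv.mv_col);
  const int db = abs(pb->mv_row - g_cur_mv.mv_row) + abs(pb->mv_col - g_cur_mv.mv_col);
  return da - db;
}
static void example_sort_samples(ExampleMv *samples, int n, ExampleMv cur_mv) {
  g_cur_mv = cur_mv;
  qsort(samples, n, sizeof(*samples), cmp_by_mv_diff);
}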
@@ -10548,12 +12106,12 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
// Estimate the reference frame signaling cost and add it
// to the rolling cost variable.
rate2 += ref_costs_single[LAST_FRAME];
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ this_rd = RDCOST(x->rdmult, rate2, distortion2);
rd_cost->rate = rate2;
rd_cost->dist = distortion2;
rd_cost->rdcost = this_rd;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2;
#endif
if (this_rd >= best_rd_so_far) {
@@ -10646,7 +12204,8 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
// handle above row
if (xd->up_available) {
- const int overlap = num_4x4_blocks_high_lookup[bsize] * 2;
+ const int overlap =
+ AOMMIN(block_size_high[bsize] >> 1, block_size_high[BLOCK_64X64] >> 1);
const int miw = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
const int mi_row_offset = -1;
const uint8_t *const mask1d = av1_get_obmc_mask(overlap);
@@ -10666,7 +12225,9 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
&xd->mi[mi_col_offset + 1 + mi_row_offset * xd->mi_stride]->mbmi;
#endif
const BLOCK_SIZE a_bsize = AOMMAX(above_mbmi->sb_type, BLOCK_8X8);
- const int mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]);
+ const int above_step =
+ AOMMIN(mi_size_wide[a_bsize], mi_size_wide[BLOCK_64X64]);
+ const int mi_step = AOMMIN(xd->n8_w, above_step);
const int neighbor_bw = mi_step * MI_SIZE;
if (is_neighbor_overlappable(above_mbmi)) {
@@ -10725,7 +12286,8 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
// handle left column
if (xd->left_available) {
- const int overlap = num_4x4_blocks_wide_lookup[bsize] * 2;
+ const int overlap =
+ AOMMIN(block_size_wide[bsize] >> 1, block_size_wide[BLOCK_64X64] >> 1);
const int mih = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
const int mi_col_offset = -1;
const uint8_t *const mask1d = av1_get_obmc_mask(overlap);
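Both OBMC overlap computations in calc_target_weighted_pred() now derive from the real block dimension, halved and capped at half of BLOCK_64X64 (32 samples), rather than the old 4x4-block count. A one-function sketch of the cap:

/* Hedged sketch: overlap = min(block_dim / 2, 32). For a 128-sample block
 * the old formula would give 64; the new cap keeps the blend region at 32. */
static int example_obmc_overlap(int block_dim) {
  const int half = block_dim >> 1;
  return half < 32 ? half : 32;
}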
@@ -10746,7 +12308,9 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
&xd->mi[mi_col_offset + (mi_row_offset + 1) * xd->mi_stride]->mbmi;
#endif
const BLOCK_SIZE l_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8);
- const int mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]);
+ const int left_step =
+ AOMMIN(mi_size_high[l_bsize], mi_size_high[BLOCK_64X64]);
+ const int mi_step = AOMMIN(xd->n8_h, left_step);
const int neighbor_bh = mi_step * MI_SIZE;
if (is_neighbor_overlappable(left_mbmi)) {
@@ -10854,8 +12418,23 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
av1_subtract_plane(x, bsize, 0);
+#if CONFIG_VAR_TX
+ if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
+ select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
+ } else {
+ int idx, idy;
+ super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
+ for (idy = 0; idy < xd->n8_h; ++idy)
+ for (idx = 0; idx < xd->n8_w; ++idx)
+ mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
+ memset(x->blk_skip[0], rd_stats_y.skip,
+ sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
+ }
+ inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
+#else
super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
+#endif
assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX);
if (rd_stats_y.skip && rd_stats_uv.skip) {
rd_stats_y.rate = rate_skip1;
@@ -10863,10 +12442,10 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
rd_stats_y.dist = rd_stats_y.sse;
rd_stats_uv.dist = rd_stats_uv.sse;
skip_blk = 0;
- } else if (RDCOST(x->rdmult, x->rddiv,
+ } else if (RDCOST(x->rdmult,
(rd_stats_y.rate + rd_stats_uv.rate + rate_skip0),
(rd_stats_y.dist + rd_stats_uv.dist)) >
- RDCOST(x->rdmult, x->rddiv, rate_skip1,
+ RDCOST(x->rdmult, rate_skip1,
(rd_stats_y.sse + rd_stats_uv.sse))) {
rd_stats_y.rate = rate_skip1;
rd_stats_uv.rate = 0;
@@ -10879,18 +12458,33 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
}
backup_skip = skip_blk;
backup_mbmi = *mbmi;
- rd_causal = RDCOST(x->rdmult, x->rddiv, (rd_stats_y.rate + rd_stats_uv.rate),
+ rd_causal = RDCOST(x->rdmult, (rd_stats_y.rate + rd_stats_uv.rate),
(rd_stats_y.dist + rd_stats_uv.dist));
- rd_causal += RDCOST(x->rdmult, x->rddiv,
- av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 0), 0);
+ rd_causal +=
+ RDCOST(x->rdmult, av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 0), 0);
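rd_causal above is the cost of keeping the existing reconstruction including the bit that signals motion_mode 0; the hunks below rebuild the block with OBMC_CAUSAL, cost it the same way with the opposite bit, and keep whichever is cheaper. A compact sketch of that comparison (cost scaling and the *_bit arguments are illustrative):

#include <stdint.h>
/* Hedged sketch of av1_check_ncobmc_rd()'s final decision. */
static int64_t sketch_rd(int rdmult, int rate, int64_t dist) {
  return (((int64_t)rate * rdmult) >> 9) + (dist << 4); /* assumed scaling */
}
static int example_prefer_obmc(int rdmult, int causal_rate, int64_t causal_dist,
                               int causal_bit, int obmc_rate, int64_t obmc_dist,
                               int obmc_bit) {
  const int64_t rd_causal = sketch_rd(rdmult, causal_rate + causal_bit, causal_dist);
  const int64_t rd_obmc = sketch_rd(rdmult, obmc_rate + obmc_bit, obmc_dist);
  return rd_causal > rd_obmc; /* nonzero: keep the OBMC_CAUSAL reconstruction */
}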
// Check non-causal mode
mbmi->motion_mode = OBMC_CAUSAL;
av1_build_ncobmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
av1_subtract_plane(x, bsize, 0);
+#if CONFIG_VAR_TX
+ if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
+ select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
+ } else {
+ int idx, idy;
+ super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
+ for (idy = 0; idy < xd->n8_h; ++idy)
+ for (idx = 0; idx < xd->n8_w; ++idx)
+ mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
+ memset(x->blk_skip[0], rd_stats_y.skip,
+ sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
+ }
+ inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
+#else
super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
+#endif
assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX);
if (rd_stats_y.skip && rd_stats_uv.skip) {
rd_stats_y.rate = rate_skip1;
@@ -10898,10 +12492,10 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
rd_stats_y.dist = rd_stats_y.sse;
rd_stats_uv.dist = rd_stats_uv.sse;
skip_blk = 0;
- } else if (RDCOST(x->rdmult, x->rddiv,
+ } else if (RDCOST(x->rdmult,
(rd_stats_y.rate + rd_stats_uv.rate + rate_skip0),
(rd_stats_y.dist + rd_stats_uv.dist)) >
- RDCOST(x->rdmult, x->rddiv, rate_skip1,
+ RDCOST(x->rdmult, rate_skip1,
(rd_stats_y.sse + rd_stats_uv.sse))) {
rd_stats_y.rate = rate_skip1;
rd_stats_uv.rate = 0;
@@ -10914,9 +12508,8 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
}
if (rd_causal >
- RDCOST(x->rdmult, x->rddiv,
- rd_stats_y.rate + rd_stats_uv.rate +
- av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 1),
+ RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate +
+ av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 1),
(rd_stats_y.dist + rd_stats_uv.dist))) {
x->skip = skip_blk;
} else {