summary | refs | log | tree | commit | diff | stats
path: root/third_party/aom/av1/encoder
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/aom/av1/encoder')
-rw-r--r-- third_party/aom/av1/encoder/aq_cyclicrefresh.c | 4
-rw-r--r-- third_party/aom/av1/encoder/av1_quantize.c | 54
-rw-r--r-- third_party/aom/av1/encoder/av1_quantize.h | 15
-rw-r--r-- third_party/aom/av1/encoder/bitstream.c | 1134
-rw-r--r-- third_party/aom/av1/encoder/block.h | 28
-rw-r--r-- third_party/aom/av1/encoder/context_tree.h | 4
-rw-r--r-- third_party/aom/av1/encoder/corner_match.c | 15
-rw-r--r-- third_party/aom/av1/encoder/corner_match.h | 4
-rw-r--r-- third_party/aom/av1/encoder/daala_compat_enc.c | 8
-rw-r--r-- third_party/aom/av1/encoder/dct.c | 57
-rw-r--r-- third_party/aom/av1/encoder/encodeframe.c | 1325
-rw-r--r-- third_party/aom/av1/encoder/encodeframe.h | 9
-rw-r--r-- third_party/aom/av1/encoder/encodemb.c | 701
-rw-r--r-- third_party/aom/av1/encoder/encodemb.h | 20
-rw-r--r-- third_party/aom/av1/encoder/encodemv.c | 124
-rw-r--r-- third_party/aom/av1/encoder/encoder.c | 729
-rw-r--r-- third_party/aom/av1/encoder/encoder.h | 114
-rw-r--r-- third_party/aom/av1/encoder/encodetxb.c | 1149
-rw-r--r-- third_party/aom/av1/encoder/encodetxb.h | 51
-rw-r--r-- third_party/aom/av1/encoder/ethread.c | 53
-rw-r--r-- third_party/aom/av1/encoder/firstpass.c | 60
-rw-r--r-- third_party/aom/av1/encoder/firstpass.h | 15
-rw-r--r-- third_party/aom/av1/encoder/global_motion.c | 34
-rw-r--r-- third_party/aom/av1/encoder/global_motion.h | 15
-rw-r--r-- third_party/aom/av1/encoder/hybrid_fwd_txfm.c | 52
-rw-r--r-- third_party/aom/av1/encoder/mathutils.h | 354
-rw-r--r-- third_party/aom/av1/encoder/mbgraph.c | 16
-rw-r--r-- third_party/aom/av1/encoder/mcomp.c | 1067
-rw-r--r-- third_party/aom/av1/encoder/mcomp.h | 49
-rw-r--r-- third_party/aom/av1/encoder/palette.c | 107
-rw-r--r-- third_party/aom/av1/encoder/palette.h | 22
-rw-r--r-- third_party/aom/av1/encoder/pickrst.c | 692
-rw-r--r-- third_party/aom/av1/encoder/pvq_encoder.c | 52
-rw-r--r-- third_party/aom/av1/encoder/ransac.c | 335
-rw-r--r-- third_party/aom/av1/encoder/ratectrl.c | 120
-rw-r--r-- third_party/aom/av1/encoder/ratectrl.h | 29
-rw-r--r-- third_party/aom/av1/encoder/rd.c | 146
-rw-r--r-- third_party/aom/av1/encoder/rd.h | 15
-rw-r--r-- third_party/aom/av1/encoder/rdopt.c | 3785
-rw-r--r-- third_party/aom/av1/encoder/rdopt.h | 16
-rw-r--r-- third_party/aom/av1/encoder/speed_features.c | 23
-rw-r--r-- third_party/aom/av1/encoder/speed_features.h | 62
-rw-r--r-- third_party/aom/av1/encoder/subexp.c | 77
-rw-r--r-- third_party/aom/av1/encoder/subexp.h | 7
-rw-r--r-- third_party/aom/av1/encoder/temporal_filter.c | 13
-rw-r--r-- third_party/aom/av1/encoder/tokenize.c | 123
-rw-r--r-- third_party/aom/av1/encoder/tokenize.h | 18
-rw-r--r-- third_party/aom/av1/encoder/variance_tree.c | 61
-rw-r--r-- third_party/aom/av1/encoder/variance_tree.h | 96
-rw-r--r-- third_party/aom/av1/encoder/x86/av1_quantize_sse2.c | 93
-rw-r--r-- third_party/aom/av1/encoder/x86/corner_match_sse4.c | 91
-rw-r--r-- third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c | 302
-rw-r--r-- third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c | 180
53 files changed, 6143 insertions, 7582 deletions
diff --git a/third_party/aom/av1/encoder/aq_cyclicrefresh.c b/third_party/aom/av1/encoder/aq_cyclicrefresh.c
index e41c608b6..b2b410617 100644
--- a/third_party/aom/av1/encoder/aq_cyclicrefresh.c
+++ b/third_party/aom/av1/encoder/aq_cyclicrefresh.c
@@ -353,8 +353,8 @@ void av1_cyclic_refresh_check_golden_update(AV1_COMP *const cpi) {
// frame because of the camera movement, set this frame as the golden frame.
// Use 70% and 5% as the thresholds for golden frame refreshing.
// Also, force this frame as a golden update frame if this frame will change
- // the resolution (resize_pending != 0).
- if (cpi->resize_pending != 0 ||
+ // the resolution (av1_resize_pending != 0).
+ if (av1_resize_pending(cpi) ||
(cnt1 * 10 > (70 * rows * cols) && cnt2 * 20 < cnt1)) {
av1_cyclic_refresh_set_golden_update(cpi);
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
diff --git a/third_party/aom/av1/encoder/av1_quantize.c b/third_party/aom/av1/encoder/av1_quantize.c
index 6cffac264..63727df1f 100644
--- a/third_party/aom/av1/encoder/av1_quantize.c
+++ b/third_party/aom/av1/encoder/av1_quantize.c
@@ -1594,50 +1594,48 @@ static int get_qzbin_factor(int q, aom_bit_depth_t bit_depth) {
#endif
}
-void av1_init_quantizer(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- QUANTS *const quants = &cpi->quants;
+void av1_build_quantizer(aom_bit_depth_t bit_depth, int y_dc_delta_q,
+ int uv_dc_delta_q, int uv_ac_delta_q,
+ QUANTS *const quants, Dequants *const deq) {
int i, q, quant;
-#if CONFIG_NEW_QUANT
- int dq;
-#endif
for (q = 0; q < QINDEX_RANGE; q++) {
- const int qzbin_factor = get_qzbin_factor(q, cm->bit_depth);
+ const int qzbin_factor = get_qzbin_factor(q, bit_depth);
const int qrounding_factor = q == 0 ? 64 : 48;
for (i = 0; i < 2; ++i) {
int qrounding_factor_fp = 64;
// y
- quant = i == 0 ? av1_dc_quant(q, cm->y_dc_delta_q, cm->bit_depth)
- : av1_ac_quant(q, 0, cm->bit_depth);
+ quant = i == 0 ? av1_dc_quant(q, y_dc_delta_q, bit_depth)
+ : av1_ac_quant(q, 0, bit_depth);
invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i], quant);
quants->y_quant_fp[q][i] = (1 << 16) / quant;
quants->y_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7;
quants->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7);
quants->y_round[q][i] = (qrounding_factor * quant) >> 7;
- cpi->y_dequant[q][i] = quant;
+ deq->y_dequant[q][i] = quant;
// uv
- quant = i == 0 ? av1_dc_quant(q, cm->uv_dc_delta_q, cm->bit_depth)
- : av1_ac_quant(q, cm->uv_ac_delta_q, cm->bit_depth);
+ quant = i == 0 ? av1_dc_quant(q, uv_dc_delta_q, bit_depth)
+ : av1_ac_quant(q, uv_ac_delta_q, bit_depth);
invert_quant(&quants->uv_quant[q][i], &quants->uv_quant_shift[q][i],
quant);
quants->uv_quant_fp[q][i] = (1 << 16) / quant;
quants->uv_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7;
quants->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7);
quants->uv_round[q][i] = (qrounding_factor * quant) >> 7;
- cpi->uv_dequant[q][i] = quant;
+ deq->uv_dequant[q][i] = quant;
}
#if CONFIG_NEW_QUANT
+ int dq;
for (dq = 0; dq < QUANT_PROFILES; dq++) {
for (i = 0; i < COEF_BANDS; i++) {
- const int y_quant = cpi->y_dequant[q][i != 0];
- const int uvquant = cpi->uv_dequant[q][i != 0];
- av1_get_dequant_val_nuq(y_quant, i, cpi->y_dequant_val_nuq[dq][q][i],
+ const int y_quant = deq->y_dequant[q][i != 0];
+ const int uvquant = deq->uv_dequant[q][i != 0];
+ av1_get_dequant_val_nuq(y_quant, i, deq->y_dequant_val_nuq[dq][q][i],
quants->y_cuml_bins_nuq[dq][q][i], dq);
- av1_get_dequant_val_nuq(uvquant, i, cpi->uv_dequant_val_nuq[dq][q][i],
+ av1_get_dequant_val_nuq(uvquant, i, deq->uv_dequant_val_nuq[dq][q][i],
quants->uv_cuml_bins_nuq[dq][q][i], dq);
}
}
@@ -1650,7 +1648,7 @@ void av1_init_quantizer(AV1_COMP *cpi) {
quants->y_quant_shift[q][i] = quants->y_quant_shift[q][1];
quants->y_zbin[q][i] = quants->y_zbin[q][1];
quants->y_round[q][i] = quants->y_round[q][1];
- cpi->y_dequant[q][i] = cpi->y_dequant[q][1];
+ deq->y_dequant[q][i] = deq->y_dequant[q][1];
quants->uv_quant[q][i] = quants->uv_quant[q][1];
quants->uv_quant_fp[q][i] = quants->uv_quant_fp[q][1];
@@ -1658,11 +1656,19 @@ void av1_init_quantizer(AV1_COMP *cpi) {
quants->uv_quant_shift[q][i] = quants->uv_quant_shift[q][1];
quants->uv_zbin[q][i] = quants->uv_zbin[q][1];
quants->uv_round[q][i] = quants->uv_round[q][1];
- cpi->uv_dequant[q][i] = cpi->uv_dequant[q][1];
+ deq->uv_dequant[q][i] = deq->uv_dequant[q][1];
}
}
}
+void av1_init_quantizer(AV1_COMP *cpi) {
+ AV1_COMMON *const cm = &cpi->common;
+ QUANTS *const quants = &cpi->quants;
+ Dequants *const dequants = &cpi->dequants;
+ av1_build_quantizer(cm->bit_depth, cm->y_dc_delta_q, cm->uv_dc_delta_q,
+ cm->uv_ac_delta_q, quants, dequants);
+}
+
void av1_init_plane_quantizers(const AV1_COMP *cpi, MACROBLOCK *x,
int segment_id) {
const AV1_COMMON *const cm = &cpi->common;
@@ -1712,11 +1718,12 @@ void av1_init_plane_quantizers(const AV1_COMP *cpi, MACROBLOCK *x,
memcpy(&xd->plane[0].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][0],
sizeof(cm->giqmatrix[qmlevel][0]));
#endif
- xd->plane[0].dequant = cpi->y_dequant[qindex];
+ xd->plane[0].dequant = cpi->dequants.y_dequant[qindex];
#if CONFIG_NEW_QUANT
for (dq = 0; dq < QUANT_PROFILES; dq++) {
x->plane[0].cuml_bins_nuq[dq] = quants->y_cuml_bins_nuq[dq][qindex];
- xd->plane[0].dequant_val_nuq[dq] = cpi->y_dequant_val_nuq[dq][qindex];
+ xd->plane[0].dequant_val_nuq[dq] =
+ cpi->dequants.y_dequant_val_nuq[dq][qindex];
}
#endif // CONFIG_NEW_QUANT
@@ -1734,11 +1741,12 @@ void av1_init_plane_quantizers(const AV1_COMP *cpi, MACROBLOCK *x,
memcpy(&xd->plane[i].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][1],
sizeof(cm->giqmatrix[qmlevel][1]));
#endif
- xd->plane[i].dequant = cpi->uv_dequant[qindex];
+ xd->plane[i].dequant = cpi->dequants.uv_dequant[qindex];
#if CONFIG_NEW_QUANT
for (dq = 0; dq < QUANT_PROFILES; dq++) {
x->plane[i].cuml_bins_nuq[dq] = quants->uv_cuml_bins_nuq[dq][qindex];
- xd->plane[i].dequant_val_nuq[dq] = cpi->uv_dequant_val_nuq[dq][qindex];
+ xd->plane[i].dequant_val_nuq[dq] =
+ cpi->dequants.uv_dequant_val_nuq[dq][qindex];
}
#endif // CONFIG_NEW_QUANT
}
diff --git a/third_party/aom/av1/encoder/av1_quantize.h b/third_party/aom/av1/encoder/av1_quantize.h
index c87b6b7dc..4bc9cccc2 100644
--- a/third_party/aom/av1/encoder/av1_quantize.h
+++ b/third_party/aom/av1/encoder/av1_quantize.h
@@ -69,6 +69,17 @@ typedef struct {
DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]);
} QUANTS;
+typedef struct {
+ DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]); // 8: SIMD width
+ DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]); // 8: SIMD width
+#if CONFIG_NEW_QUANT
+ DECLARE_ALIGNED(16, dequant_val_type_nuq,
+ y_dequant_val_nuq[QUANT_PROFILES][QINDEX_RANGE][COEF_BANDS]);
+ DECLARE_ALIGNED(16, dequant_val_type_nuq,
+ uv_dequant_val_nuq[QUANT_PROFILES][QINDEX_RANGE][COEF_BANDS]);
+#endif // CONFIG_NEW_QUANT
+} Dequants;
+
struct AV1_COMP;
struct AV1Common;
@@ -77,6 +88,10 @@ void av1_frame_init_quantizer(struct AV1_COMP *cpi);
void av1_init_plane_quantizers(const struct AV1_COMP *cpi, MACROBLOCK *x,
int segment_id);
+void av1_build_quantizer(aom_bit_depth_t bit_depth, int y_dc_delta_q,
+ int uv_dc_delta_q, int uv_ac_delta_q,
+ QUANTS *const quants, Dequants *const deq);
+
void av1_init_quantizer(struct AV1_COMP *cpi);
void av1_set_quantizer(struct AV1Common *cm, int q);
diff --git a/third_party/aom/av1/encoder/bitstream.c b/third_party/aom/av1/encoder/bitstream.c
index 7cc6179ea..f8378b14d 100644
--- a/third_party/aom/av1/encoder/bitstream.c
+++ b/third_party/aom/av1/encoder/bitstream.c
@@ -63,21 +63,12 @@
static struct av1_token intra_mode_encodings[INTRA_MODES];
static struct av1_token switchable_interp_encodings[SWITCHABLE_FILTERS];
-#if CONFIG_EXT_PARTITION_TYPES && !CONFIG_EC_MULTISYMBOL
-static const struct av1_token ext_partition_encodings[EXT_PARTITION_TYPES] = {
- { 0, 1 }, { 4, 3 }, { 12, 4 }, { 7, 3 },
- { 10, 4 }, { 11, 4 }, { 26, 5 }, { 27, 5 }
-};
-#endif
static struct av1_token partition_encodings[PARTITION_TYPES];
-#if !CONFIG_REF_MV
-static struct av1_token inter_mode_encodings[INTER_MODES];
-#endif
#if CONFIG_EXT_INTER
static const struct av1_token
inter_compound_mode_encodings[INTER_COMPOUND_MODES] = {
- { 2, 2 }, { 50, 6 }, { 51, 6 }, { 24, 5 }, { 52, 6 },
- { 53, 6 }, { 54, 6 }, { 55, 6 }, { 0, 1 }, { 7, 3 }
+ { 2, 2 }, { 12, 4 }, { 52, 6 }, { 53, 6 },
+ { 54, 6 }, { 55, 6 }, { 0, 1 }, { 7, 3 }
};
#endif // CONFIG_EXT_INTER
#if CONFIG_PALETTE
@@ -85,16 +76,6 @@ static struct av1_token palette_size_encodings[PALETTE_SIZES];
static struct av1_token palette_color_index_encodings[PALETTE_SIZES]
[PALETTE_COLORS];
#endif // CONFIG_PALETTE
-#if !CONFIG_EC_MULTISYMBOL
-static const struct av1_token tx_size_encodings[MAX_TX_DEPTH][TX_SIZES] = {
- { { 0, 1 }, { 1, 1 } }, // Max tx_size is 8X8
- { { 0, 1 }, { 2, 2 }, { 3, 2 } }, // Max tx_size is 16X16
- { { 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 } }, // Max tx_size is 32X32
-#if CONFIG_TX64X64
- { { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 } }, // Max tx_size 64X64
-#endif // CONFIG_TX64X64
-};
-#endif
#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
static INLINE void write_uniform(aom_writer *w, int n, int v) {
@@ -125,7 +106,9 @@ static struct av1_token intra_filter_encodings[INTRA_FILTERS];
#endif // CONFIG_INTRA_INTERP
#endif // CONFIG_EXT_INTRA
#if CONFIG_EXT_INTER
+#if CONFIG_INTERINTRA
static struct av1_token interintra_mode_encodings[INTERINTRA_MODES];
+#endif
#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
static struct av1_token compound_type_encodings[COMPOUND_TYPES];
#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
@@ -162,9 +145,6 @@ void av1_encode_token_init(void) {
av1_tokens_from_tree(intra_mode_encodings, av1_intra_mode_tree);
av1_tokens_from_tree(switchable_interp_encodings, av1_switchable_interp_tree);
av1_tokens_from_tree(partition_encodings, av1_partition_tree);
-#if !CONFIG_REF_MV
- av1_tokens_from_tree(inter_mode_encodings, av1_inter_mode_tree);
-#endif
#if CONFIG_PALETTE
av1_tokens_from_tree(palette_size_encodings, av1_palette_size_tree);
@@ -178,7 +158,9 @@ void av1_encode_token_init(void) {
av1_tokens_from_tree(intra_filter_encodings, av1_intra_filter_tree);
#endif // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
#if CONFIG_EXT_INTER
+#if CONFIG_INTERINTRA
av1_tokens_from_tree(interintra_mode_encodings, av1_interintra_mode_tree);
+#endif // CONFIG_INTERINTRA
#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
av1_tokens_from_tree(compound_type_encodings, av1_compound_type_tree);
#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
@@ -195,7 +177,6 @@ void av1_encode_token_init(void) {
av1_switchable_restore_tree);
#endif // CONFIG_LOOP_RESTORATION
-#if CONFIG_EC_MULTISYMBOL
/* This hack is necessary when CONFIG_DUAL_FILTER is enabled because the five
SWITCHABLE_FILTERS are not consecutive, e.g., 0, 1, 2, 3, 4, when doing
an in-order traversal of the av1_switchable_interp_tree structure. */
@@ -218,7 +199,6 @@ void av1_encode_token_init(void) {
av1_intra_mode_tree);
av1_indices_from_tree(av1_inter_mode_ind, av1_inter_mode_inv,
av1_inter_mode_tree);
-#endif
}
static void write_intra_mode_kf(const AV1_COMMON *cm, FRAME_CONTEXT *frame_ctx,
@@ -228,37 +208,28 @@ static void write_intra_mode_kf(const AV1_COMMON *cm, FRAME_CONTEXT *frame_ctx,
#if CONFIG_INTRABC
assert(!is_intrabc_block(&mi->mbmi));
#endif // CONFIG_INTRABC
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(w, av1_intra_mode_ind[mode],
get_y_mode_cdf(frame_ctx, mi, above_mi, left_mi, block),
INTRA_MODES);
(void)cm;
-#else
- av1_write_token(w, av1_intra_mode_tree,
- get_y_mode_probs(cm, mi, above_mi, left_mi, block),
- &intra_mode_encodings[mode]);
- (void)frame_ctx;
-#endif
}
-#if CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
static void write_interintra_mode(aom_writer *w, INTERINTRA_MODE mode,
const aom_prob *probs) {
av1_write_token(w, av1_interintra_mode_tree, probs,
&interintra_mode_encodings[mode]);
}
-#endif // CONFIG_EXT_INTER
+#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
static void write_inter_mode(aom_writer *w, PREDICTION_MODE mode,
FRAME_CONTEXT *ec_ctx, const int16_t mode_ctx) {
-#if CONFIG_REF_MV
const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK;
const aom_prob newmv_prob = ec_ctx->newmv_prob[newmv_ctx];
-#define IS_NEWMV_MODE(mode) ((mode) == NEWMV)
- aom_write(w, !IS_NEWMV_MODE(mode), newmv_prob);
+ aom_write(w, mode != NEWMV, newmv_prob);
- if (!IS_NEWMV_MODE(mode)) {
+ if (mode != NEWMV) {
const int16_t zeromv_ctx = (mode_ctx >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;
const aom_prob zeromv_prob = ec_ctx->zeromv_prob[zeromv_ctx];
@@ -281,25 +252,8 @@ static void write_inter_mode(aom_writer *w, PREDICTION_MODE mode,
aom_write(w, mode != NEARESTMV, refmv_prob);
}
}
-
-#undef IS_NEWMV_MODE
-
-#else // !CONFIG_REF_MV
- assert(is_inter_mode(mode));
-#if CONFIG_EC_MULTISYMBOL
- aom_write_symbol(w, av1_inter_mode_ind[INTER_OFFSET(mode)],
- ec_ctx->inter_mode_cdf[mode_ctx], INTER_MODES);
-#else
- {
- const aom_prob *const inter_probs = ec_ctx->inter_mode_probs[mode_ctx];
- av1_write_token(w, av1_inter_mode_tree, inter_probs,
- &inter_mode_encodings[INTER_OFFSET(mode)]);
- }
-#endif
-#endif
}
-#if CONFIG_REF_MV
static void write_drl_idx(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi,
const MB_MODE_INFO_EXT *mbmi_ext, aom_writer *w) {
uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
@@ -341,7 +295,6 @@ static void write_drl_idx(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi,
return;
}
}
-#endif
#if CONFIG_EXT_INTER
static void write_inter_compound_mode(AV1_COMMON *cm, aom_writer *w,
@@ -409,22 +362,22 @@ static void write_tx_size_vartx(const AV1_COMMON *cm, const MACROBLOCKD *xd,
const int max_blocks_high = max_block_high(xd, mbmi->sb_type, 0);
const int max_blocks_wide = max_block_wide(xd, mbmi->sb_type, 0);
- int ctx = txfm_partition_context(xd->above_txfm_context + tx_col,
- xd->left_txfm_context + tx_row,
+ int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
+ xd->left_txfm_context + blk_row,
mbmi->sb_type, tx_size);
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
if (depth == MAX_VARTX_DEPTH) {
- txfm_partition_update(xd->above_txfm_context + tx_col,
- xd->left_txfm_context + tx_row, tx_size, tx_size);
+ txfm_partition_update(xd->above_txfm_context + blk_col,
+ xd->left_txfm_context + blk_row, tx_size, tx_size);
return;
}
if (tx_size == mbmi->inter_tx_size[tx_row][tx_col]) {
aom_write(w, 0, cm->fc->txfm_partition_prob[ctx]);
- txfm_partition_update(xd->above_txfm_context + tx_col,
- xd->left_txfm_context + tx_row, tx_size, tx_size);
+ txfm_partition_update(xd->above_txfm_context + blk_col,
+ xd->left_txfm_context + blk_row, tx_size, tx_size);
} else {
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
const int bsl = tx_size_wide_unit[sub_txs];
@@ -433,8 +386,8 @@ static void write_tx_size_vartx(const AV1_COMMON *cm, const MACROBLOCKD *xd,
aom_write(w, 1, cm->fc->txfm_partition_prob[ctx]);
if (tx_size == TX_8X8) {
- txfm_partition_update(xd->above_txfm_context + tx_col,
- xd->left_txfm_context + tx_row, sub_txs, tx_size);
+ txfm_partition_update(xd->above_txfm_context + blk_col,
+ xd->left_txfm_context + blk_row, sub_txs, tx_size);
return;
}
@@ -482,22 +435,18 @@ static void write_selected_tx_size(const AV1_COMMON *cm, const MACROBLOCKD *xd,
const int depth = tx_size_to_depth(coded_tx_size);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(xd, mbmi)));
- assert(
- IMPLIES(is_rect_tx(tx_size), tx_size == max_txsize_rect_lookup[bsize]));
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(w, depth, ec_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx],
tx_size_cat + 2);
-#else
- av1_write_token(w, av1_tx_size_tree[tx_size_cat],
- ec_ctx->tx_size_probs[tx_size_cat][tx_size_ctx],
- &tx_size_encodings[tx_size_cat][depth]);
-#endif
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+ if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size)
+ aom_write(w, tx_size == quarter_txsize_lookup[bsize],
+ cm->fc->quarter_tx_size_prob);
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
}
}
-#if CONFIG_REF_MV
static void update_inter_mode_probs(AV1_COMMON *cm, aom_writer *w,
FRAME_COUNTS *counts) {
int i;
@@ -519,7 +468,6 @@ static void update_inter_mode_probs(AV1_COMMON *cm, aom_writer *w,
av1_cond_prob_diff_update(w, &cm->fc->drl_prob[i], counts->drl_mode[i],
probwt);
}
-#endif
#if CONFIG_EXT_INTER
static void update_inter_compound_mode_probs(AV1_COMMON *cm, int probwt,
@@ -598,17 +546,8 @@ static void write_delta_qindex(const AV1_COMMON *cm, const MACROBLOCKD *xd,
(void)xd;
#endif
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(w, AOMMIN(abs, DELTA_Q_SMALL), ec_ctx->delta_q_cdf,
DELTA_Q_PROBS + 1);
-#else
- int i = 0;
- while (i < DELTA_Q_SMALL && i <= abs) {
- int bit = (i < abs);
- aom_write(w, bit, ec_ctx->delta_q_prob[i]);
- i++;
- }
-#endif
if (!smallval) {
rem_bits = OD_ILOG_NZ(abs - 1) - 1;
@@ -655,17 +594,8 @@ static void write_delta_lflevel(const AV1_COMMON *cm, const MACROBLOCKD *xd,
(void)xd;
#endif
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(w, AOMMIN(abs, DELTA_LF_SMALL), ec_ctx->delta_lf_cdf,
DELTA_LF_PROBS + 1);
-#else
- int i = 0;
- while (i < DELTA_LF_SMALL && i <= abs) {
- int bit = (i < abs);
- aom_write(w, bit, ec_ctx->delta_lf_prob[i]);
- i++;
- }
-#endif // CONFIG_EC_MULTISYMBOL
if (!smallval) {
rem_bits = OD_ILOG_NZ(abs - 1) - 1;
@@ -908,7 +838,7 @@ static INLINE void write_coeff_extra(const aom_prob *pb, int value,
}
#endif
-#if CONFIG_NEW_TOKENSET && !CONFIG_LV_MAP
+#if !CONFIG_LV_MAP
static void pack_mb_tokens(aom_writer *w, const TOKENEXTRA **tp,
const TOKENEXTRA *const stop,
aom_bit_depth_t bit_depth, const TX_SIZE tx_size,
@@ -921,18 +851,22 @@ static void pack_mb_tokens(aom_writer *w, const TOKENEXTRA **tp,
while (p < stop && p->token != EOSB_TOKEN) {
const int token = p->token;
+ const int eob_val = p->eob_val;
if (token == BLOCK_Z_TOKEN) {
aom_write_symbol(w, 0, *p->head_cdf, HEAD_TOKENS + 1);
p++;
+#if CONFIG_VAR_TX
+ break;
+#endif
continue;
}
const av1_extra_bit *const extra_bits = &av1_extra_bits[token];
- if (p->eob_val == LAST_EOB) {
+ if (eob_val == LAST_EOB) {
// Just code a flag indicating whether the value is >1 or 1.
aom_write_bit(w, token != ONE_TOKEN);
} else {
- int comb_symb = 2 * AOMMIN(token, TWO_TOKEN) - p->eob_val + p->first_val;
+ int comb_symb = 2 * AOMMIN(token, TWO_TOKEN) - eob_val + p->first_val;
aom_write_symbol(w, comb_symb, *p->head_cdf, HEAD_TOKENS + p->first_val);
}
if (token > ONE_TOKEN) {
@@ -966,104 +900,13 @@ static void pack_mb_tokens(aom_writer *w, const TOKENEXTRA **tp,
#if CONFIG_VAR_TX
++count;
- if (token == EOB_TOKEN || count == seg_eob) break;
-#endif
- }
-
- *tp = p;
-}
-#else // CONFIG_NEW_TOKENSET
-#if !CONFIG_LV_MAP
-static void pack_mb_tokens(aom_writer *w, const TOKENEXTRA **tp,
- const TOKENEXTRA *const stop,
- aom_bit_depth_t bit_depth, const TX_SIZE tx_size,
- TOKEN_STATS *token_stats) {
- const TOKENEXTRA *p = *tp;
-#if CONFIG_VAR_TX
- int count = 0;
- const int seg_eob = tx_size_2d[tx_size];
-#endif
-
- while (p < stop && p->token != EOSB_TOKEN) {
- const int token = p->token;
-#if !CONFIG_EC_MULTISYMBOL
- const struct av1_token *const coef_encoding = &av1_coef_encodings[token];
- int coef_value = coef_encoding->value;
- int coef_length = coef_encoding->len;
-#endif // !CONFIG_EC_MULTISYMBOL
- const av1_extra_bit *const extra_bits = &av1_extra_bits[token];
-
-#if CONFIG_EC_MULTISYMBOL
- /* skip one or two nodes */
- if (!p->skip_eob_node)
- aom_write_record(w, token != EOB_TOKEN, p->context_tree[0], token_stats);
- if (token != EOB_TOKEN) {
- aom_write_record(w, token != ZERO_TOKEN, p->context_tree[1], token_stats);
- if (token != ZERO_TOKEN) {
- aom_write_symbol(w, token - ONE_TOKEN, *p->token_cdf,
- CATEGORY6_TOKEN - ONE_TOKEN + 1);
- }
- }
-#else
- /* skip one or two nodes */
- if (p->skip_eob_node)
- coef_length -= p->skip_eob_node;
- else
- aom_write_record(w, token != EOB_TOKEN, p->context_tree[0], token_stats);
-
- if (token != EOB_TOKEN) {
- aom_write_record(w, token != ZERO_TOKEN, p->context_tree[1], token_stats);
-
- if (token != ZERO_TOKEN) {
- aom_write_record(w, token != ONE_TOKEN, p->context_tree[2],
- token_stats);
-
- if (token != ONE_TOKEN) {
- const int unconstrained_len = UNCONSTRAINED_NODES - p->skip_eob_node;
- aom_write_tree_record(
- w, av1_coef_con_tree,
- av1_pareto8_full[p->context_tree[PIVOT_NODE] - 1], coef_value,
- coef_length - unconstrained_len, 0, token_stats);
- }
- }
- }
-#endif // CONFIG_EC_MULTISYMBOL
-
- if (extra_bits->base_val) {
- const int bit_string = p->extra;
- const int bit_string_length = extra_bits->len; // Length of extra bits to
- // be written excluding
- // the sign bit.
- int skip_bits = (extra_bits->base_val == CAT6_MIN_VAL)
- ? (int)sizeof(av1_cat6_prob) -
- av1_get_cat6_extrabits_size(tx_size, bit_depth)
- : 0;
-
- assert(!(bit_string >> (bit_string_length - skip_bits + 1)));
- if (bit_string_length > 0) {
-#if CONFIG_NEW_MULTISYMBOL
- skip_bits &= ~3;
- write_coeff_extra(extra_bits->cdf, bit_string >> 1,
- bit_string_length - skip_bits, w);
-#else
- write_coeff_extra(extra_bits->prob, bit_string >> 1, bit_string_length,
- skip_bits, w, token_stats);
-#endif
- }
- aom_write_bit_record(w, bit_string & 1, token_stats);
- }
- ++p;
-
-#if CONFIG_VAR_TX
- ++count;
- if (token == EOB_TOKEN || count == seg_eob) break;
+ if (eob_val == EARLY_EOB || count == seg_eob) break;
#endif
}
*tp = p;
}
#endif // !CONFIG_LV_MAP
-#endif // CONFIG_NEW_TOKENSET
#else // !CONFIG_PVQ
static PVQ_INFO *get_pvq_block(PVQ_QUEUE *pvq_q) {
PVQ_INFO *pvq;
@@ -1150,6 +993,80 @@ static void pack_pvq_tokens(aom_writer *w, MACROBLOCK *const x,
#endif // !CONFIG_PVG
#if CONFIG_VAR_TX && !CONFIG_COEF_INTERLEAVE
+#if CONFIG_LV_MAP
+static void pack_txb_tokens(aom_writer *w,
+#if CONFIG_LV_MAP
+ AV1_COMMON *cm,
+#endif // CONFIG_LV_MAP
+ const TOKENEXTRA **tp,
+ const TOKENEXTRA *const tok_end,
+#if CONFIG_PVQ || CONFIG_LV_MAP
+ MACROBLOCK *const x,
+#endif
+ MACROBLOCKD *xd, MB_MODE_INFO *mbmi, int plane,
+ BLOCK_SIZE plane_bsize, aom_bit_depth_t bit_depth,
+ int block, int blk_row, int blk_col,
+ TX_SIZE tx_size, TOKEN_STATS *token_stats) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ const int tx_row = blk_row >> (1 - pd->subsampling_y);
+ const int tx_col = blk_col >> (1 - pd->subsampling_x);
+ TX_SIZE plane_tx_size;
+ const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
+ const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
+
+ plane_tx_size =
+ plane ? uv_txsize_lookup[bsize][mbmi->inter_tx_size[tx_row][tx_col]][0][0]
+ : mbmi->inter_tx_size[tx_row][tx_col];
+
+ if (tx_size == plane_tx_size) {
+ TOKEN_STATS tmp_token_stats;
+ init_token_stats(&tmp_token_stats);
+
+#if !CONFIG_PVQ
+ tran_low_t *tcoeff = BLOCK_OFFSET(x->mbmi_ext->tcoeff[plane], block);
+ uint16_t eob = x->mbmi_ext->eobs[plane][block];
+ TXB_CTX txb_ctx = { x->mbmi_ext->txb_skip_ctx[plane][block],
+ x->mbmi_ext->dc_sign_ctx[plane][block] };
+ av1_write_coeffs_txb(cm, xd, w, block, plane, tcoeff, eob, &txb_ctx);
+#else
+ pack_pvq_tokens(w, x, xd, plane, bsize, tx_size);
+#endif
+#if CONFIG_RD_DEBUG
+ token_stats->txb_coeff_cost_map[blk_row][blk_col] = tmp_token_stats.cost;
+ token_stats->cost += tmp_token_stats.cost;
+#endif
+ } else {
+ const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+ const int bsl = tx_size_wide_unit[sub_txs];
+ int i;
+
+ assert(bsl > 0);
+
+ for (i = 0; i < 4; ++i) {
+ const int offsetr = blk_row + (i >> 1) * bsl;
+ const int offsetc = blk_col + (i & 0x01) * bsl;
+ const int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
+
+ if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
+
+ pack_txb_tokens(w,
+#if CONFIG_LV_MAP
+ cm,
+#endif
+ tp, tok_end,
+#if CONFIG_PVQ || CONFIG_LV_MAP
+ x,
+#endif
+ xd, mbmi, plane, plane_bsize, bit_depth, block, offsetr,
+ offsetc, sub_txs, token_stats);
+ block += step;
+ }
+ }
+}
+#else // CONFIG_LV_MAP
static void pack_txb_tokens(aom_writer *w, const TOKENEXTRA **tp,
const TOKENEXTRA *const tok_end,
#if CONFIG_PVQ
@@ -1209,16 +1126,13 @@ static void pack_txb_tokens(aom_writer *w, const TOKENEXTRA **tp,
}
}
}
-#endif
+#endif // CONFIG_LV_MAP
+#endif // CONFIG_VAR_TX
static void write_segment_id(aom_writer *w, const struct segmentation *seg,
struct segmentation_probs *segp, int segment_id) {
if (seg->enabled && seg->update_map) {
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(w, segment_id, segp->tree_cdf, MAX_SEGMENTS);
-#else
- aom_write_tree(w, av1_segment_tree, segp->tree_probs, segment_id, 3, 0);
-#endif
}
}
@@ -1242,7 +1156,7 @@ static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd,
#if SUB8X8_COMP_REF
aom_write(w, is_compound, av1_get_reference_mode_prob(cm, xd));
#else
- if (mbmi->sb_type >= BLOCK_8X8)
+ if (mbmi->sb_type != BLOCK_4X4)
aom_write(w, is_compound, av1_get_reference_mode_prob(cm, xd));
#endif
} else {
@@ -1307,7 +1221,9 @@ static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd,
#if CONFIG_FILTER_INTRA
static void write_filter_intra_mode_info(const AV1_COMMON *const cm,
+ const MACROBLOCKD *xd,
const MB_MODE_INFO *const mbmi,
+ int mi_row, int mi_col,
aom_writer *w) {
if (mbmi->mode == DC_PRED
#if CONFIG_PALETTE
@@ -1323,6 +1239,17 @@ static void write_filter_intra_mode_info(const AV1_COMMON *const cm,
}
}
+#if CONFIG_CB4X4
+ if (!is_chroma_reference(mi_row, mi_col, mbmi->sb_type,
+ xd->plane[1].subsampling_x,
+ xd->plane[1].subsampling_y))
+ return;
+#else
+ (void)xd;
+ (void)mi_row;
+ (void)mi_col;
+#endif // CONFIG_CB4X4
+
if (mbmi->uv_mode == DC_PRED
#if CONFIG_PALETTE
&& mbmi->palette_mode_info.palette_size[1] == 0
@@ -1358,15 +1285,9 @@ static void write_intra_angle_info(const MACROBLOCKD *xd,
#if CONFIG_INTRA_INTERP
p_angle = mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
if (av1_is_intra_filter_switchable(p_angle)) {
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(w, mbmi->intra_filter,
ec_ctx->intra_filter_cdf[intra_filter_ctx],
INTRA_FILTERS);
-#else
- av1_write_token(w, av1_intra_filter_tree,
- ec_ctx->intra_filter_probs[intra_filter_ctx],
- &intra_filter_encodings[mbmi->intra_filter]);
-#endif // CONFIG_EC_MULTISYMBOL
}
#endif // CONFIG_INTRA_INTERP
}
@@ -1409,15 +1330,9 @@ static void write_mb_interp_filter(AV1_COMP *cpi, const MACROBLOCKD *xd,
(mbmi->ref_frame[1] > INTRA_FRAME &&
has_subpel_mv_component(xd->mi[0], xd, dir + 2))) {
const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(w, av1_switchable_interp_ind[mbmi->interp_filter[dir]],
ec_ctx->switchable_interp_cdf[ctx],
SWITCHABLE_FILTERS);
-#else
- av1_write_token(w, av1_switchable_interp_tree,
- ec_ctx->switchable_interp_prob[ctx],
- &switchable_interp_encodings[mbmi->interp_filter[dir]]);
-#endif
++cpi->interp_filter_selected[0][mbmi->interp_filter[dir]];
} else {
assert(mbmi->interp_filter[dir] == EIGHTTAP_REGULAR);
@@ -1426,14 +1341,8 @@ static void write_mb_interp_filter(AV1_COMP *cpi, const MACROBLOCKD *xd,
#else
{
const int ctx = av1_get_pred_context_switchable_interp(xd);
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(w, av1_switchable_interp_ind[mbmi->interp_filter],
ec_ctx->switchable_interp_cdf[ctx], SWITCHABLE_FILTERS);
-#else
- av1_write_token(w, av1_switchable_interp_tree,
- ec_ctx->switchable_interp_prob[ctx],
- &switchable_interp_encodings[mbmi->interp_filter]);
-#endif
++cpi->interp_filter_selected[0][mbmi->interp_filter];
}
#endif // CONFIG_DUAL_FILTER
@@ -1442,48 +1351,91 @@ static void write_mb_interp_filter(AV1_COMP *cpi, const MACROBLOCKD *xd,
#if CONFIG_PALETTE
#if CONFIG_PALETTE_DELTA_ENCODING
-// Write luma palette color values with delta encoding. Write the first value as
-// literal, and the deltas between each value and the previous one. The luma
-// palette is sorted so each delta is larger than 0.
-static void write_palette_colors_y(const PALETTE_MODE_INFO *const pmi,
- int bit_depth, aom_writer *w) {
- const int n = pmi->palette_size[0];
- int min_bits, i;
- int bits = av1_get_palette_delta_bits_y(pmi, bit_depth, &min_bits);
+// Transmit color values with delta encoding. Write the first value as
+// literal, and the deltas between each value and the previous one. "min_val" is
+// the smallest possible value of the deltas.
+static void delta_encode_palette_colors(const int *colors, int num,
+ int bit_depth, int min_val,
+ aom_writer *w) {
+ if (num <= 0) return;
+ assert(colors[0] < (1 << bit_depth));
+ aom_write_literal(w, colors[0], bit_depth);
+ if (num == 1) return;
+ int max_delta = 0;
+ int deltas[PALETTE_MAX_SIZE];
+ memset(deltas, 0, sizeof(deltas));
+ for (int i = 1; i < num; ++i) {
+ assert(colors[i] < (1 << bit_depth));
+ const int delta = colors[i] - colors[i - 1];
+ deltas[i - 1] = delta;
+ assert(delta >= min_val);
+ if (delta > max_delta) max_delta = delta;
+ }
+ const int min_bits = bit_depth - 3;
+ int bits = AOMMAX(av1_ceil_log2(max_delta + 1 - min_val), min_bits);
+ assert(bits <= bit_depth);
+ int range = (1 << bit_depth) - colors[0] - min_val;
aom_write_literal(w, bits - min_bits, 2);
- aom_write_literal(w, pmi->palette_colors[0], bit_depth);
- for (i = 1; i < n; ++i) {
- aom_write_literal(
- w, pmi->palette_colors[i] - pmi->palette_colors[i - 1] - 1, bits);
- bits =
- AOMMIN(bits, av1_ceil_log2((1 << bit_depth) - pmi->palette_colors[i]));
+ for (int i = 0; i < num - 1; ++i) {
+ aom_write_literal(w, deltas[i] - min_val, bits);
+ range -= deltas[i];
+ bits = AOMMIN(bits, av1_ceil_log2(range));
}
}
-// Write chroma palette color values. Use delta encoding for u channel as its
-// palette is sorted. For v channel, either use delta encoding or transmit
-// raw values directly, whichever costs less.
-static void write_palette_colors_uv(const PALETTE_MODE_INFO *const pmi,
+// Transmit luma palette color values. First signal if each color in the color
+// cache is used. Those colors that are not in the cache are transmitted with
+// delta encoding.
+static void write_palette_colors_y(const MACROBLOCKD *const xd,
+ const PALETTE_MODE_INFO *const pmi,
+ int bit_depth, aom_writer *w) {
+ const int n = pmi->palette_size[0];
+ const MODE_INFO *const above_mi = xd->above_mi;
+ const MODE_INFO *const left_mi = xd->left_mi;
+ uint16_t color_cache[2 * PALETTE_MAX_SIZE];
+ const int n_cache = av1_get_palette_cache(above_mi, left_mi, 0, color_cache);
+ int out_cache_colors[PALETTE_MAX_SIZE];
+ uint8_t cache_color_found[2 * PALETTE_MAX_SIZE];
+ const int n_out_cache =
+ av1_index_color_cache(color_cache, n_cache, pmi->palette_colors, n,
+ cache_color_found, out_cache_colors);
+ int n_in_cache = 0;
+ for (int i = 0; i < n_cache && n_in_cache < n; ++i) {
+ const int found = cache_color_found[i];
+ aom_write_bit(w, found);
+ n_in_cache += found;
+ }
+ assert(n_in_cache + n_out_cache == n);
+ delta_encode_palette_colors(out_cache_colors, n_out_cache, bit_depth, 1, w);
+}
+
+// Write chroma palette color values. U channel is handled similarly to the luma
+// channel. For v channel, either use delta encoding or transmit raw values
+// directly, whichever costs less.
+static void write_palette_colors_uv(const MACROBLOCKD *const xd,
+ const PALETTE_MODE_INFO *const pmi,
int bit_depth, aom_writer *w) {
- int i;
const int n = pmi->palette_size[1];
-#if CONFIG_HIGHBITDEPTH
const uint16_t *colors_u = pmi->palette_colors + PALETTE_MAX_SIZE;
const uint16_t *colors_v = pmi->palette_colors + 2 * PALETTE_MAX_SIZE;
-#else
- const uint8_t *colors_u = pmi->palette_colors + PALETTE_MAX_SIZE;
- const uint8_t *colors_v = pmi->palette_colors + 2 * PALETTE_MAX_SIZE;
-#endif // CONFIG_HIGHBITDEPTH
// U channel colors.
- int min_bits_u = 0;
- int bits_u = av1_get_palette_delta_bits_u(pmi, bit_depth, &min_bits_u);
- aom_write_literal(w, bits_u - min_bits_u, 2);
- aom_write_literal(w, colors_u[0], bit_depth);
- for (i = 1; i < n; ++i) {
- aom_write_literal(w, colors_u[i] - colors_u[i - 1], bits_u);
- bits_u = AOMMIN(bits_u, av1_ceil_log2(1 + (1 << bit_depth) - colors_u[i]));
- }
- // V channel colors.
+ const MODE_INFO *const above_mi = xd->above_mi;
+ const MODE_INFO *const left_mi = xd->left_mi;
+ uint16_t color_cache[2 * PALETTE_MAX_SIZE];
+ const int n_cache = av1_get_palette_cache(above_mi, left_mi, 1, color_cache);
+ int out_cache_colors[PALETTE_MAX_SIZE];
+ uint8_t cache_color_found[2 * PALETTE_MAX_SIZE];
+ const int n_out_cache = av1_index_color_cache(
+ color_cache, n_cache, colors_u, n, cache_color_found, out_cache_colors);
+ int n_in_cache = 0;
+ for (int i = 0; i < n_cache && n_in_cache < n; ++i) {
+ const int found = cache_color_found[i];
+ aom_write_bit(w, found);
+ n_in_cache += found;
+ }
+ delta_encode_palette_colors(out_cache_colors, n_out_cache, bit_depth, 0, w);
+
+ // V channel colors. Don't use color cache as the colors are not sorted.
const int max_val = 1 << bit_depth;
int zero_count = 0, min_bits_v = 0;
int bits_v =
@@ -1492,10 +1444,12 @@ static void write_palette_colors_uv(const PALETTE_MODE_INFO *const pmi,
2 + bit_depth + (bits_v + 1) * (n - 1) - zero_count;
const int rate_using_raw = bit_depth * n;
if (rate_using_delta < rate_using_raw) { // delta encoding
+ assert(colors_v[0] < (1 << bit_depth));
aom_write_bit(w, 1);
aom_write_literal(w, bits_v - min_bits_v, 2);
aom_write_literal(w, colors_v[0], bit_depth);
- for (i = 1; i < n; ++i) {
+ for (int i = 1; i < n; ++i) {
+ assert(colors_v[i] < (1 << bit_depth));
if (colors_v[i] == colors_v[i - 1]) { // No need to signal sign bit.
aom_write_literal(w, 0, bits_v);
continue;
@@ -1512,7 +1466,10 @@ static void write_palette_colors_uv(const PALETTE_MODE_INFO *const pmi,
}
} else { // Transmit raw values.
aom_write_bit(w, 0);
- for (i = 0; i < n; ++i) aom_write_literal(w, colors_v[i], bit_depth);
+ for (int i = 0; i < n; ++i) {
+ assert(colors_v[i] < (1 << bit_depth));
+ aom_write_literal(w, colors_v[i], bit_depth);
+ }
}
}
#endif // CONFIG_PALETTE_DELTA_ENCODING
@@ -1542,11 +1499,12 @@ static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd,
av1_default_palette_y_size_prob[bsize - BLOCK_8X8],
&palette_size_encodings[n - PALETTE_MIN_SIZE]);
#if CONFIG_PALETTE_DELTA_ENCODING
- write_palette_colors_y(pmi, cm->bit_depth, w);
+ write_palette_colors_y(xd, pmi, cm->bit_depth, w);
#else
- int i;
- for (i = 0; i < n; ++i)
+ for (int i = 0; i < n; ++i) {
+ assert(pmi->palette_colors[i] < (1 << cm->bit_depth));
aom_write_literal(w, pmi->palette_colors[i], cm->bit_depth);
+ }
#endif // CONFIG_PALETTE_DELTA_ENCODING
write_uniform(w, n, pmi->palette_first_color_idx[0]);
}
@@ -1561,10 +1519,13 @@ static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd,
av1_default_palette_uv_size_prob[bsize - BLOCK_8X8],
&palette_size_encodings[n - PALETTE_MIN_SIZE]);
#if CONFIG_PALETTE_DELTA_ENCODING
- write_palette_colors_uv(pmi, cm->bit_depth, w);
+ write_palette_colors_uv(xd, pmi, cm->bit_depth, w);
#else
- int i;
- for (i = 0; i < n; ++i) {
+ for (int i = 0; i < n; ++i) {
+ assert(pmi->palette_colors[PALETTE_MAX_SIZE + i] <
+ (1 << cm->bit_depth));
+ assert(pmi->palette_colors[2 * PALETTE_MAX_SIZE + i] <
+ (1 << cm->bit_depth));
aom_write_literal(w, pmi->palette_colors[PALETTE_MAX_SIZE + i],
cm->bit_depth);
aom_write_literal(w, pmi->palette_colors[2 * PALETTE_MAX_SIZE + i],
@@ -1625,30 +1586,17 @@ void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
if (is_inter) {
assert(ext_tx_used_inter[eset][tx_type]);
if (eset > 0) {
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(w, av1_ext_tx_inter_ind[eset][tx_type],
ec_ctx->inter_ext_tx_cdf[eset][square_tx_size],
ext_tx_cnt_inter[eset]);
-#else
- av1_write_token(w, av1_ext_tx_inter_tree[eset],
- ec_ctx->inter_ext_tx_prob[eset][square_tx_size],
- &ext_tx_inter_encodings[eset][tx_type]);
-#endif
}
} else if (ALLOW_INTRA_EXT_TX) {
assert(ext_tx_used_intra[eset][tx_type]);
if (eset > 0) {
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(
w, av1_ext_tx_intra_ind[eset][tx_type],
ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][mbmi->mode],
ext_tx_cnt_intra[eset]);
-#else
- av1_write_token(
- w, av1_ext_tx_intra_tree[eset],
- ec_ctx->intra_ext_tx_prob[eset][square_tx_size][mbmi->mode],
- &ext_tx_intra_encodings[eset][tx_type]);
-#endif
}
}
}
@@ -1662,28 +1610,14 @@ void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
#endif // CONFIG_SUPERTX
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
if (is_inter) {
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(w, av1_ext_tx_ind[tx_type],
ec_ctx->inter_ext_tx_cdf[tx_size], TX_TYPES);
-#else
- av1_write_token(w, av1_ext_tx_tree, ec_ctx->inter_ext_tx_prob[tx_size],
- &ext_tx_encodings[tx_type]);
-#endif
} else {
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(
w, av1_ext_tx_ind[tx_type],
ec_ctx->intra_ext_tx_cdf[tx_size]
[intra_mode_to_tx_type_context[mbmi->mode]],
TX_TYPES);
-#else
- av1_write_token(
- w, av1_ext_tx_tree,
- ec_ctx
- ->intra_ext_tx_prob[tx_size]
- [intra_mode_to_tx_type_context[mbmi->mode]],
- &ext_tx_encodings[tx_type]);
-#endif
}
}
#endif // CONFIG_EXT_TX
@@ -1692,29 +1626,45 @@ void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
static void write_intra_mode(FRAME_CONTEXT *frame_ctx, BLOCK_SIZE bsize,
PREDICTION_MODE mode, aom_writer *w) {
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(w, av1_intra_mode_ind[mode],
frame_ctx->y_mode_cdf[size_group_lookup[bsize]],
INTRA_MODES);
-#else
- av1_write_token(w, av1_intra_mode_tree,
- frame_ctx->y_mode_prob[size_group_lookup[bsize]],
- &intra_mode_encodings[mode]);
-#endif
}
static void write_intra_uv_mode(FRAME_CONTEXT *frame_ctx,
PREDICTION_MODE uv_mode, PREDICTION_MODE y_mode,
aom_writer *w) {
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(w, av1_intra_mode_ind[uv_mode],
frame_ctx->uv_mode_cdf[y_mode], INTRA_MODES);
-#else
- av1_write_token(w, av1_intra_mode_tree, frame_ctx->uv_mode_prob[y_mode],
- &intra_mode_encodings[uv_mode]);
-#endif
}
+#if CONFIG_CFL
+static void write_cfl_alphas(FRAME_CONTEXT *const frame_ctx, int skip, int ind,
+ const CFL_SIGN_TYPE signs[CFL_SIGNS],
+ aom_writer *w) {
+ if (skip) {
+ assert(ind == 0);
+ assert(signs[CFL_PRED_U] == CFL_SIGN_POS);
+ assert(signs[CFL_PRED_V] == CFL_SIGN_POS);
+ } else {
+ // Check for uninitialized signs
+ if (cfl_alpha_codes[ind][CFL_PRED_U] == 0)
+ assert(signs[CFL_PRED_U] == CFL_SIGN_POS);
+ if (cfl_alpha_codes[ind][CFL_PRED_V] == 0)
+ assert(signs[CFL_PRED_V] == CFL_SIGN_POS);
+
+ // Write a symbol representing a combination of alpha Cb and alpha Cr.
+ aom_write_symbol(w, ind, frame_ctx->cfl_alpha_cdf, CFL_ALPHABET_SIZE);
+
+ // Signs are only signaled for nonzero codes.
+ if (cfl_alpha_codes[ind][CFL_PRED_U] != 0)
+ aom_write_bit(w, signs[CFL_PRED_U]);
+ if (cfl_alpha_codes[ind][CFL_PRED_V] != 0)
+ aom_write_bit(w, signs[CFL_PRED_V]);
+ }
+}
+#endif
+
static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
const int mi_col,
#if CONFIG_SUPERTX
@@ -1734,9 +1684,6 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
#else
FRAME_CONTEXT *ec_ctx = cm->fc;
#endif
-#if !CONFIG_REF_MV
- nmv_context *nmvc = &ec_ctx->nmvc;
-#endif
const MODE_INFO *mi = xd->mi[0];
const struct segmentation *const seg = &cm->seg;
@@ -1859,12 +1806,23 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
}
#if CONFIG_CB4X4
if (is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
- xd->plane[1].subsampling_y))
+ xd->plane[1].subsampling_y)) {
write_intra_uv_mode(ec_ctx, mbmi->uv_mode, mode, w);
#else // !CONFIG_CB4X4
write_intra_uv_mode(ec_ctx, mbmi->uv_mode, mode, w);
#endif // CONFIG_CB4X4
+#if CONFIG_CFL
+ if (mbmi->uv_mode == DC_PRED) {
+ write_cfl_alphas(ec_ctx, mbmi->skip, mbmi->cfl_alpha_idx,
+ mbmi->cfl_alpha_signs, w);
+ }
+#endif
+
+#if CONFIG_CB4X4
+ }
+#endif
+
#if CONFIG_EXT_INTRA
write_intra_angle_info(xd, ec_ctx, w);
#endif // CONFIG_EXT_INTRA
@@ -1874,13 +1832,12 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
if (bsize >= BLOCK_8X8 || unify_bsize)
- write_filter_intra_mode_info(cm, mbmi, w);
+ write_filter_intra_mode_info(cm, xd, mbmi, mi_row, mi_col, w);
#endif // CONFIG_FILTER_INTRA
} else {
int16_t mode_ctx;
write_ref_frames(cm, xd, w);
-#if CONFIG_REF_MV
#if CONFIG_EXT_INTER
if (is_compound)
mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
@@ -1888,9 +1845,6 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
#endif // CONFIG_EXT_INTER
mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
mbmi->ref_frame, bsize, -1);
-#else // CONFIG_REF_MV
- mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
-#endif // CONFIG_REF_MV
// If segment skip is not enabled code the mode.
if (!segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
@@ -1902,7 +1856,6 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
#endif // CONFIG_EXT_INTER
write_inter_mode(w, mode, ec_ctx, mode_ctx);
-#if CONFIG_REF_MV
#if CONFIG_EXT_INTER
if (mode == NEWMV || mode == NEW_NEWMV ||
have_nearmv_in_inter_mode(mode))
@@ -1912,7 +1865,6 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
write_drl_idx(cm, mbmi, mbmi_ext, w);
else
assert(mbmi->ref_mv_idx == 0);
-#endif
}
}
@@ -1928,13 +1880,11 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
for (idx = 0; idx < 2; idx += num_4x4_w) {
const int j = idy * 2 + idx;
const PREDICTION_MODE b_mode = mi->bmi[j].as_mode;
-#if CONFIG_REF_MV
#if CONFIG_EXT_INTER
if (!is_compound)
#endif // CONFIG_EXT_INTER
mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
mbmi->ref_frame, bsize, j);
-#endif
#if CONFIG_EXT_INTER
if (is_inter_compound_mode(b_mode))
write_inter_compound_mode(cm, w, b_mode, mode_ctx);
@@ -1948,45 +1898,35 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
if (b_mode == NEWMV) {
#endif // CONFIG_EXT_INTER
for (ref = 0; ref < 1 + is_compound; ++ref) {
-#if CONFIG_REF_MV
int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], ref,
mbmi->ref_mv_idx);
nmv_context *nmvc = &ec_ctx->nmvc[nmv_ctx];
-#endif
av1_encode_mv(cpi, w, &mi->bmi[j].as_mv[ref].as_mv,
#if CONFIG_EXT_INTER
&mi->bmi[j].ref_mv[ref].as_mv,
#else
-#if CONFIG_REF_MV
&mi->bmi[j].pred_mv[ref].as_mv,
-#else
- &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv,
-#endif // CONFIG_REF_MV
#endif // CONFIG_EXT_INTER
nmvc, allow_hp);
}
}
#if CONFIG_EXT_INTER
else if (b_mode == NEAREST_NEWMV || b_mode == NEAR_NEWMV) {
-#if CONFIG_REF_MV
int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], 1,
mbmi->ref_mv_idx);
nmv_context *nmvc = &ec_ctx->nmvc[nmv_ctx];
-#endif
av1_encode_mv(cpi, w, &mi->bmi[j].as_mv[1].as_mv,
&mi->bmi[j].ref_mv[1].as_mv, nmvc, allow_hp);
} else if (b_mode == NEW_NEARESTMV || b_mode == NEW_NEARMV) {
-#if CONFIG_REF_MV
int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], 0,
mbmi->ref_mv_idx);
nmv_context *nmvc = &ec_ctx->nmvc[nmv_ctx];
-#endif
av1_encode_mv(cpi, w, &mi->bmi[j].as_mv[0].as_mv,
&mi->bmi[j].ref_mv[0].as_mv, nmvc, allow_hp);
}
@@ -2001,37 +1941,31 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
#endif // CONFIG_EXT_INTER
int_mv ref_mv;
for (ref = 0; ref < 1 + is_compound; ++ref) {
-#if CONFIG_REF_MV
int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], ref,
mbmi->ref_mv_idx);
nmv_context *nmvc = &ec_ctx->nmvc[nmv_ctx];
-#endif
ref_mv = mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0];
av1_encode_mv(cpi, w, &mbmi->mv[ref].as_mv, &ref_mv.as_mv, nmvc,
allow_hp);
}
#if CONFIG_EXT_INTER
} else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
-#if CONFIG_REF_MV
int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
int nmv_ctx =
av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], 1, mbmi->ref_mv_idx);
nmv_context *nmvc = &ec_ctx->nmvc[nmv_ctx];
-#endif
av1_encode_mv(cpi, w, &mbmi->mv[1].as_mv,
&mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_mv, nmvc,
allow_hp);
} else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
-#if CONFIG_REF_MV
int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
int nmv_ctx =
av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx);
nmv_context *nmvc = &ec_ctx->nmvc[nmv_ctx];
-#endif
av1_encode_mv(cpi, w, &mbmi->mv[0].as_mv,
&mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv, nmvc,
allow_hp);
@@ -2039,12 +1973,12 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
}
}
-#if CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
if (cpi->common.reference_mode != COMPOUND_REFERENCE &&
#if CONFIG_SUPERTX
!supertx_enabled &&
#endif // CONFIG_SUPERTX
- is_interintra_allowed(mbmi)) {
+ cpi->common.allow_interintra_compound && is_interintra_allowed(mbmi)) {
const int interintra = mbmi->ref_frame[1] == INTRA_FRAME;
const int bsize_group = size_group_lookup[bsize];
aom_write(w, interintra, cm->fc->interintra_prob[bsize_group]);
@@ -2062,7 +1996,7 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
}
}
}
-#endif // CONFIG_EXT_INTER
+#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#if CONFIG_SUPERTX
@@ -2082,21 +2016,23 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
#endif // CONFIG_MOTION_VAR
&& is_any_masked_compound_used(bsize)) {
#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
- av1_write_token(w, av1_compound_type_tree,
- cm->fc->compound_type_prob[bsize],
- &compound_type_encodings[mbmi->interinter_compound_type]);
-#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
+ if (cm->allow_masked_compound) {
+ av1_write_token(
+ w, av1_compound_type_tree, cm->fc->compound_type_prob[bsize],
+ &compound_type_encodings[mbmi->interinter_compound_type]);
#if CONFIG_WEDGE
- if (mbmi->interinter_compound_type == COMPOUND_WEDGE) {
- aom_write_literal(w, mbmi->wedge_index, get_wedge_bits_lookup(bsize));
- aom_write_bit(w, mbmi->wedge_sign);
- }
+ if (mbmi->interinter_compound_type == COMPOUND_WEDGE) {
+ aom_write_literal(w, mbmi->wedge_index, get_wedge_bits_lookup(bsize));
+ aom_write_bit(w, mbmi->wedge_sign);
+ }
#endif // CONFIG_WEDGE
#if CONFIG_COMPOUND_SEGMENT
- if (mbmi->interinter_compound_type == COMPOUND_SEG) {
- aom_write_literal(w, mbmi->mask_type, MAX_SEG_MASK_BITS);
- }
+ if (mbmi->interinter_compound_type == COMPOUND_SEG) {
+ aom_write_literal(w, mbmi->mask_type, MAX_SEG_MASK_BITS);
+ }
#endif // CONFIG_COMPOUND_SEGMENT
+ }
+#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
}
#endif // CONFIG_EXT_INTER
@@ -2114,15 +2050,17 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
#endif // !CONFIG_TXK_SEL
}
+static void write_mb_modes_kf(AV1_COMMON *cm,
#if CONFIG_DELTA_Q
-static void write_mb_modes_kf(AV1_COMMON *cm, MACROBLOCKD *xd, const int mi_row,
- const int mi_col, aom_writer *w) {
- int skip;
+ MACROBLOCKD *xd,
#else
-static void write_mb_modes_kf(AV1_COMMON *cm, const MACROBLOCKD *xd,
+ const MACROBLOCKD *xd,
+#endif // CONFIG_DELTA_Q
+#if CONFIG_INTRABC
+ const MB_MODE_INFO_EXT *mbmi_ext,
+#endif // CONFIG_INTRABC
const int mi_row, const int mi_col,
aom_writer *w) {
-#endif
const struct segmentation *const seg = &cm->seg;
struct segmentation_probs *const segp = &cm->fc->seg;
const MODE_INFO *const mi = xd->mi[0];
@@ -2147,7 +2085,7 @@ static void write_mb_modes_kf(AV1_COMMON *cm, const MACROBLOCKD *xd,
if (seg->update_map) write_segment_id(w, seg, segp, mbmi->segment_id);
#if CONFIG_DELTA_Q
- skip = write_skip(cm, xd, mbmi->segment_id, mi, w);
+ const int skip = write_skip(cm, xd, mbmi->segment_id, mi, w);
if (cm->delta_q_present_flag) {
int super_block_upper_left =
((mi_row & MAX_MIB_MASK) == 0) && ((mi_col & MAX_MIB_MASK) == 0);
@@ -2188,13 +2126,19 @@ static void write_mb_modes_kf(AV1_COMMON *cm, const MACROBLOCKD *xd,
#if CONFIG_INTRABC
if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools) {
int use_intrabc = is_intrabc_block(mbmi);
- aom_write(w, use_intrabc, INTRABC_PROB);
+ aom_write(w, use_intrabc, ec_ctx->intrabc_prob);
if (use_intrabc) {
assert(mbmi->mode == DC_PRED);
assert(mbmi->uv_mode == DC_PRED);
- int_mv dv_ref;
- av1_find_ref_dv(&dv_ref, mi_row, mi_col);
+ int_mv dv_ref = mbmi_ext->ref_mvs[INTRA_FRAME][0];
av1_encode_dv(w, &mbmi->mv[0].as_mv, &dv_ref.as_mv, &ec_ctx->ndvc);
+#if CONFIG_EXT_TX && !CONFIG_TXK_SEL
+ av1_write_tx_type(cm, xd,
+#if CONFIG_SUPERTX
+ 0,
+#endif
+ w);
+#endif // CONFIG_EXT_TX && !CONFIG_TXK_SEL
return;
}
}
@@ -2218,12 +2162,22 @@ static void write_mb_modes_kf(AV1_COMMON *cm, const MACROBLOCKD *xd,
#if CONFIG_CB4X4
if (is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
- xd->plane[1].subsampling_y))
+ xd->plane[1].subsampling_y)) {
write_intra_uv_mode(ec_ctx, mbmi->uv_mode, mbmi->mode, w);
#else // !CONFIG_CB4X4
write_intra_uv_mode(ec_ctx, mbmi->uv_mode, mbmi->mode, w);
#endif // CONFIG_CB4X4
+#if CONFIG_CFL
+ if (mbmi->uv_mode == DC_PRED) {
+ write_cfl_alphas(ec_ctx, mbmi->skip, mbmi->cfl_alpha_idx,
+ mbmi->cfl_alpha_signs, w);
+ }
+#endif
+
+#if CONFIG_CB4X4
+ }
+#endif
#if CONFIG_EXT_INTRA
write_intra_angle_info(xd, ec_ctx, w);
#endif // CONFIG_EXT_INTRA
@@ -2233,7 +2187,7 @@ static void write_mb_modes_kf(AV1_COMMON *cm, const MACROBLOCKD *xd,
#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
if (bsize >= BLOCK_8X8 || unify_bsize)
- write_filter_intra_mode_info(cm, mbmi, w);
+ write_filter_intra_mode_info(cm, xd, mbmi, mi_row, mi_col, w);
#endif // CONFIG_FILTER_INTRA
#if !CONFIG_TXK_SEL
@@ -2325,12 +2279,17 @@ static void write_mbmi_b(AV1_COMP *cpi, const TileInfo *const tile,
cm->mi_rows, cm->mi_cols);
if (frame_is_intra_only(cm)) {
- write_mb_modes_kf(cm, xd, mi_row, mi_col, w);
+ write_mb_modes_kf(cm, xd,
+#if CONFIG_INTRABC
+ cpi->td.mb.mbmi_ext,
+#endif // CONFIG_INTRABC
+ mi_row, mi_col, w);
} else {
#if CONFIG_VAR_TX
- xd->above_txfm_context = cm->above_txfm_context + mi_col;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
+ xd->above_txfm_context =
+ cm->above_txfm_context + (mi_col << TX_UNIT_WIDE_LOG2);
+ xd->left_txfm_context = xd->left_txfm_context_buffer +
+ ((mi_row & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2);
#endif
#if CONFIG_DUAL_FILTER
// has_subpel_mv_component needs the ref frame buffers set up to look
@@ -2539,8 +2498,12 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
const int bkh = tx_size_high_unit[max_tx_size];
for (row = 0; row < num_4x4_h; row += bkh) {
for (col = 0; col < num_4x4_w; col += bkw) {
- pack_txb_tokens(w, tok, tok_end,
-#if CONFIG_PVQ
+ pack_txb_tokens(w,
+#if CONFIG_LV_MAP
+ cm,
+#endif
+ tok, tok_end,
+#if CONFIG_PVQ || CONFIG_LV_MAP
x,
#endif
xd, mbmi, plane, plane_bsize, cm->bit_depth, block,
@@ -2556,10 +2519,10 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
}
#endif // CONFIG_RD_DEBUG
} else {
+#if CONFIG_LV_MAP
+ av1_write_coeffs_mb(cm, x, w, plane);
+#else
TX_SIZE tx = get_tx_size(plane, xd);
-#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
- tx = AOMMAX(TX_4X4, tx);
-#endif
const int bkw = tx_size_wide_unit[tx];
const int bkh = tx_size_high_unit[tx];
for (row = 0; row < num_4x4_h; row += bkh) {
@@ -2571,6 +2534,7 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
#endif
}
}
+#endif // CONFIG_LV_MAP
}
#else
TX_SIZE tx = get_tx_size(plane, xd);
@@ -2727,7 +2691,7 @@ static void write_partition(const AV1_COMMON *const cm,
#if CONFIG_EC_ADAPT
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
(void)cm;
-#elif CONFIG_EC_MULTISYMBOL
+#else
FRAME_CONTEXT *ec_ctx = cm->fc;
#endif
@@ -2736,24 +2700,11 @@ static void write_partition(const AV1_COMMON *const cm,
if (has_rows && has_cols) {
#if CONFIG_EXT_PARTITION_TYPES
if (bsize <= BLOCK_8X8)
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx], PARTITION_TYPES);
-#else
- av1_write_token(w, av1_partition_tree, probs, &partition_encodings[p]);
-#endif
else
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx], EXT_PARTITION_TYPES);
#else
- av1_write_token(w, av1_ext_partition_tree, probs,
- &ext_partition_encodings[p]);
-#endif // CONFIG_EC_MULTISYMBOL
-#else
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx], PARTITION_TYPES);
-#else
- av1_write_token(w, av1_partition_tree, probs, &partition_encodings[p]);
-#endif
#endif // CONFIG_EXT_PARTITION_TYPES
} else if (!has_rows && has_cols) {
assert(p == PARTITION_SPLIT || p == PARTITION_HORZ);
@@ -2920,7 +2871,6 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
const int eset =
get_ext_tx_set(supertx_size, bsize, 1, cm->reduced_tx_set_used);
if (eset > 0) {
-#if CONFIG_EC_MULTISYMBOL
#if CONFIG_EC_ADAPT
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
#else
@@ -2929,11 +2879,6 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
aom_write_symbol(w, av1_ext_tx_inter_ind[eset][mbmi->tx_type],
ec_ctx->inter_ext_tx_cdf[eset][supertx_size],
ext_tx_cnt_inter[eset]);
-#else
- av1_write_token(w, av1_ext_tx_inter_tree[eset],
- cm->fc->inter_ext_tx_prob[eset][supertx_size],
- &ext_tx_inter_encodings[eset][mbmi->tx_type]);
-#endif
}
}
#else
@@ -2989,21 +2934,11 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
#endif // CONFIG_EXT_PARTITION_TYPES
#if CONFIG_CDEF
-#if CONFIG_EXT_PARTITION
- if (cm->sb_size == BLOCK_128X128 && bsize == BLOCK_128X128 &&
- !sb_all_skip(cm, mi_row, mi_col)) {
+ if (bsize == cm->sb_size && !sb_all_skip(cm, mi_row, mi_col) &&
+ cm->cdef_bits != 0) {
aom_write_literal(w, cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]
->mbmi.cdef_strength,
cm->cdef_bits);
- } else if (cm->sb_size == BLOCK_64X64 && bsize == BLOCK_64X64 &&
-#else
- if (bsize == BLOCK_64X64 &&
-#endif // CONFIG_EXT_PARTITION
- !sb_all_skip(cm, mi_row, mi_col)) {
- if (cm->cdef_bits != 0)
- aom_write_literal(w, cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]
- ->mbmi.cdef_strength,
- cm->cdef_bits);
}
#endif
}
@@ -3066,7 +3001,7 @@ static void write_modes(AV1_COMP *const cpi, const TileInfo *const tile,
}
#if !CONFIG_LV_MAP
-#if !CONFIG_PVQ && !(CONFIG_EC_ADAPT && CONFIG_NEW_TOKENSET)
+#if !CONFIG_PVQ && !CONFIG_EC_ADAPT
static void build_tree_distribution(AV1_COMP *cpi, TX_SIZE tx_size,
av1_coeff_stats *coef_branch_ct,
av1_coeff_probs_model *coef_probs) {
@@ -3097,7 +3032,7 @@ static void build_tree_distribution(AV1_COMP *cpi, TX_SIZE tx_size,
}
}
-#if !(CONFIG_EC_ADAPT && CONFIG_NEW_TOKENSET)
+#if !CONFIG_EC_ADAPT
static void update_coef_probs_common(aom_writer *const bc, AV1_COMP *cpi,
TX_SIZE tx_size,
av1_coeff_stats *frame_branch_ct,
@@ -3249,235 +3184,12 @@ static void update_coef_probs_common(aom_writer *const bc, AV1_COMP *cpi,
}
}
#endif
-#if CONFIG_SUBFRAME_PROB_UPDATE
-// Calculate the token counts between subsequent subframe updates.
-static void get_coef_counts_diff(
- AV1_COMP *cpi, int index,
- av1_coeff_count coef_counts[TX_SIZES][PLANE_TYPES],
- unsigned int eob_counts[TX_SIZES][PLANE_TYPES][REF_TYPES][COEF_BANDS]
- [COEFF_CONTEXTS]) {
- int i, j, k, l, m, tx_size, val;
- const int max_idx = cpi->common.coef_probs_update_idx;
- const TX_MODE tx_mode = cpi->common.tx_mode;
- const int max_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
- const SUBFRAME_STATS *subframe_stats = &cpi->subframe_stats;
-
- assert(max_idx < COEF_PROBS_BUFS);
-
- for (tx_size = 0; tx_size <= max_tx_size; ++tx_size)
- for (i = 0; i < PLANE_TYPES; ++i)
- for (j = 0; j < REF_TYPES; ++j)
- for (k = 0; k < COEF_BANDS; ++k)
- for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
- if (index == max_idx) {
- val =
- cpi->common.counts.eob_branch[tx_size][i][j][k][l] -
- subframe_stats->eob_counts_buf[max_idx][tx_size][i][j][k][l];
- } else {
- val = subframe_stats
- ->eob_counts_buf[index + 1][tx_size][i][j][k][l] -
- subframe_stats->eob_counts_buf[index][tx_size][i][j][k][l];
- }
- assert(val >= 0);
- eob_counts[tx_size][i][j][k][l] = val;
-
- for (m = 0; m < ENTROPY_TOKENS; ++m) {
- if (index == max_idx) {
- val = cpi->td.rd_counts.coef_counts[tx_size][i][j][k][l][m] -
- subframe_stats
- ->coef_counts_buf[max_idx][tx_size][i][j][k][l][m];
- } else {
- val = subframe_stats
- ->coef_counts_buf[index + 1][tx_size][i][j][k][l][m] -
- subframe_stats
- ->coef_counts_buf[index][tx_size][i][j][k][l][m];
- }
- assert(val >= 0);
- coef_counts[tx_size][i][j][k][l][m] = val;
- }
- }
-}
-
-static void update_coef_probs_subframe(
- aom_writer *const bc, AV1_COMP *cpi, TX_SIZE tx_size,
- av1_coeff_stats branch_ct[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES],
- av1_coeff_probs_model *new_coef_probs) {
- av1_coeff_probs_model *old_coef_probs = cpi->common.fc->coef_probs[tx_size];
- const aom_prob upd = DIFF_UPDATE_PROB;
- const int entropy_nodes_update = UNCONSTRAINED_NODES;
- int i, j, k, l, t;
- int stepsize = cpi->sf.coeff_prob_appx_step;
- const int max_idx = cpi->common.coef_probs_update_idx;
- int idx;
- unsigned int this_branch_ct[ENTROPY_NODES][COEF_PROBS_BUFS][2];
-
- switch (cpi->sf.use_fast_coef_updates) {
- case TWO_LOOP: {
- /* dry run to see if there is any update at all needed */
- int savings = 0;
- int update[2] = { 0, 0 };
- for (i = 0; i < PLANE_TYPES; ++i) {
- for (j = 0; j < REF_TYPES; ++j) {
- for (k = 0; k < COEF_BANDS; ++k) {
- for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
- for (t = 0; t < ENTROPY_NODES; ++t) {
- for (idx = 0; idx <= max_idx; ++idx) {
- memcpy(this_branch_ct[t][idx],
- branch_ct[idx][tx_size][i][j][k][l][t],
- 2 * sizeof(this_branch_ct[t][idx][0]));
- }
- }
- for (t = 0; t < entropy_nodes_update; ++t) {
- aom_prob newp = new_coef_probs[i][j][k][l][t];
- const aom_prob oldp = old_coef_probs[i][j][k][l][t];
- int s, u = 0;
-
- if (t == PIVOT_NODE)
- s = av1_prob_update_search_model_subframe(
- this_branch_ct, old_coef_probs[i][j][k][l], &newp, upd,
- stepsize, max_idx);
- else
- s = av1_prob_update_search_subframe(this_branch_ct[t], oldp,
- &newp, upd, max_idx);
- if (s > 0 && newp != oldp) u = 1;
- if (u)
- savings += s - (int)(av1_cost_zero(upd));
- else
- savings -= (int)(av1_cost_zero(upd));
- update[u]++;
- }
- }
- }
- }
- }
-
- /* Is coef updated at all */
- if (update[1] == 0 || savings < 0) {
- aom_write_bit(bc, 0);
- return;
- }
- aom_write_bit(bc, 1);
- for (i = 0; i < PLANE_TYPES; ++i) {
- for (j = 0; j < REF_TYPES; ++j) {
- for (k = 0; k < COEF_BANDS; ++k) {
- for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
- for (t = 0; t < ENTROPY_NODES; ++t) {
- for (idx = 0; idx <= max_idx; ++idx) {
- memcpy(this_branch_ct[t][idx],
- branch_ct[idx][tx_size][i][j][k][l][t],
- 2 * sizeof(this_branch_ct[t][idx][0]));
- }
- }
- for (t = 0; t < entropy_nodes_update; ++t) {
- aom_prob newp = new_coef_probs[i][j][k][l][t];
- aom_prob *oldp = old_coef_probs[i][j][k][l] + t;
- int s;
- int u = 0;
-
- if (t == PIVOT_NODE)
- s = av1_prob_update_search_model_subframe(
- this_branch_ct, old_coef_probs[i][j][k][l], &newp, upd,
- stepsize, max_idx);
- else
- s = av1_prob_update_search_subframe(this_branch_ct[t], *oldp,
- &newp, upd, max_idx);
- if (s > 0 && newp != *oldp) u = 1;
- aom_write(bc, u, upd);
- if (u) {
- /* send/use new probability */
- av1_write_prob_diff_update(bc, newp, *oldp);
- *oldp = newp;
- }
- }
- }
- }
- }
- }
- return;
- }
-
- case ONE_LOOP_REDUCED: {
- int updates = 0;
- int noupdates_before_first = 0;
- for (i = 0; i < PLANE_TYPES; ++i) {
- for (j = 0; j < REF_TYPES; ++j) {
- for (k = 0; k < COEF_BANDS; ++k) {
- for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
- for (t = 0; t < ENTROPY_NODES; ++t) {
- for (idx = 0; idx <= max_idx; ++idx) {
- memcpy(this_branch_ct[t][idx],
- branch_ct[idx][tx_size][i][j][k][l][t],
- 2 * sizeof(this_branch_ct[t][idx][0]));
- }
- }
- for (t = 0; t < entropy_nodes_update; ++t) {
- aom_prob newp = new_coef_probs[i][j][k][l][t];
- aom_prob *oldp = old_coef_probs[i][j][k][l] + t;
- int s;
- int u = 0;
- if (t == PIVOT_NODE)
- s = av1_prob_update_search_model_subframe(
- this_branch_ct, old_coef_probs[i][j][k][l], &newp, upd,
- stepsize, max_idx);
- else
- s = av1_prob_update_search_subframe(this_branch_ct[t], *oldp,
- &newp, upd, max_idx);
- if (s > 0 && newp != *oldp) u = 1;
- updates += u;
- if (u == 0 && updates == 0) {
- noupdates_before_first++;
- continue;
- }
- if (u == 1 && updates == 1) {
- int v;
- // first update
- aom_write_bit(bc, 1);
- for (v = 0; v < noupdates_before_first; ++v)
- aom_write(bc, 0, upd);
- }
- aom_write(bc, u, upd);
- if (u) {
- /* send/use new probability */
- av1_write_prob_diff_update(bc, newp, *oldp);
- *oldp = newp;
- }
- }
- }
- }
- }
- }
- if (updates == 0) {
- aom_write_bit(bc, 0); // no updates
- }
- return;
- }
- default: assert(0);
- }
-}
-#endif // CONFIG_SUBFRAME_PROB_UPDATE
-
-#if !(CONFIG_EC_ADAPT && CONFIG_NEW_TOKENSET)
+#if !CONFIG_EC_ADAPT
static void update_coef_probs(AV1_COMP *cpi, aom_writer *w) {
const TX_MODE tx_mode = cpi->common.tx_mode;
const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
TX_SIZE tx_size;
-#if CONFIG_SUBFRAME_PROB_UPDATE
- AV1_COMMON *cm = &cpi->common;
- SUBFRAME_STATS *subframe_stats = &cpi->subframe_stats;
- int i;
- av1_coeff_probs_model dummy_frame_coef_probs[PLANE_TYPES];
-
- if (cm->do_subframe_update &&
- cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
- av1_copy(cpi->common.fc->coef_probs,
- subframe_stats->enc_starting_coef_probs);
- for (i = 0; i <= cpi->common.coef_probs_update_idx; ++i) {
- get_coef_counts_diff(cpi, i, cpi->wholeframe_stats.coef_counts_buf[i],
- cpi->wholeframe_stats.eob_counts_buf[i]);
- }
- }
-#endif // CONFIG_SUBFRAME_PROB_UPDATE
for (tx_size = 0; tx_size <= max_tx_size; ++tx_size) {
av1_coeff_stats frame_branch_ct[PLANE_TYPES];
@@ -3486,63 +3198,13 @@ static void update_coef_probs(AV1_COMP *cpi, aom_writer *w) {
(tx_size >= TX_16X16 && cpi->sf.tx_size_search_method == USE_TX_8X8)) {
aom_write_bit(w, 0);
} else {
-#if CONFIG_SUBFRAME_PROB_UPDATE
- if (cm->do_subframe_update &&
- cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
- unsigned int this_eob_counts_copy[PLANE_TYPES][REF_TYPES][COEF_BANDS]
- [COEFF_CONTEXTS];
- av1_coeff_count coef_counts_copy[PLANE_TYPES];
- av1_copy(this_eob_counts_copy, cpi->common.counts.eob_branch[tx_size]);
- av1_copy(coef_counts_copy, cpi->td.rd_counts.coef_counts[tx_size]);
- build_tree_distribution(cpi, tx_size, frame_branch_ct,
- frame_coef_probs);
- for (i = 0; i <= cpi->common.coef_probs_update_idx; ++i) {
- av1_copy(cpi->common.counts.eob_branch[tx_size],
- cpi->wholeframe_stats.eob_counts_buf[i][tx_size]);
- av1_copy(cpi->td.rd_counts.coef_counts[tx_size],
- cpi->wholeframe_stats.coef_counts_buf[i][tx_size]);
- build_tree_distribution(cpi, tx_size, cpi->branch_ct_buf[i][tx_size],
- dummy_frame_coef_probs);
- }
- av1_copy(cpi->common.counts.eob_branch[tx_size], this_eob_counts_copy);
- av1_copy(cpi->td.rd_counts.coef_counts[tx_size], coef_counts_copy);
-
- update_coef_probs_subframe(w, cpi, tx_size, cpi->branch_ct_buf,
- frame_coef_probs);
- } else {
-#endif // CONFIG_SUBFRAME_PROB_UPDATE
- build_tree_distribution(cpi, tx_size, frame_branch_ct,
- frame_coef_probs);
- update_coef_probs_common(w, cpi, tx_size, frame_branch_ct,
- frame_coef_probs);
-#if CONFIG_SUBFRAME_PROB_UPDATE
- }
-#endif // CONFIG_SUBFRAME_PROB_UPDATE
- }
- }
-
-#if CONFIG_SUBFRAME_PROB_UPDATE
- av1_copy(cm->starting_coef_probs, cm->fc->coef_probs);
- av1_copy(subframe_stats->coef_probs_buf[0], cm->fc->coef_probs);
- if (cm->do_subframe_update &&
- cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
- unsigned int eob_counts_copy[TX_SIZES][PLANE_TYPES][REF_TYPES][COEF_BANDS]
- [COEFF_CONTEXTS];
- av1_copy(eob_counts_copy, cm->counts.eob_branch);
- for (i = 1; i <= cpi->common.coef_probs_update_idx; ++i) {
- for (tx_size = 0; tx_size <= max_tx_size; ++tx_size)
- av1_full_to_model_counts(cm->counts.coef[tx_size],
- subframe_stats->coef_counts_buf[i][tx_size]);
- av1_copy(cm->counts.eob_branch, subframe_stats->eob_counts_buf[i]);
- av1_partial_adapt_probs(cm, 0, 0);
- av1_copy(subframe_stats->coef_probs_buf[i], cm->fc->coef_probs);
+ build_tree_distribution(cpi, tx_size, frame_branch_ct, frame_coef_probs);
+ update_coef_probs_common(w, cpi, tx_size, frame_branch_ct,
+ frame_coef_probs);
}
- av1_copy(cm->fc->coef_probs, subframe_stats->coef_probs_buf[0]);
- av1_copy(cm->counts.eob_branch, eob_counts_copy);
}
-#endif // CONFIG_SUBFRAME_PROB_UPDATE
}
-#endif // !(CONFIG_EC_ADAPT && CONFIG_NEW_TOKENSET)
+#endif // !CONFIG_EC_ADAPT
#endif // !CONFIG_EC_ADAPT
#endif // !CONFIG_LV_MAP
@@ -3574,7 +3236,14 @@ static void encode_restoration_mode(AV1_COMMON *cm,
rsi = &cm->rst_info[p];
switch (rsi->frame_restoration_type) {
case RESTORE_NONE: aom_wb_write_bit(wb, 0); break;
- case RESTORE_WIENER: aom_wb_write_bit(wb, 1); break;
+ case RESTORE_WIENER:
+ aom_wb_write_bit(wb, 1);
+ aom_wb_write_bit(wb, 0);
+ break;
+ case RESTORE_SGRPROJ:
+ aom_wb_write_bit(wb, 1);
+ aom_wb_write_bit(wb, 1);
+ break;
default: assert(0);
}
}
@@ -3687,6 +3356,7 @@ static void encode_restoration(AV1_COMMON *cm, aom_writer *wb) {
}
for (p = 1; p < MAX_MB_PLANE; ++p) {
set_default_wiener(&ref_wiener_info);
+ set_default_sgrproj(&ref_sgrproj_info);
rsi = &cm->rst_info[p];
if (rsi->frame_restoration_type == RESTORE_WIENER) {
for (i = 0; i < ntiles_uv; ++i) {
@@ -3697,6 +3367,15 @@ static void encode_restoration(AV1_COMMON *cm, aom_writer *wb) {
write_wiener_filter(&rsi->wiener_info[i], &ref_wiener_info, wb);
}
}
+ } else if (rsi->frame_restoration_type == RESTORE_SGRPROJ) {
+ for (i = 0; i < ntiles_uv; ++i) {
+ if (ntiles_uv > 1)
+ aom_write(wb, rsi->restoration_type[i] != RESTORE_NONE,
+ RESTORE_NONE_SGRPROJ_PROB);
+ if (rsi->restoration_type[i] != RESTORE_NONE) {
+ write_sgrproj_filter(&rsi->sgrproj_info[i], &ref_sgrproj_info, wb);
+ }
+ }
} else if (rsi->frame_restoration_type != RESTORE_NONE) {
assert(0);
}
@@ -3972,6 +3651,9 @@ static void write_tile_info(const AV1_COMMON *const cm,
aom_wb_write_literal(wb, tile_width - 1, 6);
aom_wb_write_literal(wb, tile_height - 1, 6);
}
+#if CONFIG_DEPENDENT_HORZTILES
+ if (tile_height > 1) aom_wb_write_bit(wb, cm->dependent_horz_tiles);
+#endif
#else
int min_log2_tile_cols, max_log2_tile_cols, ones;
av1_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
@@ -3985,11 +3667,10 @@ static void write_tile_info(const AV1_COMMON *const cm,
// rows
aom_wb_write_bit(wb, cm->log2_tile_rows != 0);
if (cm->log2_tile_rows != 0) aom_wb_write_bit(wb, cm->log2_tile_rows != 1);
-#endif // CONFIG_EXT_TILE
-
#if CONFIG_DEPENDENT_HORZTILES
if (cm->log2_tile_rows != 0) aom_wb_write_bit(wb, cm->dependent_horz_tiles);
#endif
+#endif // CONFIG_EXT_TILE
#if CONFIG_LOOPFILTERING_ACROSS_TILES
aom_wb_write_bit(wb, cm->loop_filter_across_tiles_enabled);
@@ -4442,9 +4123,6 @@ static void write_render_size(const AV1_COMMON *cm,
#if CONFIG_FRAME_SUPERRES
static void write_superres_scale(const AV1_COMMON *const cm,
struct aom_write_bit_buffer *wb) {
- // This scaling and frame superres are probably incompatible
- assert(cm->width == cm->render_width && cm->height == cm->render_height);
-
// First bit is whether to to scale or not
if (cm->superres_scale_numerator == SUPERRES_SCALE_DENOMINATOR) {
aom_wb_write_bit(wb, 0); // no scaling
@@ -4460,23 +4138,9 @@ static void write_superres_scale(const AV1_COMMON *const cm,
static void write_frame_size(const AV1_COMMON *cm,
struct aom_write_bit_buffer *wb) {
-#if CONFIG_FRAME_SUPERRES
- // If SUPERRES scaling is happening, write the full resolution instead of the
- // downscaled resolution. The decoder will reduce this resolution itself.
- if (cm->superres_scale_numerator != SUPERRES_SCALE_DENOMINATOR) {
- aom_wb_write_literal(wb, cm->superres_width - 1, 16);
- aom_wb_write_literal(wb, cm->superres_height - 1, 16);
- } else {
-#endif // CONFIG_FRAME_SUPERRES
- aom_wb_write_literal(wb, cm->width - 1, 16);
- aom_wb_write_literal(wb, cm->height - 1, 16);
-#if CONFIG_FRAME_SUPERRES
- }
-#endif // CONFIG_FRAME_SUPERRES
+ aom_wb_write_literal(wb, cm->width - 1, 16);
+ aom_wb_write_literal(wb, cm->height - 1, 16);
- // TODO(afergs): Also write something different to render_size?
- // When superres scales, they'll be almost guaranteed to be
- // different on the other side.
write_render_size(cm, wb);
#if CONFIG_FRAME_SUPERRES
write_superres_scale(cm, wb);
@@ -4559,6 +4223,28 @@ void write_sequence_header(SequenceHeader *seq_params) {
}
#endif
+#if CONFIG_EXT_INTER
+// Writes the frame-level enable flags for the compound prediction tools into
+// the bit buffer `wb`:
+//  - cm->allow_interintra_compound (only when CONFIG_INTERINTRA is set)
+//  - cm->allow_masked_compound (only when CONFIG_WEDGE or
+//    CONFIG_COMPOUND_SEGMENT is set)
+// Each flag is written as a single bit, and only when the frame is not
+// intra-only and cm->reference_mode passes the check below; otherwise no bit
+// is written and the flag is asserted to be 0.
+static void write_compound_tools(const AV1_COMMON *cm,
+ struct aom_write_bit_buffer *wb) {
+ // Both parameters are only used inside the config-guarded sections below;
+ // the casts keep them "used" when neither section is compiled in.
+ (void)cm;
+ (void)wb;
+#if CONFIG_INTERINTRA
+ // Inter-intra flag: written unless the frame is intra-only or the reference
+ // mode is COMPOUND_REFERENCE.
+ if (!frame_is_intra_only(cm) && cm->reference_mode != COMPOUND_REFERENCE) {
+ aom_wb_write_bit(wb, cm->allow_interintra_compound);
+ } else {
+ // Nothing is written on this path, so the flag is expected to be 0.
+ assert(cm->allow_interintra_compound == 0);
+ }
+#endif // CONFIG_INTERINTRA
+#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
+ // Masked-compound flag: written unless the frame is intra-only or the
+ // reference mode is SINGLE_REFERENCE.
+ if (!frame_is_intra_only(cm) && cm->reference_mode != SINGLE_REFERENCE) {
+ aom_wb_write_bit(wb, cm->allow_masked_compound);
+ } else {
+ // Nothing is written on this path, so the flag is expected to be 0.
+ assert(cm->allow_masked_compound == 0);
+ }
+#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
+}
+#endif // CONFIG_EXT_INTER
+
static void write_uncompressed_header(AV1_COMP *cpi,
struct aom_write_bit_buffer *wb) {
AV1_COMMON *const cm = &cpi->common;
@@ -4637,14 +4323,14 @@ static void write_uncompressed_header(AV1_COMP *cpi,
assert(cpi->common.ans_window_size_log2 < 24);
aom_wb_write_literal(wb, cpi->common.ans_window_size_log2 - 8, 4);
#endif // CONFIG_ANS && ANS_MAX_SYMBOLS
-#if CONFIG_PALETTE
+#if CONFIG_PALETTE || CONFIG_INTRABC
aom_wb_write_bit(wb, cm->allow_screen_content_tools);
-#endif // CONFIG_PALETTE
+#endif // CONFIG_PALETTE || CONFIG_INTRABC
} else {
if (!cm->show_frame) aom_wb_write_bit(wb, cm->intra_only);
-#if CONFIG_PALETTE
+#if CONFIG_PALETTE || CONFIG_INTRABC
if (cm->intra_only) aom_wb_write_bit(wb, cm->allow_screen_content_tools);
-#endif // CONFIG_PALETTE
+#endif // CONFIG_PALETTE || CONFIG_INTRABC
if (!cm->error_resilient_mode) {
if (cm->intra_only) {
aom_wb_write_bit(wb,
@@ -4813,6 +4499,9 @@ static void write_uncompressed_header(AV1_COMP *cpi,
if (!use_hybrid_pred) aom_wb_write_bit(wb, use_compound_pred);
#endif // !CONFIG_REF_ADAPT
}
+#if CONFIG_EXT_INTER
+ write_compound_tools(cm, wb);
+#endif // CONFIG_EXT_INTER
#if CONFIG_EXT_TX
aom_wb_write_bit(wb, cm->reduced_tx_set_used);
@@ -4896,14 +4585,6 @@ static void write_global_motion(AV1_COMP *cpi, aom_writer *w) {
AV1_COMMON *const cm = &cpi->common;
int frame;
for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
-#if !CONFIG_REF_MV
- // With ref-mv, clearing unused global motion models here is
- // unsafe, and we need to rely on the recode loop to do it
- // instead. See av1_find_mv_refs for details.
- if (!cpi->td.rd_counts.global_motion_used[frame]) {
- set_default_warp_params(&cm->global_motion[frame]);
- }
-#endif
write_global_motion_params(
&cm->global_motion[frame], &cm->prev_frame->global_motion[frame],
cm->fc->global_motion_types_prob, w, cm->allow_high_precision_mv);
@@ -4950,13 +4631,18 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
#if !CONFIG_EC_ADAPT
update_txfm_probs(cm, header_bc, counts);
#endif
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+ if (cm->tx_mode == TX_MODE_SELECT)
+ av1_cond_prob_diff_update(header_bc, &cm->fc->quarter_tx_size_prob,
+ cm->counts.quarter_tx_size, probwt);
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
#if CONFIG_LV_MAP
av1_write_txb_probs(cpi, header_bc);
#else
#if !CONFIG_PVQ
-#if !(CONFIG_EC_ADAPT && CONFIG_NEW_TOKENSET)
+#if !CONFIG_EC_ADAPT
update_coef_probs(cpi, header_bc);
-#endif // !(CONFIG_EC_ADAPT && CONFIG_NEW_TOKENSET)
+#endif // !CONFIG_EC_ADAPT
#endif // CONFIG_PVQ
#endif // CONFIG_LV_MAP
@@ -5023,9 +4709,7 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
if (frame_is_intra_only(cm)) {
av1_copy(cm->kf_y_prob, av1_kf_y_mode_prob);
-#if CONFIG_EC_MULTISYMBOL
av1_copy(cm->fc->kf_y_cdf, av1_kf_y_mode_cdf);
-#endif
#if !CONFIG_EC_ADAPT
for (i = 0; i < INTRA_MODES; ++i)
@@ -5034,21 +4718,19 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
counts->kf_y_mode[i][j], INTRA_MODES, probwt,
header_bc);
#endif // CONFIG_EC_ADAPT
- } else {
-#if CONFIG_REF_MV
- update_inter_mode_probs(cm, header_bc, counts);
-#else
-#if !CONFIG_EC_ADAPT
- for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
- prob_diff_update(av1_inter_mode_tree, cm->fc->inter_mode_probs[i],
- counts->inter_mode[i], INTER_MODES, probwt, header_bc);
+#if CONFIG_INTRABC
+ if (cm->allow_screen_content_tools) {
+ av1_cond_prob_diff_update(header_bc, &fc->intrabc_prob,
+ cm->counts.intrabc, probwt);
}
#endif
-#endif
+ } else {
+ update_inter_mode_probs(cm, header_bc, counts);
#if CONFIG_EXT_INTER
update_inter_compound_mode_probs(cm, probwt, header_bc);
-
- if (cm->reference_mode != COMPOUND_REFERENCE) {
+#if CONFIG_INTERINTRA
+ if (cm->reference_mode != COMPOUND_REFERENCE &&
+ cm->allow_interintra_compound) {
for (i = 0; i < BLOCK_SIZE_GROUPS; i++) {
if (is_interintra_allowed_bsize_group(i)) {
av1_cond_prob_diff_update(header_bc, &fc->interintra_prob[i],
@@ -5060,14 +4742,17 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
av1_interintra_mode_tree, cm->fc->interintra_mode_prob[i],
counts->interintra_mode[i], INTERINTRA_MODES, probwt, header_bc);
}
+#if CONFIG_WEDGE
for (i = 0; i < BLOCK_SIZES; i++) {
if (is_interintra_allowed_bsize(i) && is_interintra_wedge_used(i))
av1_cond_prob_diff_update(header_bc, &fc->wedge_interintra_prob[i],
cm->counts.wedge_interintra[i], probwt);
}
+#endif // CONFIG_WEDGE
}
+#endif // CONFIG_INTERINTRA
#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
- if (cm->reference_mode != SINGLE_REFERENCE) {
+ if (cm->reference_mode != SINGLE_REFERENCE && cm->allow_masked_compound) {
for (i = 0; i < BLOCK_SIZES; i++)
prob_diff_update(av1_compound_type_tree, fc->compound_type_prob[i],
cm->counts.compound_interinter[i], COMPOUND_TYPES,
@@ -5133,12 +4818,7 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
}
#endif
- av1_write_nmv_probs(cm, cm->allow_high_precision_mv, header_bc,
-#if CONFIG_REF_MV
- counts->mv);
-#else
- &counts->mv);
-#endif
+ av1_write_nmv_probs(cm, cm->allow_high_precision_mv, header_bc, counts->mv);
#if !CONFIG_EC_ADAPT
update_ext_tx_probs(cm, header_bc);
#endif
@@ -5149,22 +4829,12 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
write_global_motion(cpi, header_bc);
#endif // CONFIG_GLOBAL_MOTION
}
-#if CONFIG_EC_MULTISYMBOL
#if !CONFIG_EC_ADAPT
-#if CONFIG_NEW_TOKENSET
av1_coef_head_cdfs(fc);
-#endif
av1_coef_pareto_cdfs(fc);
-#if CONFIG_REF_MV
for (i = 0; i < NMV_CONTEXTS; ++i) av1_set_mv_cdfs(&fc->nmvc[i]);
-#else
- av1_set_mv_cdfs(&fc->nmvc);
-#endif
-#if CONFIG_EC_MULTISYMBOL
av1_set_mode_cdfs(cm);
-#endif
#endif // !CONFIG_EC_ADAPT
-#endif
#if CONFIG_ANS
aom_buf_ans_flush(header_bc);
header_size = buf_ans_write_end(header_bc);
diff --git a/third_party/aom/av1/encoder/block.h b/third_party/aom/av1/encoder/block.h
index 39e08d5b4..e16479e64 100644
--- a/third_party/aom/av1/encoder/block.h
+++ b/third_party/aom/av1/encoder/block.h
@@ -17,9 +17,7 @@
#if CONFIG_PVQ
#include "av1/encoder/encint.h"
#endif
-#if CONFIG_REF_MV
#include "av1/common/mvref_common.h"
-#endif
#ifdef __cplusplus
extern "C" {
@@ -79,13 +77,11 @@ typedef struct {
int dc_sign_ctx[MAX_MB_PLANE]
[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
#endif
-#if CONFIG_REF_MV
uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
#if CONFIG_EXT_INTER
int16_t compound_mode_context[MODE_CTX_REF_FRAMES];
#endif // CONFIG_EXT_INTER
-#endif
} MB_MODE_INFO_EXT;
typedef struct {
@@ -141,27 +137,18 @@ struct macroblock {
unsigned int pred_sse[TOTAL_REFS_PER_FRAME];
int pred_mv_sad[TOTAL_REFS_PER_FRAME];
-#if CONFIG_REF_MV
int *nmvjointcost;
int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS];
int *nmvcost[NMV_CONTEXTS][2];
int *nmvcost_hp[NMV_CONTEXTS][2];
int **mv_cost_stack[NMV_CONTEXTS];
- int *nmvjointsadcost;
-#else
- int nmvjointcost[MV_JOINTS];
- int *nmvcost[2];
- int *nmvcost_hp[2];
- int nmvjointsadcost[MV_JOINTS];
-#endif
-
int **mvcost;
- int *nmvsadcost[2];
- int *nmvsadcost_hp[2];
- int **mvsadcost;
+
#if CONFIG_MOTION_VAR
int32_t *wsrc_buf;
int32_t *mask_buf;
+ uint8_t *above_pred_buf;
+ uint8_t *left_pred_buf;
#endif // CONFIG_MOTION_VAR
#if CONFIG_PALETTE
@@ -174,10 +161,8 @@ struct macroblock {
#if CONFIG_VAR_TX
uint8_t blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
-#if CONFIG_REF_MV
uint8_t blk_skip_drl[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
#endif
-#endif
int skip;
@@ -226,8 +211,11 @@ struct macroblock {
// This is needed when using the 8x8 Daala distortion metric during RDO,
// because it evaluates distortion in a different order than the underlying
// 4x4 blocks are coded.
- int rate_4x4[256];
-#endif
+ int rate_4x4[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+#if CONFIG_CB4X4
+ DECLARE_ALIGNED(16, uint8_t, decoded_8x8[8 * 8]);
+#endif // CONFIG_CB4X4
+#endif // CONFIG_DAALA_DIST
#if CONFIG_CFL
// Whether luma needs to be stored during RDO.
int cfl_store_y;
diff --git a/third_party/aom/av1/encoder/context_tree.h b/third_party/aom/av1/encoder/context_tree.h
index 67954126c..4f9d5e374 100644
--- a/third_party/aom/av1/encoder/context_tree.h
+++ b/third_party/aom/av1/encoder/context_tree.h
@@ -34,7 +34,6 @@ typedef struct {
uint8_t *blk_skip[MAX_MB_PLANE];
#endif
- // dual buffer pointers, 0: in use, 1: best in store
tran_low_t *coeff[MAX_MB_PLANE];
tran_low_t *qcoeff[MAX_MB_PLANE];
tran_low_t *dqcoeff[MAX_MB_PLANE];
@@ -48,9 +47,8 @@ typedef struct {
int num_4x4_blk;
int skip;
- int pred_pixel_ready;
// For current partition, only if all Y, U, and V transform blocks'
- // coefficients are quantized to 0, skippable is set to 0.
+ // coefficients are quantized to 0, skippable is set to 1.
int skippable;
int best_mode_index;
int hybrid_pred_diff;
diff --git a/third_party/aom/av1/encoder/corner_match.c b/third_party/aom/av1/encoder/corner_match.c
index 64ee0c5ae..3827b65fa 100644
--- a/third_party/aom/av1/encoder/corner_match.c
+++ b/third_party/aom/av1/encoder/corner_match.c
@@ -9,16 +9,13 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <math.h>
+#include "./av1_rtcd.h"
#include "av1/encoder/corner_match.h"
-#define MATCH_SZ 13
-#define MATCH_SZ_BY2 ((MATCH_SZ - 1) / 2)
-#define MATCH_SZ_SQ (MATCH_SZ * MATCH_SZ)
#define SEARCH_SZ 9
#define SEARCH_SZ_BY2 ((SEARCH_SZ - 1) / 2)
@@ -28,8 +25,8 @@
centered at (x, y).
*/
static double compute_variance(unsigned char *im, int stride, int x, int y) {
- int sum = 0.0;
- int sumsq = 0.0;
+ int sum = 0;
+ int sumsq = 0;
int var;
int i, j;
for (i = 0; i < MATCH_SZ; ++i)
@@ -46,9 +43,9 @@ static double compute_variance(unsigned char *im, int stride, int x, int y) {
correlation/standard deviation are taken over MATCH_SZ by MATCH_SZ windows
of each image, centered at (x1, y1) and (x2, y2) respectively.
*/
-static double compute_cross_correlation(unsigned char *im1, int stride1, int x1,
- int y1, unsigned char *im2, int stride2,
- int x2, int y2) {
+double compute_cross_correlation_c(unsigned char *im1, int stride1, int x1,
+ int y1, unsigned char *im2, int stride2,
+ int x2, int y2) {
int v1, v2;
int sum1 = 0;
int sum2 = 0;
diff --git a/third_party/aom/av1/encoder/corner_match.h b/third_party/aom/av1/encoder/corner_match.h
index c0458642c..3b16f9efc 100644
--- a/third_party/aom/av1/encoder/corner_match.h
+++ b/third_party/aom/av1/encoder/corner_match.h
@@ -15,6 +15,10 @@
#include <stdlib.h>
#include <memory.h>
+#define MATCH_SZ 13
+#define MATCH_SZ_BY2 ((MATCH_SZ - 1) / 2)
+#define MATCH_SZ_SQ (MATCH_SZ * MATCH_SZ)
+
typedef struct {
int x, y;
int rx, ry;
diff --git a/third_party/aom/av1/encoder/daala_compat_enc.c b/third_party/aom/av1/encoder/daala_compat_enc.c
index 3df424cac..c60e2d3d7 100644
--- a/third_party/aom/av1/encoder/daala_compat_enc.c
+++ b/third_party/aom/av1/encoder/daala_compat_enc.c
@@ -12,19 +12,19 @@
#include "encint.h"
void od_encode_checkpoint(const daala_enc_ctx *enc, od_rollback_buffer *rbuf) {
-#if CONFIG_DAALA_EC
+#if !CONFIG_ANS
od_ec_enc_checkpoint(&rbuf->ec, &enc->w.ec);
#else
-#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
+#error "CONFIG_PVQ currently requires !CONFIG_ANS."
#endif
OD_COPY(&rbuf->adapt, enc->state.adapt, 1);
}
void od_encode_rollback(daala_enc_ctx *enc, const od_rollback_buffer *rbuf) {
-#if CONFIG_DAALA_EC
+#if !CONFIG_ANS
od_ec_enc_rollback(&enc->w.ec, &rbuf->ec);
#else
-#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
+#error "CONFIG_PVQ currently requires !CONFIG_ANS."
#endif
OD_COPY(enc->state.adapt, &rbuf->adapt, 1);
}
diff --git a/third_party/aom/av1/encoder/dct.c b/third_party/aom/av1/encoder/dct.c
index 09e1b0563..f6b64f0f7 100644
--- a/third_party/aom/av1/encoder/dct.c
+++ b/third_party/aom/av1/encoder/dct.c
@@ -19,7 +19,7 @@
#include "aom_ports/mem.h"
#include "av1/common/blockd.h"
#include "av1/common/av1_fwd_txfm1d.h"
-#include "av1/common/av1_fwd_txfm2d_cfg.h"
+#include "av1/common/av1_fwd_txfm1d_cfg.h"
#include "av1/common/idct.h"
static INLINE void range_check(const tran_low_t *input, const int size,
@@ -1022,6 +1022,10 @@ static void fhalfright32(const tran_low_t *input, tran_low_t *output) {
}
#if CONFIG_EXT_TX
+// TODO(sarahparker) these functions will be removed once the highbitdepth
+// codepath works properly for rectangular transforms. They have almost
+// identical versions in av1_fwd_txfm1d.c, but those are currently only
+// being used for square transforms.
static void fidtx4(const tran_low_t *input, tran_low_t *output) {
int i;
for (i = 0; i < 4; ++i)
@@ -2133,8 +2137,7 @@ static void fdct64_col(const tran_low_t *input, tran_low_t *output) {
int32_t in[64], out[64];
int i;
for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
- av1_fdct64_new(in, out, fwd_cos_bit_col_dct_dct_64,
- fwd_stage_range_col_dct_dct_64);
+ av1_fdct64_new(in, out, fwd_cos_bit_col_dct_64, fwd_stage_range_col_dct_64);
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
@@ -2142,8 +2145,7 @@ static void fdct64_row(const tran_low_t *input, tran_low_t *output) {
int32_t in[64], out[64];
int i;
for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
- av1_fdct64_new(in, out, fwd_cos_bit_row_dct_dct_64,
- fwd_stage_range_row_dct_dct_64);
+ av1_fdct64_new(in, out, fwd_cos_bit_row_dct_64, fwd_stage_range_row_dct_64);
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
@@ -2225,4 +2227,49 @@ void av1_highbd_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
}
#endif // CONFIG_TX64X64
#endif // CONFIG_HIGHBITDEPTH
+
+#if CONFIG_DPCM_INTRA
+// 4-point forward 1-D transform (compiled under CONFIG_DPCM_INTRA).
+//   input   - source samples; element i is read from input[i * stride]
+//   stride  - distance, in int16_t elements, between consecutive samples
+//   tx_type - selects the 1-D transform; must be < TX_TYPES_1D (asserted)
+//   output  - receives the result of the selected transform
+// Each sample is multiplied by 4 * Sqrt2 and passed through
+// fdct_round_shift() before the transform is applied.
+void av1_dpcm_ft4_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
+ tran_low_t *output) {
+ assert(tx_type < TX_TYPES_1D);
+ // Transform table indexed by tx_type. Entries 1 and 2 are both fadst4 --
+ // NOTE(review): presumably ADST/FLIPADST with the flip handled by the
+ // caller; confirm against the TX_TYPE_1D enum.
+ static const transform_1d FHT[] = { fdct4, fadst4, fadst4, fidtx4 };
+ const transform_1d ft = FHT[tx_type];
+ tran_low_t temp_in[4];
+ // Gather the strided samples into a contiguous, pre-scaled buffer.
+ for (int i = 0; i < 4; ++i)
+ temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 4 * Sqrt2);
+ ft(temp_in, output);
+}
+
+// 8-point forward 1-D transform (compiled under CONFIG_DPCM_INTRA).
+//   input   - source samples; element i is read from input[i * stride]
+//   stride  - distance, in int16_t elements, between consecutive samples
+//   tx_type - selects the 1-D transform; must be < TX_TYPES_1D (asserted)
+//   output  - receives the result of the selected transform
+// Each sample is multiplied by 4 (no rounding step) before the transform is
+// applied.
+void av1_dpcm_ft8_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
+ tran_low_t *output) {
+ assert(tx_type < TX_TYPES_1D);
+ // Transform table indexed by tx_type. Entries 1 and 2 are both fadst8 --
+ // NOTE(review): presumably ADST/FLIPADST with the flip handled by the
+ // caller; confirm against the TX_TYPE_1D enum.
+ static const transform_1d FHT[] = { fdct8, fadst8, fadst8, fidtx8 };
+ const transform_1d ft = FHT[tx_type];
+ tran_low_t temp_in[8];
+ // Gather the strided samples into a contiguous, pre-scaled buffer.
+ for (int i = 0; i < 8; ++i) temp_in[i] = input[i * stride] * 4;
+ ft(temp_in, output);
+}
+
+// 16-point forward 1-D transform (compiled under CONFIG_DPCM_INTRA).
+//   input   - source samples; element i is read from input[i * stride]
+//   stride  - distance, in int16_t elements, between consecutive samples
+//   tx_type - selects the 1-D transform; must be < TX_TYPES_1D (asserted)
+//   output  - receives the result of the selected transform
+// Each sample is multiplied by 2 * Sqrt2 and passed through
+// fdct_round_shift() before the transform is applied.
+void av1_dpcm_ft16_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
+ tran_low_t *output) {
+ assert(tx_type < TX_TYPES_1D);
+ // Transform table indexed by tx_type. Entries 1 and 2 are both fadst16 --
+ // NOTE(review): presumably ADST/FLIPADST with the flip handled by the
+ // caller; confirm against the TX_TYPE_1D enum.
+ static const transform_1d FHT[] = { fdct16, fadst16, fadst16, fidtx16 };
+ const transform_1d ft = FHT[tx_type];
+ tran_low_t temp_in[16];
+ // Gather the strided samples into a contiguous, pre-scaled buffer.
+ for (int i = 0; i < 16; ++i)
+ temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 2 * Sqrt2);
+ ft(temp_in, output);
+}
+
+// 32-point forward 1-D transform (compiled under CONFIG_DPCM_INTRA).
+//   input   - source samples; element i is read from input[i * stride]
+//   stride  - distance, in int16_t elements, between consecutive samples
+//   tx_type - selects the 1-D transform; must be < TX_TYPES_1D (asserted)
+//   output  - receives the result of the selected transform
+// Unlike the 4-, 8- and 16-point variants, the samples are copied without any
+// pre-scaling.
+void av1_dpcm_ft32_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
+ tran_low_t *output) {
+ assert(tx_type < TX_TYPES_1D);
+ // Transform table indexed by tx_type. Entries 1 and 2 both use
+ // fhalfright32 rather than an ADST -- NOTE(review): presumably because no
+ // 32-point ADST exists in this file; confirm.
+ static const transform_1d FHT[] = { fdct32, fhalfright32, fhalfright32,
+ fidtx32 };
+ const transform_1d ft = FHT[tx_type];
+ tran_low_t temp_in[32];
+ // Gather the strided samples into a contiguous buffer.
+ for (int i = 0; i < 32; ++i) temp_in[i] = input[i * stride];
+ ft(temp_in, output);
+}
+#endif // CONFIG_DPCM_INTRA
#endif // !AV1_DCT_GTEST
diff --git a/third_party/aom/av1/encoder/encodeframe.c b/third_party/aom/av1/encoder/encodeframe.c
index d254157e7..36d09c02a 100644
--- a/third_party/aom/av1/encoder/encodeframe.c
+++ b/third_party/aom/av1/encoder/encodeframe.c
@@ -72,8 +72,7 @@
static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row,
- int mi_col, BLOCK_SIZE bsize,
- PICK_MODE_CONTEXT *ctx, int *rate);
+ int mi_col, BLOCK_SIZE bsize, int *rate);
#if CONFIG_SUPERTX
static int check_intra_b(PICK_MODE_CONTEXT *ctx);
@@ -273,14 +272,13 @@ static void set_offsets_without_segment_id(const AV1_COMP *const cpi,
const int mi_width = mi_size_wide[bsize];
const int mi_height = mi_size_high[bsize];
- set_skip_context(xd, mi_row, mi_col);
-
set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
-
+ set_skip_context(xd, mi_row, mi_col);
#if CONFIG_VAR_TX
- xd->above_txfm_context = cm->above_txfm_context + mi_col;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
+ xd->above_txfm_context =
+ cm->above_txfm_context + (mi_col << TX_UNIT_WIDE_LOG2);
+ xd->left_txfm_context = xd->left_txfm_context_buffer +
+ ((mi_row & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2);
xd->max_tx_size = max_txsize_lookup[bsize];
#endif
@@ -452,563 +450,6 @@ static void set_segment_id_supertx(const AV1_COMP *const cpi,
}
#endif // CONFIG_SUPERTX
-static void set_block_size(AV1_COMP *const cpi, MACROBLOCK *const x,
- MACROBLOCKD *const xd, int mi_row, int mi_col,
- BLOCK_SIZE bsize) {
- if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
- const int mi_width = AOMMAX(mi_size_wide[bsize], mi_size_wide[BLOCK_8X8]);
- const int mi_height = AOMMAX(mi_size_high[bsize], mi_size_high[BLOCK_8X8]);
- for (int r = 0; r < mi_height; ++r) {
- for (int c = 0; c < mi_width; ++c) {
- set_mode_info_offsets(cpi, x, xd, mi_row + r, mi_col + c);
- xd->mi[0]->mbmi.sb_type = bsize;
- }
- }
- }
-}
-
-static void set_vt_partitioning(AV1_COMP *cpi, MACROBLOCK *const x,
- MACROBLOCKD *const xd, VAR_TREE *vt, int mi_row,
- int mi_col, const int64_t *const threshold,
- const BLOCK_SIZE *const bsize_min) {
- AV1_COMMON *const cm = &cpi->common;
- const int hbw = mi_size_wide[vt->bsize] / 2;
- const int hbh = mi_size_high[vt->bsize] / 2;
- const int has_cols = mi_col + hbw < cm->mi_cols;
- const int has_rows = mi_row + hbh < cm->mi_rows;
-
- if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
-
- assert(vt->bsize >= BLOCK_8X8);
-
- assert(hbh == hbw);
-
- if (vt->bsize == BLOCK_8X8 && cm->frame_type != KEY_FRAME) {
- set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_8X8);
- return;
- }
-
- if (vt->force_split || (!has_cols && !has_rows)) goto split;
-
- // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
- // variance is below threshold, otherwise split will be selected.
- // No check for vert/horiz split as too few samples for variance.
- if (vt->bsize == bsize_min[0]) {
- if (has_cols && has_rows && vt->variances.none.variance < threshold[0]) {
- set_block_size(cpi, x, xd, mi_row, mi_col, vt->bsize);
- return;
- } else {
- BLOCK_SIZE subsize = get_subsize(vt->bsize, PARTITION_SPLIT);
- set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
- if (vt->bsize > BLOCK_8X8) {
- set_block_size(cpi, x, xd, mi_row, mi_col + hbw, subsize);
- set_block_size(cpi, x, xd, mi_row + hbh, mi_col, subsize);
- set_block_size(cpi, x, xd, mi_row + hbh, mi_col + hbw, subsize);
- }
- return;
- }
- } else if (vt->bsize > bsize_min[0]) {
- // For key frame: take split for bsize above 32X32 or very high variance.
- if (cm->frame_type == KEY_FRAME &&
- (vt->bsize > BLOCK_32X32 ||
- vt->variances.none.variance > (threshold[0] << 4))) {
- goto split;
- }
- // If variance is low, take the bsize (no split).
- if (has_cols && has_rows && vt->variances.none.variance < threshold[0]) {
- set_block_size(cpi, x, xd, mi_row, mi_col, vt->bsize);
- return;
- }
-
- // Check vertical split.
- if (has_rows) {
- BLOCK_SIZE subsize = get_subsize(vt->bsize, PARTITION_VERT);
- if (vt->variances.vert[0].variance < threshold[0] &&
- vt->variances.vert[1].variance < threshold[0] &&
- get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
- set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
- set_block_size(cpi, x, xd, mi_row, mi_col + hbw, subsize);
- return;
- }
- }
- // Check horizontal split.
- if (has_cols) {
- BLOCK_SIZE subsize = get_subsize(vt->bsize, PARTITION_HORZ);
- if (vt->variances.horz[0].variance < threshold[0] &&
- vt->variances.horz[1].variance < threshold[0] &&
- get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
- set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
- set_block_size(cpi, x, xd, mi_row + hbh, mi_col, subsize);
- return;
- }
- }
- }
-
-split : {
- set_vt_partitioning(cpi, x, xd, vt->split[0], mi_row, mi_col, threshold + 1,
- bsize_min + 1);
- set_vt_partitioning(cpi, x, xd, vt->split[1], mi_row, mi_col + hbw,
- threshold + 1, bsize_min + 1);
- set_vt_partitioning(cpi, x, xd, vt->split[2], mi_row + hbh, mi_col,
- threshold + 1, bsize_min + 1);
- set_vt_partitioning(cpi, x, xd, vt->split[3], mi_row + hbh, mi_col + hbw,
- threshold + 1, bsize_min + 1);
- return;
-}
-}
-
-// Set the variance split thresholds for following the block sizes:
-// 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16,
-// 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is
-// currently only used on key frame.
-static void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[], int q) {
- AV1_COMMON *const cm = &cpi->common;
- const int is_key_frame = (cm->frame_type == KEY_FRAME);
- const int threshold_multiplier = is_key_frame ? 20 : 1;
- const int64_t threshold_base =
- (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]);
- if (is_key_frame) {
- thresholds[1] = threshold_base;
- thresholds[2] = threshold_base >> 2;
- thresholds[3] = threshold_base >> 2;
- thresholds[4] = threshold_base << 2;
- } else {
- thresholds[2] = threshold_base;
- if (cm->width <= 352 && cm->height <= 288) {
- thresholds[1] = threshold_base >> 2;
- thresholds[3] = threshold_base << 3;
- } else {
- thresholds[1] = threshold_base;
- thresholds[2] = (5 * threshold_base) >> 2;
- if (cm->width >= 1920 && cm->height >= 1080)
- thresholds[2] = (7 * threshold_base) >> 2;
- thresholds[3] = threshold_base << cpi->oxcf.speed;
- }
- }
- thresholds[0] = INT64_MIN;
-}
-
-void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int q) {
- AV1_COMMON *const cm = &cpi->common;
- SPEED_FEATURES *const sf = &cpi->sf;
- const int is_key_frame = (cm->frame_type == KEY_FRAME);
- if (sf->partition_search_type != VAR_BASED_PARTITION &&
- sf->partition_search_type != REFERENCE_PARTITION) {
- return;
- } else {
- set_vbp_thresholds(cpi, cpi->vbp_thresholds, q);
- // The thresholds below are not changed locally.
- if (is_key_frame) {
- cpi->vbp_threshold_sad = 0;
- cpi->vbp_bsize_min = BLOCK_8X8;
- } else {
- if (cm->width <= 352 && cm->height <= 288)
- cpi->vbp_threshold_sad = 100;
- else
- cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000
- ? (cpi->y_dequant[q][1] << 1)
- : 1000;
- cpi->vbp_bsize_min = BLOCK_16X16;
- }
- cpi->vbp_threshold_minmax = 15 + (q >> 3);
- }
-}
-
-// Compute the minmax over the 8x8 subblocks.
-static int compute_minmax_8x8(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
-#if CONFIG_HIGHBITDEPTH
- int highbd,
-#endif
- int pixels_wide, int pixels_high) {
- int k;
- int minmax_max = 0;
- int minmax_min = 255;
- // Loop over the 4 8x8 subblocks.
- for (k = 0; k < 4; k++) {
- const int x8_idx = ((k & 1) << 3);
- const int y8_idx = ((k >> 1) << 3);
- int min = 0;
- int max = 0;
- if (x8_idx < pixels_wide && y8_idx < pixels_high) {
- const int src_offset = y8_idx * src_stride + x8_idx;
- const int ref_offset = y8_idx * ref_stride + x8_idx;
-#if CONFIG_HIGHBITDEPTH
- if (highbd) {
- aom_highbd_minmax_8x8(src + src_offset, src_stride, ref + ref_offset,
- ref_stride, &min, &max);
- } else {
- aom_minmax_8x8(src + src_offset, src_stride, ref + ref_offset,
- ref_stride, &min, &max);
- }
-#else
- aom_minmax_8x8(src + src_offset, src_stride, ref + ref_offset, ref_stride,
- &min, &max);
-#endif
- if ((max - min) > minmax_max) minmax_max = (max - min);
- if ((max - min) < minmax_min) minmax_min = (max - min);
- }
- }
- return (minmax_max - minmax_min);
-}
-
-#if CONFIG_HIGHBITDEPTH
-static INLINE int avg_4x4(const uint8_t *const src, const int stride,
- const int highbd) {
- if (highbd) {
- return aom_highbd_avg_4x4(src, stride);
- } else {
- return aom_avg_4x4(src, stride);
- }
-}
-#else
-static INLINE int avg_4x4(const uint8_t *const src, const int stride) {
- return aom_avg_4x4(src, stride);
-}
-#endif
-
-#if CONFIG_HIGHBITDEPTH
-static INLINE int avg_8x8(const uint8_t *const src, const int stride,
- const int highbd) {
- if (highbd) {
- return aom_highbd_avg_8x8(src, stride);
- } else {
- return aom_avg_8x8(src, stride);
- }
-}
-#else
-static INLINE int avg_8x8(const uint8_t *const src, const int stride) {
- return aom_avg_8x8(src, stride);
-}
-#endif
-
-static void init_variance_tree(VAR_TREE *const vt,
-#if CONFIG_HIGHBITDEPTH
- const int highbd,
-#endif
- BLOCK_SIZE bsize, BLOCK_SIZE leaf_size,
- const int width, const int height,
- const uint8_t *const src, const int src_stride,
- const uint8_t *const ref, const int ref_stride) {
- assert(bsize >= leaf_size);
-
- vt->bsize = bsize;
-
- vt->force_split = 0;
-
- vt->src = src;
- vt->src_stride = src_stride;
- vt->ref = ref;
- vt->ref_stride = ref_stride;
-
- vt->width = width;
- vt->height = height;
-
-#if CONFIG_HIGHBITDEPTH
- vt->highbd = highbd;
-#endif // CONFIG_HIGHBITDEPTH
-
- if (bsize > leaf_size) {
- const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
- const int px = block_size_wide[subsize];
-
- init_variance_tree(vt->split[0],
-#if CONFIG_HIGHBITDEPTH
- highbd,
-#endif // CONFIG_HIGHBITDEPTH
- subsize, leaf_size, AOMMIN(px, width),
- AOMMIN(px, height), src, src_stride, ref, ref_stride);
- init_variance_tree(vt->split[1],
-#if CONFIG_HIGHBITDEPTH
- highbd,
-#endif // CONFIG_HIGHBITDEPTH
- subsize, leaf_size, width - px, AOMMIN(px, height),
- src + px, src_stride, ref + px, ref_stride);
- init_variance_tree(vt->split[2],
-#if CONFIG_HIGHBITDEPTH
- highbd,
-#endif // CONFIG_HIGHBITDEPTH
- subsize, leaf_size, AOMMIN(px, width), height - px,
- src + px * src_stride, src_stride, ref + px * ref_stride,
- ref_stride);
- init_variance_tree(vt->split[3],
-#if CONFIG_HIGHBITDEPTH
- highbd,
-#endif // CONFIG_HIGHBITDEPTH
- subsize, leaf_size, width - px, height - px,
- src + px * src_stride + px, src_stride,
- ref + px * ref_stride + px, ref_stride);
- }
-}
-
-// Fill the variance tree based on averaging pixel values (sub-sampling), at
-// the leaf node size.
-static void fill_variance_tree(VAR_TREE *const vt, const BLOCK_SIZE leaf_size) {
- if (vt->bsize > leaf_size) {
- fill_variance_tree(vt->split[0], leaf_size);
- fill_variance_tree(vt->split[1], leaf_size);
- fill_variance_tree(vt->split[2], leaf_size);
- fill_variance_tree(vt->split[3], leaf_size);
- fill_variance_node(vt);
- } else if (vt->width <= 0 || vt->height <= 0) {
- fill_variance(0, 0, 0, &vt->variances.none);
- } else {
- unsigned int sse = 0;
- int sum = 0;
- int src_avg;
- int ref_avg;
- assert(leaf_size == BLOCK_4X4 || leaf_size == BLOCK_8X8);
- if (leaf_size == BLOCK_4X4) {
- src_avg = avg_4x4(vt->src, vt->src_stride IF_HBD(, vt->highbd));
- ref_avg = avg_4x4(vt->ref, vt->ref_stride IF_HBD(, vt->highbd));
- } else {
- src_avg = avg_8x8(vt->src, vt->src_stride IF_HBD(, vt->highbd));
- ref_avg = avg_8x8(vt->ref, vt->ref_stride IF_HBD(, vt->highbd));
- }
- sum = src_avg - ref_avg;
- sse = sum * sum;
- fill_variance(sse, sum, 0, &vt->variances.none);
- }
-}
-
-static void refine_variance_tree(VAR_TREE *const vt, const int64_t threshold) {
- if (vt->bsize >= BLOCK_8X8) {
- if (vt->bsize == BLOCK_16X16) {
- if (vt->variances.none.variance <= threshold)
- return;
- else
- vt->force_split = 0;
- }
-
- refine_variance_tree(vt->split[0], threshold);
- refine_variance_tree(vt->split[1], threshold);
- refine_variance_tree(vt->split[2], threshold);
- refine_variance_tree(vt->split[3], threshold);
-
- if (vt->bsize <= BLOCK_16X16) fill_variance_node(vt);
- } else if (vt->width <= 0 || vt->height <= 0) {
- fill_variance(0, 0, 0, &vt->variances.none);
- } else {
- const int src_avg = avg_4x4(vt->src, vt->src_stride IF_HBD(, vt->highbd));
- const int ref_avg = avg_4x4(vt->ref, vt->ref_stride IF_HBD(, vt->highbd));
- const int sum = src_avg - ref_avg;
- const unsigned int sse = sum * sum;
- assert(vt->bsize == BLOCK_4X4);
- fill_variance(sse, sum, 0, &vt->variances.none);
- }
-}
-
-static int check_split_key_frame(VAR_TREE *const vt, const int64_t threshold) {
- if (vt->bsize == BLOCK_32X32) {
- vt->force_split = vt->variances.none.variance > threshold;
- } else {
- vt->force_split |= check_split_key_frame(vt->split[0], threshold);
- vt->force_split |= check_split_key_frame(vt->split[1], threshold);
- vt->force_split |= check_split_key_frame(vt->split[2], threshold);
- vt->force_split |= check_split_key_frame(vt->split[3], threshold);
- }
- return vt->force_split;
-}
-
-static int check_split(AV1_COMP *const cpi, VAR_TREE *const vt,
- const int segment_id, const int64_t *const thresholds) {
- if (vt->bsize == BLOCK_16X16) {
- vt->force_split = vt->variances.none.variance > thresholds[0];
- if (!vt->force_split && vt->variances.none.variance > thresholds[-1] &&
- !cyclic_refresh_segment_id_boosted(segment_id)) {
- // We have some nominal amount of 16x16 variance (based on average),
- // compute the minmax over the 8x8 sub-blocks, and if above threshold,
- // force split to 8x8 block for this 16x16 block.
- int minmax =
- compute_minmax_8x8(vt->src, vt->src_stride, vt->ref, vt->ref_stride,
-#if CONFIG_HIGHBITDEPTH
- vt->highbd,
-#endif
- vt->width, vt->height);
- vt->force_split = minmax > cpi->vbp_threshold_minmax;
- }
- } else {
- vt->force_split |=
- check_split(cpi, vt->split[0], segment_id, thresholds + 1);
- vt->force_split |=
- check_split(cpi, vt->split[1], segment_id, thresholds + 1);
- vt->force_split |=
- check_split(cpi, vt->split[2], segment_id, thresholds + 1);
- vt->force_split |=
- check_split(cpi, vt->split[3], segment_id, thresholds + 1);
-
- if (vt->bsize == BLOCK_32X32 && !vt->force_split) {
- vt->force_split = vt->variances.none.variance > thresholds[0];
- }
- }
-
- return vt->force_split;
-}
-
-// This function chooses partitioning based on the variance between source and
-// reconstructed last (or golden), where variance is computed for down-sampled
-// inputs.
-static void choose_partitioning(AV1_COMP *const cpi, ThreadData *const td,
- const TileInfo *const tile, MACROBLOCK *const x,
- const int mi_row, const int mi_col) {
- AV1_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &x->e_mbd;
- VAR_TREE *const vt = td->var_root[cm->mib_size_log2 - MIN_MIB_SIZE_LOG2];
-#if CONFIG_DUAL_FILTER
- int i;
-#endif
- const uint8_t *src;
- const uint8_t *ref;
- int src_stride;
- int ref_stride;
- int pixels_wide = MI_SIZE * mi_size_wide[cm->sb_size];
- int pixels_high = MI_SIZE * mi_size_high[cm->sb_size];
- int64_t thresholds[5] = {
- cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], cpi->vbp_thresholds[2],
- cpi->vbp_thresholds[3], cpi->vbp_thresholds[4],
- };
- BLOCK_SIZE bsize_min[5] = { BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
- cpi->vbp_bsize_min, BLOCK_8X8 };
- const int start_level = cm->sb_size == BLOCK_64X64 ? 1 : 0;
- const int64_t *const thre = thresholds + start_level;
- const BLOCK_SIZE *const bmin = bsize_min + start_level;
-
- const int is_key_frame = (cm->frame_type == KEY_FRAME);
- const int low_res = (cm->width <= 352 && cm->height <= 288);
-
- int segment_id = CR_SEGMENT_ID_BASE;
-
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
- const uint8_t *const map =
- cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
- segment_id = get_segment_id(cm, map, cm->sb_size, mi_row, mi_col);
-
- if (cyclic_refresh_segment_id_boosted(segment_id)) {
- int q = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
- set_vbp_thresholds(cpi, thresholds, q);
- }
- }
-
- set_offsets(cpi, tile, x, mi_row, mi_col, cm->sb_size);
-
- if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
- if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
-
- src = x->plane[0].src.buf;
- src_stride = x->plane[0].src.stride;
-
- if (!is_key_frame) {
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
- const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
- unsigned int y_sad, y_sad_g;
-
- const int hbs = cm->mib_size / 2;
- const int split_vert = mi_col + hbs >= cm->mi_cols;
- const int split_horz = mi_row + hbs >= cm->mi_rows;
- BLOCK_SIZE bsize;
-
- if (split_vert && split_horz)
- bsize = get_subsize(cm->sb_size, PARTITION_SPLIT);
- else if (split_vert)
- bsize = get_subsize(cm->sb_size, PARTITION_VERT);
- else if (split_horz)
- bsize = get_subsize(cm->sb_size, PARTITION_HORZ);
- else
- bsize = cm->sb_size;
-
- assert(yv12 != NULL);
-
- if (yv12_g && yv12_g != yv12) {
- av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
- &cm->frame_refs[GOLDEN_FRAME - 1].sf);
- y_sad_g = cpi->fn_ptr[bsize].sdf(
- x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
- xd->plane[0].pre[0].stride);
- } else {
- y_sad_g = UINT_MAX;
- }
-
- av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
- &cm->frame_refs[LAST_FRAME - 1].sf);
- mbmi->ref_frame[0] = LAST_FRAME;
- mbmi->ref_frame[1] = NONE_FRAME;
- mbmi->sb_type = cm->sb_size;
- mbmi->mv[0].as_int = 0;
-#if CONFIG_DUAL_FILTER
- for (i = 0; i < 4; ++i) mbmi->interp_filter[i] = BILINEAR;
-#else
- mbmi->interp_filter = BILINEAR;
-#endif
-
- y_sad = av1_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
-
- if (y_sad_g < y_sad) {
- av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
- &cm->frame_refs[GOLDEN_FRAME - 1].sf);
- mbmi->ref_frame[0] = GOLDEN_FRAME;
- mbmi->mv[0].as_int = 0;
- y_sad = y_sad_g;
- } else {
- x->pred_mv[LAST_FRAME] = mbmi->mv[0].as_mv;
- }
-
- av1_build_inter_predictors_sb(xd, mi_row, mi_col, NULL, cm->sb_size);
-
- ref = xd->plane[0].dst.buf;
- ref_stride = xd->plane[0].dst.stride;
-
- // If the y_sad is very small, take the largest partition and exit.
- // Don't check on boosted segment for now, as largest is suppressed there.
- if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) {
- if (!split_vert && !split_horz) {
- set_block_size(cpi, x, xd, mi_row, mi_col, cm->sb_size);
- return;
- }
- }
- } else {
- ref = AV1_VAR_OFFS;
- ref_stride = 0;
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- switch (xd->bd) {
- case 10: ref = CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10); break;
- case 12: ref = CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12); break;
- case 8:
- default: ref = CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8); break;
- }
- }
-#endif // CONFIG_HIGHBITDEPTH
- }
-
- init_variance_tree(
- vt,
-#if CONFIG_HIGHBITDEPTH
- xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH,
-#endif // CONFIG_HIGHBITDEPTH
- cm->sb_size, (is_key_frame || low_res) ? BLOCK_4X4 : BLOCK_8X8,
- pixels_wide, pixels_high, src, src_stride, ref, ref_stride);
-
- // Fill in the entire tree of variances and compute splits.
- if (is_key_frame) {
- fill_variance_tree(vt, BLOCK_4X4);
- check_split_key_frame(vt, thre[1]);
- } else {
- fill_variance_tree(vt, BLOCK_8X8);
- check_split(cpi, vt, segment_id, thre);
- if (low_res) {
- refine_variance_tree(vt, thre[1] << 1);
- }
- }
-
- vt->force_split |= mi_col + cm->mib_size > cm->mi_cols ||
- mi_row + cm->mib_size > cm->mi_rows;
-
- // Now go through the entire structure, splitting every block size until
- // we get to one that's got a variance lower than our threshold.
- set_vt_partitioning(cpi, x, xd, vt, mi_row, mi_col, thre, bmin);
-}
-
#if CONFIG_DUAL_FILTER
static void reset_intmv_filter_type(const AV1_COMMON *const cm, MACROBLOCKD *xd,
MB_MODE_INFO *mbmi) {
@@ -1067,7 +508,6 @@ static void reset_tx_size(MACROBLOCKD *xd, MB_MODE_INFO *mbmi,
}
}
-#if CONFIG_REF_MV
static void set_ref_and_pred_mvs(MACROBLOCK *const x, int_mv *const mi_pred_mv,
int8_t rf_type) {
MACROBLOCKD *const xd = &x->e_mbd;
@@ -1116,7 +556,6 @@ static void set_ref_and_pred_mvs(MACROBLOCK *const x, int_mv *const mi_pred_mv,
}
#endif // CONFIG_EXT_INTER
}
-#endif // CONFIG_REF_MV
static void update_state(const AV1_COMP *const cpi, ThreadData *td,
PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
@@ -1144,9 +583,7 @@ static void update_state(const AV1_COMP *const cpi, ThreadData *td,
const int mi_height = mi_size_high[bsize];
const int unify_bsize = CONFIG_CB4X4;
-#if CONFIG_REF_MV
int8_t rf_type;
-#endif
#if !CONFIG_SUPERTX
assert(mi->mbmi.sb_type == bsize);
@@ -1159,13 +596,11 @@ static void update_state(const AV1_COMP *const cpi, ThreadData *td,
reset_intmv_filter_type(cm, xd, mbmi);
#endif
-#if CONFIG_REF_MV
rf_type = av1_ref_frame_type(mbmi->ref_frame);
if (x->mbmi_ext->ref_mv_count[rf_type] > 1 &&
(mbmi->sb_type >= BLOCK_8X8 || unify_bsize)) {
set_ref_and_pred_mvs(x, mi->mbmi.pred_mv, rf_type);
}
-#endif // CONFIG_REF_MV
// If segmentation in use
if (seg->enabled) {
@@ -1250,7 +685,11 @@ static void update_state(const AV1_COMP *const cpi, ThreadData *td,
THR_D63_PRED /*D63_PRED*/,
#if CONFIG_ALT_INTRA
THR_SMOOTH, /*SMOOTH_PRED*/
-#endif // CONFIG_ALT_INTRA
+#if CONFIG_SMOOTH_HV
+ THR_SMOOTH_V, /*SMOOTH_V_PRED*/
+ THR_SMOOTH_H, /*SMOOTH_H_PRED*/
+#endif // CONFIG_SMOOTH_HV
+#endif // CONFIG_ALT_INTRA
THR_TM /*TM_PRED*/,
};
++mode_chosen_counts[kf_mode_index[mbmi->mode]];
@@ -1339,9 +778,7 @@ static void update_state_supertx(const AV1_COMP *const cpi, ThreadData *td,
MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
int w, h;
-#if CONFIG_REF_MV
int8_t rf_type;
-#endif
*mi_addr = *mi;
*x->mbmi_ext = ctx->mbmi_ext;
@@ -1352,13 +789,11 @@ static void update_state_supertx(const AV1_COMP *const cpi, ThreadData *td,
reset_intmv_filter_type(cm, xd, mbmi);
#endif
-#if CONFIG_REF_MV
rf_type = av1_ref_frame_type(mbmi->ref_frame);
if (x->mbmi_ext->ref_mv_count[rf_type] > 1 &&
(mbmi->sb_type >= BLOCK_8X8 || unify_bsize)) {
set_ref_and_pred_mvs(x, mi->mbmi.pred_mv, rf_type);
}
-#endif // CONFIG_REF_MV
// If segmentation in use
if (seg->enabled) {
@@ -1846,6 +1281,29 @@ static int set_segment_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
}
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+static void daala_dist_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8,
+ BLOCK_SIZE bsize, int bw, int bh,
+ int mi_row, int mi_col) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ struct macroblockd_plane *const pd = &xd->plane[0];
+ const int dst_stride = pd->dst.stride;
+ uint8_t *dst = pd->dst.buf;
+
+ assert(bsize < BLOCK_8X8);
+
+ if (bsize < BLOCK_8X8) {
+ int i, j;
+ uint8_t *dst_sub8x8 = &dst8x8[((mi_row & 1) * 8 + (mi_col & 1)) << 2];
+
+ for (j = 0; j < bh; ++j)
+ for (i = 0; i < bw; ++i) {
+ dst_sub8x8[j * 8 + i] = dst[j * dst_stride + i];
+ }
+ }
+}
+#endif
+
static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data,
MACROBLOCK *const x, int mi_row, int mi_col,
RD_STATS *rd_cost,
@@ -1865,7 +1323,6 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data,
struct macroblockd_plane *const pd = xd->plane;
const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
int i, orig_rdmult;
- const int unify_bsize = CONFIG_CB4X4;
aom_clear_system_state();
@@ -1915,7 +1372,6 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data,
#endif // CONFIG_PALETTE
ctx->skippable = 0;
- ctx->pred_pixel_ready = 0;
// Set to zero to make sure we do not use the previous encoded frame stats
mbmi->skip = 0;
@@ -1967,38 +1423,21 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data,
*totalrate_nocoef = 0;
#endif // CONFIG_SUPERTX
} else {
- if (bsize >= BLOCK_8X8 || unify_bsize) {
- if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
- av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col,
- rd_cost, bsize, ctx, best_rd);
+ if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col,
+ rd_cost, bsize, ctx, best_rd);
#if CONFIG_SUPERTX
- *totalrate_nocoef = rd_cost->rate;
+ *totalrate_nocoef = rd_cost->rate;
#endif // CONFIG_SUPERTX
- } else {
- av1_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
-#if CONFIG_SUPERTX
- totalrate_nocoef,
-#endif // CONFIG_SUPERTX
- bsize, ctx, best_rd);
-#if CONFIG_SUPERTX
- assert(*totalrate_nocoef >= 0);
-#endif // CONFIG_SUPERTX
- }
} else {
- if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
- // The decoder rejects sub8x8 partitions when SEG_LVL_SKIP is set.
- rd_cost->rate = INT_MAX;
- } else {
- av1_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col,
- rd_cost,
+ av1_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
#if CONFIG_SUPERTX
- totalrate_nocoef,
+ totalrate_nocoef,
#endif // CONFIG_SUPERTX
- bsize, ctx, best_rd);
+ bsize, ctx, best_rd);
#if CONFIG_SUPERTX
- assert(*totalrate_nocoef >= 0);
+ assert(*totalrate_nocoef >= 0);
#endif // CONFIG_SUPERTX
- }
}
}
@@ -2020,7 +1459,6 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data,
ctx->dist = rd_cost->dist;
}
-#if CONFIG_REF_MV
static void update_inter_mode_stats(FRAME_COUNTS *counts, PREDICTION_MODE mode,
int16_t mode_context) {
int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
@@ -2050,7 +1488,6 @@ static void update_inter_mode_stats(FRAME_COUNTS *counts, PREDICTION_MODE mode,
}
}
}
-#endif
static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
int mi_col
@@ -2070,7 +1507,6 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
const MB_MODE_INFO *const mbmi = &mi->mbmi;
const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
const BLOCK_SIZE bsize = mbmi->sb_type;
- const int unify_bsize = CONFIG_CB4X4;
#if CONFIG_DELTA_Q
// delta quant applies to both intra and inter
@@ -2125,7 +1561,7 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
if (cm->reference_mode == REFERENCE_MODE_SELECT) {
#if !SUB8X8_COMP_REF
- if (mbmi->sb_type >= BLOCK_8X8)
+ if (mbmi->sb_type != BLOCK_4X4)
counts->comp_inter[av1_get_reference_mode_context(cm, xd)]
[has_second_ref(mbmi)]++;
#else
@@ -2183,12 +1619,12 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
#endif // CONFIG_EXT_REFS
}
-#if CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
if (cm->reference_mode != COMPOUND_REFERENCE &&
#if CONFIG_SUPERTX
!supertx_enabled &&
#endif
- is_interintra_allowed(mbmi)) {
+ cm->allow_interintra_compound && is_interintra_allowed(mbmi)) {
const int bsize_group = size_group_lookup[bsize];
if (mbmi->ref_frame[1] == INTRA_FRAME) {
counts->interintra[bsize_group][1]++;
@@ -2199,7 +1635,7 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
counts->interintra[bsize_group][0]++;
}
}
-#endif // CONFIG_EXT_INTER
+#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
const MOTION_MODE motion_allowed = motion_mode_allowed(
@@ -2242,105 +1678,67 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
if (inter_block &&
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
int16_t mode_ctx;
-#if !CONFIG_REF_MV
- mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
-#endif
- if (bsize >= BLOCK_8X8 || unify_bsize) {
- const PREDICTION_MODE mode = mbmi->mode;
-#if CONFIG_REF_MV
+ const PREDICTION_MODE mode = mbmi->mode;
#if CONFIG_EXT_INTER
- if (has_second_ref(mbmi)) {
- mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
- ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
- } else {
+ if (has_second_ref(mbmi)) {
+ mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
+ ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
+ } else {
#endif // CONFIG_EXT_INTER
- mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
- mbmi->ref_frame, bsize, -1);
- update_inter_mode_stats(counts, mode, mode_ctx);
+ mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
+ mbmi->ref_frame, bsize, -1);
+ update_inter_mode_stats(counts, mode, mode_ctx);
#if CONFIG_EXT_INTER
- }
+ }
#endif // CONFIG_EXT_INTER
#if CONFIG_EXT_INTER
- if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
+ if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
#else
- if (mbmi->mode == NEWMV) {
+ if (mbmi->mode == NEWMV) {
#endif
- uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
- int idx;
+ uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
+ int idx;
- for (idx = 0; idx < 2; ++idx) {
- if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
- uint8_t drl_ctx =
- av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
- ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx];
+ for (idx = 0; idx < 2; ++idx) {
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx =
+ av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
+ ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx];
- if (mbmi->ref_mv_idx == idx) break;
- }
+ if (mbmi->ref_mv_idx == idx) break;
}
}
+ }
#if CONFIG_EXT_INTER
- if (have_nearmv_in_inter_mode(mbmi->mode)) {
+ if (have_nearmv_in_inter_mode(mbmi->mode)) {
#else
- if (mbmi->mode == NEARMV) {
+ if (mbmi->mode == NEARMV) {
#endif
- uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
- int idx;
+ uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
+ int idx;
- for (idx = 1; idx < 3; ++idx) {
- if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
- uint8_t drl_ctx =
- av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
- ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1];
+ for (idx = 1; idx < 3; ++idx) {
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx =
+ av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
+ ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1];
- if (mbmi->ref_mv_idx == idx - 1) break;
- }
- }
- }
-#else
-#if CONFIG_EXT_INTER
- if (is_inter_compound_mode(mode))
- ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
- else
-#endif // CONFIG_EXT_INTER
- ++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)];
-#endif
- } else {
- const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
- const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
- int idx, idy;
- for (idy = 0; idy < 2; idy += num_4x4_h) {
- for (idx = 0; idx < 2; idx += num_4x4_w) {
- const int j = idy * 2 + idx;
- const PREDICTION_MODE b_mode = mi->bmi[j].as_mode;
-#if CONFIG_REF_MV
-#if CONFIG_EXT_INTER
- if (has_second_ref(mbmi)) {
- mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
- ++counts->inter_compound_mode[mode_ctx]
- [INTER_COMPOUND_OFFSET(b_mode)];
- } else {
-#endif // CONFIG_EXT_INTER
- mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
- mbmi->ref_frame, bsize, j);
- update_inter_mode_stats(counts, b_mode, mode_ctx);
-#if CONFIG_EXT_INTER
- }
-#endif // CONFIG_EXT_INTER
-#else
-#if CONFIG_EXT_INTER
- if (is_inter_compound_mode(b_mode))
- ++counts->inter_compound_mode[mode_ctx]
- [INTER_COMPOUND_OFFSET(b_mode)];
- else
-#endif // CONFIG_EXT_INTER
- ++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)];
-#endif
+ if (mbmi->ref_mv_idx == idx - 1) break;
}
}
}
}
+#if CONFIG_INTRABC
+ } else {
+ if (cm->allow_screen_content_tools && bsize >= BLOCK_8X8) {
+ FRAME_COUNTS *const counts = td->counts;
+ ++counts->intrabc[mbmi->use_intrabc];
+ } else {
+ assert(!mbmi->use_intrabc);
+ }
+#endif
}
}
@@ -2352,8 +1750,8 @@ typedef struct {
#if CONFIG_VAR_TX
TXFM_CONTEXT *p_ta;
TXFM_CONTEXT *p_tl;
- TXFM_CONTEXT ta[MAX_MIB_SIZE];
- TXFM_CONTEXT tl[MAX_MIB_SIZE];
+ TXFM_CONTEXT ta[2 * MAX_MIB_SIZE];
+ TXFM_CONTEXT tl[2 * MAX_MIB_SIZE];
#endif
} RD_SEARCH_MACROBLOCK_CONTEXT;
@@ -2373,12 +1771,15 @@ static void restore_context(MACROBLOCK *x,
int mi_width = mi_size_wide[bsize];
int mi_height = mi_size_high[bsize];
for (p = 0; p < MAX_MB_PLANE; p++) {
- memcpy(xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
+ int tx_col;
+ int tx_row;
+ tx_col = mi_col << (MI_SIZE_LOG2 - tx_size_wide_log2[0]);
+ tx_row = (mi_row & MAX_MIB_MASK) << (MI_SIZE_LOG2 - tx_size_high_log2[0]);
+ memcpy(xd->above_context[p] + (tx_col >> xd->plane[p].subsampling_x),
ctx->a + num_4x4_blocks_wide * p,
(sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
xd->plane[p].subsampling_x);
- memcpy(xd->left_context[p] +
- ((mi_row & MAX_MIB_MASK) * 2 >> xd->plane[p].subsampling_y),
+ memcpy(xd->left_context[p] + (tx_row >> xd->plane[p].subsampling_y),
ctx->l + num_4x4_blocks_high * p,
(sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
xd->plane[p].subsampling_y);
@@ -2391,9 +1792,9 @@ static void restore_context(MACROBLOCK *x,
xd->above_txfm_context = ctx->p_ta;
xd->left_txfm_context = ctx->p_tl;
memcpy(xd->above_txfm_context, ctx->ta,
- sizeof(*xd->above_txfm_context) * mi_width);
+ sizeof(*xd->above_txfm_context) * (mi_width << TX_UNIT_WIDE_LOG2));
memcpy(xd->left_txfm_context, ctx->tl,
- sizeof(*xd->left_txfm_context) * mi_height);
+ sizeof(*xd->left_txfm_context) * (mi_height << TX_UNIT_HIGH_LOG2));
#endif
#if CONFIG_PVQ
od_encode_rollback(&x->daala_enc, rdo_buf);
@@ -2417,13 +1818,16 @@ static void save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
// buffer the above/left context information of the block in search.
for (p = 0; p < MAX_MB_PLANE; ++p) {
+ int tx_col;
+ int tx_row;
+ tx_col = mi_col << (MI_SIZE_LOG2 - tx_size_wide_log2[0]);
+ tx_row = (mi_row & MAX_MIB_MASK) << (MI_SIZE_LOG2 - tx_size_high_log2[0]);
memcpy(ctx->a + num_4x4_blocks_wide * p,
- xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
+ xd->above_context[p] + (tx_col >> xd->plane[p].subsampling_x),
(sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
xd->plane[p].subsampling_x);
memcpy(ctx->l + num_4x4_blocks_high * p,
- xd->left_context[p] +
- ((mi_row & MAX_MIB_MASK) * 2 >> xd->plane[p].subsampling_y),
+ xd->left_context[p] + (tx_row >> xd->plane[p].subsampling_y),
(sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
xd->plane[p].subsampling_y);
}
@@ -2433,9 +1837,9 @@ static void save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
sizeof(xd->left_seg_context[0]) * mi_height);
#if CONFIG_VAR_TX
memcpy(ctx->ta, xd->above_txfm_context,
- sizeof(*xd->above_txfm_context) * mi_width);
+ sizeof(*xd->above_txfm_context) * (mi_width << TX_UNIT_WIDE_LOG2));
memcpy(ctx->tl, xd->left_txfm_context,
- sizeof(*xd->left_txfm_context) * mi_height);
+ sizeof(*xd->left_txfm_context) * (mi_height << TX_UNIT_HIGH_LOG2));
ctx->p_ta = xd->above_txfm_context;
ctx->p_tl = xd->left_txfm_context;
#endif
@@ -2479,7 +1883,7 @@ static void encode_b(const AV1_COMP *const cpi, const TileInfo *const tile,
get_frame_new_buffer(&cpi->common), mi_row, mi_col);
}
#endif
- encode_superblock(cpi, td, tp, dry_run, mi_row, mi_col, bsize, ctx, rate);
+ encode_superblock(cpi, td, tp, dry_run, mi_row, mi_col, bsize, rate);
if (!dry_run) {
#if CONFIG_EXT_DELTA_Q
@@ -2563,12 +1967,13 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
if (!x->skip) {
int this_rate = 0;
av1_encode_sb_supertx((AV1_COMMON *)cm, x, bsize);
- av1_tokenize_sb_supertx(cpi, td, tp, dry_run, bsize, rate);
+ av1_tokenize_sb_supertx(cpi, td, tp, dry_run, mi_row, mi_col, bsize,
+ rate);
if (rate) *rate += this_rate;
} else {
xd->mi[0]->mbmi.skip = 1;
if (!dry_run) td->counts->skip[av1_get_skip_context(xd)][1]++;
- reset_skip_context(xd, bsize);
+ av1_reset_skip_context(xd, mi_row, mi_col, bsize);
}
if (!dry_run) {
for (y_idx = 0; y_idx < mi_height; y_idx++)
@@ -2849,9 +2254,10 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
pc_tree->partitioning = partition;
#if CONFIG_VAR_TX
- xd->above_txfm_context = cm->above_txfm_context + mi_col;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
+ xd->above_txfm_context =
+ cm->above_txfm_context + (mi_col << TX_UNIT_WIDE_LOG2);
+ xd->left_txfm_context = xd->left_txfm_context_buffer +
+ ((mi_row & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2);
#endif
#if !CONFIG_PVQ
save_context(x, &x_ctx, mi_row, mi_col, bsize);
@@ -2943,7 +2349,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
av1_init_rd_stats(&tmp_rdc);
update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
- ctx_h, NULL);
+ NULL);
rd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
#if CONFIG_SUPERTX
&rt_nocoef,
@@ -2986,7 +2392,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
av1_init_rd_stats(&tmp_rdc);
update_state(cpi, td, ctx_v, mi_row, mi_col, subsize, 1);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
- ctx_v, NULL);
+ NULL);
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
#if CONFIG_SUPERTX
&rt_nocoef,
@@ -3566,7 +2972,7 @@ static void rd_test_partition3(
PICK_MODE_CONTEXT *ctx_0 = &ctxs[0];
update_state(cpi, td, ctx_0, mi_row0, mi_col0, subsize0, 1);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row0, mi_col0, subsize0,
- ctx_0, NULL);
+ NULL);
if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_0);
@@ -3607,7 +3013,7 @@ static void rd_test_partition3(
PICK_MODE_CONTEXT *ctx_1 = &ctxs[1];
update_state(cpi, td, ctx_1, mi_row1, mi_col1, subsize1, 1);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row1, mi_col1, subsize1,
- ctx_1, NULL);
+ NULL);
if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_1);
@@ -3865,9 +3271,10 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
}
#if CONFIG_VAR_TX
- xd->above_txfm_context = cm->above_txfm_context + mi_col;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
+ xd->above_txfm_context =
+ cm->above_txfm_context + (mi_col << TX_UNIT_WIDE_LOG2);
+ xd->left_txfm_context = xd->left_txfm_context_buffer +
+ ((mi_row & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2);
#endif
#if !CONFIG_PVQ
save_context(x, &x_ctx, mi_row, mi_col, bsize);
@@ -4157,9 +3564,29 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ sum_rdc.dist_y += this_rdc.dist_y;
+#endif
}
}
reached_last_index = (idx == 4);
+
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ if (reached_last_index && sum_rdc.rdcost != INT64_MAX &&
+ bsize == BLOCK_8X8) {
+ int use_activity_masking = 0;
+ int64_t daala_dist;
+ const int src_stride = x->plane[0].src.stride;
+ daala_dist = av1_daala_dist(x->plane[0].src.buf - 4 * src_stride - 4,
+ src_stride, x->decoded_8x8, 8, 8, 8, 1,
+ use_activity_masking, x->qindex)
+ << 4;
+ sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist;
+ sum_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+ }
+#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+
#if CONFIG_SUPERTX
if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && reached_last_index) {
TX_SIZE supertx_size = max_txsize_lookup[bsize];
@@ -4267,7 +3694,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
PICK_MODE_CONTEXT *ctx_h = &pc_tree->horizontal[0];
update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
- ctx_h, NULL);
+ NULL);
if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_h);
@@ -4297,6 +3724,16 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
subsize, &pc_tree->horizontal[1],
best_rdc.rdcost - sum_rdc.rdcost);
#endif // CONFIG_SUPERTX
+
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
+ update_state(cpi, td, &pc_tree->horizontal[1], mi_row + mi_step, mi_col,
+ subsize, DRY_RUN_NORMAL);
+ encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row + mi_step, mi_col,
+ subsize, NULL);
+ }
+#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
#if CONFIG_SUPERTX
@@ -4309,7 +3746,24 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ sum_rdc.dist_y += this_rdc.dist_y;
+#endif
}
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
+ int use_activity_masking = 0;
+ int64_t daala_dist;
+ const int src_stride = x->plane[0].src.stride;
+ daala_dist = av1_daala_dist(x->plane[0].src.buf - 4 * src_stride,
+ src_stride, x->decoded_8x8, 8, 8, 8, 1,
+ use_activity_masking, x->qindex)
+ << 4;
+ sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist;
+ sum_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+ }
+#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
}
#if CONFIG_SUPERTX
@@ -4413,7 +3867,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
!force_vert_split && (bsize > BLOCK_8X8 || unify_bsize)) {
update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 1);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
- &pc_tree->vertical[0], NULL);
+ NULL);
if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
@@ -4444,6 +3898,16 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
subsize, &pc_tree->vertical[1],
best_rdc.rdcost - sum_rdc.rdcost);
#endif // CONFIG_SUPERTX
+
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
+ update_state(cpi, td, &pc_tree->vertical[1], mi_row, mi_col + mi_step,
+ subsize, DRY_RUN_NORMAL);
+ encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col + mi_step,
+ subsize, NULL);
+ }
+#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
#if CONFIG_SUPERTX
@@ -4456,7 +3920,24 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ sum_rdc.dist_y += this_rdc.dist_y;
+#endif
}
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
+ int use_activity_masking = 0;
+ int64_t daala_dist;
+ const int src_stride = x->plane[0].src.stride;
+ daala_dist =
+ av1_daala_dist(x->plane[0].src.buf - 4, src_stride, x->decoded_8x8,
+ 8, 8, 8, 1, use_activity_masking, x->qindex)
+ << 4;
+ sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist;
+ sum_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+ }
+#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
}
#if CONFIG_SUPERTX
if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && !abort_flag) {
@@ -4612,6 +4093,14 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
x->cfl_store_y = 0;
#endif
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
+ bsize == BLOCK_4X4 && pc_tree->index == 3) {
+ encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
+ pc_tree, NULL);
+ }
+#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+
if (bsize == cm->sb_size) {
#if !CONFIG_PVQ && !CONFIG_LV_MAP
assert(tp_orig < *tp || (tp_orig == *tp && xd->mi[0]->mbmi.skip));
@@ -4762,14 +4251,6 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
&dummy_rate_nocoef,
#endif // CONFIG_SUPERTX
1, pc_root);
- } else if (sf->partition_search_type == VAR_BASED_PARTITION) {
- choose_partitioning(cpi, td, tile_info, x, mi_row, mi_col);
- rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, cm->sb_size,
- &dummy_rate, &dummy_dist,
-#if CONFIG_SUPERTX
- &dummy_rate_nocoef,
-#endif // CONFIG_SUPERTX
- 1, pc_root);
} else {
// If required set upper and lower partition size limits
if (sf->auto_min_max_partition_size) {
@@ -4785,32 +4266,6 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
INT64_MAX, pc_root);
}
}
-#if CONFIG_SUBFRAME_PROB_UPDATE
- if (cm->do_subframe_update &&
- cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
- const int mi_rows_per_update =
- MI_SIZE * AOMMAX(cm->mi_rows / MI_SIZE / COEF_PROBS_BUFS, 1);
- if ((mi_row + MI_SIZE) % mi_rows_per_update == 0 &&
- mi_row + MI_SIZE < cm->mi_rows &&
- cm->coef_probs_update_idx < COEF_PROBS_BUFS - 1) {
- TX_SIZE t;
- SUBFRAME_STATS *subframe_stats = &cpi->subframe_stats;
-
- for (t = 0; t < TX_SIZES; ++t)
- av1_full_to_model_counts(cpi->td.counts->coef[t],
- cpi->td.rd_counts.coef_counts[t]);
- av1_partial_adapt_probs(cm, mi_row, mi_col);
- ++cm->coef_probs_update_idx;
- av1_copy(subframe_stats->coef_probs_buf[cm->coef_probs_update_idx],
- cm->fc->coef_probs);
- av1_copy(subframe_stats->coef_counts_buf[cm->coef_probs_update_idx],
- cpi->td.rd_counts.coef_counts);
- av1_copy(subframe_stats->eob_counts_buf[cm->coef_probs_update_idx],
- cm->counts.eob_branch);
- av1_fill_token_costs(x->token_costs, cm->fc->coef_probs);
- }
- }
-#endif // CONFIG_SUBFRAME_PROB_UPDATE
}
static void init_encode_frame_mb_context(AV1_COMP *cpi) {
@@ -5041,16 +4496,11 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
}
}
-#if CONFIG_DAALA_EC
+#if !CONFIG_ANS
od_ec_enc_init(&td->mb.daala_enc.w.ec, 65025);
-#else
-#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
-#endif
-
-#if CONFIG_DAALA_EC
od_ec_enc_reset(&td->mb.daala_enc.w.ec);
#else
-#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
+#error "CONFIG_PVQ currently requires !CONFIG_ANS."
#endif
#endif // #if CONFIG_PVQ
@@ -5079,10 +4529,10 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
(unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]);
assert(cpi->tok_count[tile_row][tile_col] <= allocated_tokens(*tile_info));
#if CONFIG_PVQ
-#if CONFIG_DAALA_EC
+#if !CONFIG_ANS
od_ec_enc_clear(&td->mb.daala_enc.w.ec);
#else
-#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
+#error "CONFIG_PVQ currently requires !CONFIG_ANS."
#endif
td->mb.pvq_q->last_pos = td->mb.pvq_q->curr_pos;
@@ -5186,6 +4636,24 @@ static int gm_get_params_cost(WarpedMotionParams *gm,
}
return (params_cost << AV1_PROB_COST_SHIFT);
}
+
+static int do_gm_search_logic(SPEED_FEATURES *const sf, int num_refs_using_gm,
+ int frame) {
+ (void)num_refs_using_gm;
+ (void)frame;
+ switch (sf->gm_search_type) {
+ case GM_FULL_SEARCH: return 1;
+ case GM_REDUCED_REF_SEARCH:
+#if CONFIG_EXT_REFS
+ return !(frame == LAST2_FRAME || frame == LAST3_FRAME);
+#else
+ return (num_refs_using_gm < 2);
+#endif // CONFIG_EXT_REFS
+ case GM_DISABLE_SEARCH: return 0;
+ default: assert(0);
+ }
+ return 1;
+}
#endif // CONFIG_GLOBAL_MOTION
static void encode_frame_internal(AV1_COMP *cpi) {
@@ -5205,9 +4673,7 @@ static void encode_frame_internal(AV1_COMP *cpi) {
x->min_partition_size = AOMMIN(x->min_partition_size, cm->sb_size);
x->max_partition_size = AOMMIN(x->max_partition_size, cm->sb_size);
-#if CONFIG_REF_MV
cm->setup_mi(cm);
-#endif
xd->mi = cm->mi_grid_visible;
xd->mi[0] = cm->mi;
@@ -5218,27 +4684,46 @@ static void encode_frame_internal(AV1_COMP *cpi) {
#if CONFIG_GLOBAL_MOTION
av1_zero(rdc->global_motion_used);
+ av1_zero(cpi->gmparams_cost);
if (cpi->common.frame_type == INTER_FRAME && cpi->source &&
!cpi->global_motion_search_done) {
- YV12_BUFFER_CONFIG *ref_buf;
+ YV12_BUFFER_CONFIG *ref_buf[TOTAL_REFS_PER_FRAME];
int frame;
double params_by_motion[RANSAC_NUM_MOTIONS * (MAX_PARAMDIM - 1)];
const double *params_this_motion;
int inliers_by_motion[RANSAC_NUM_MOTIONS];
WarpedMotionParams tmp_wm_params;
- static const double kInfiniteErrAdv = 1e12;
static const double kIdentityParams[MAX_PARAMDIM - 1] = {
0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0
};
+ int num_refs_using_gm = 0;
for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
- ref_buf = get_ref_frame_buffer(cpi, frame);
- if (ref_buf) {
+ ref_buf[frame] = get_ref_frame_buffer(cpi, frame);
+ int pframe;
+ // check for duplicate buffer
+ for (pframe = LAST_FRAME; pframe < frame; ++pframe) {
+ if (ref_buf[frame] == ref_buf[pframe]) break;
+ }
+ if (pframe < frame) {
+ memcpy(&cm->global_motion[frame], &cm->global_motion[pframe],
+ sizeof(WarpedMotionParams));
+ } else if (ref_buf[frame] &&
+ do_gm_search_logic(&cpi->sf, num_refs_using_gm, frame)) {
TransformationType model;
+ const int64_t ref_frame_error = av1_frame_error(
+#if CONFIG_HIGHBITDEPTH
+ xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
+#endif // CONFIG_HIGHBITDEPTH
+ ref_buf[frame]->y_buffer, ref_buf[frame]->y_stride,
+ cpi->source->y_buffer, 0, 0, cpi->source->y_width,
+ cpi->source->y_height, cpi->source->y_stride);
+
+ if (ref_frame_error == 0) continue;
+
aom_clear_system_state();
for (model = ROTZOOM; model < GLOBAL_TRANS_TYPES_ENC; ++model) {
- double best_erroradvantage = kInfiniteErrAdv;
-
+ int64_t best_warp_error = INT64_MAX;
// Initially set all params to identity.
for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) {
memcpy(params_by_motion + (MAX_PARAMDIM - 1) * i, kIdentityParams,
@@ -5246,7 +4731,7 @@ static void encode_frame_internal(AV1_COMP *cpi) {
}
compute_global_motion_feature_based(
- model, cpi->source, ref_buf,
+ model, cpi->source, ref_buf[frame],
#if CONFIG_HIGHBITDEPTH
cpi->common.bit_depth,
#endif // CONFIG_HIGHBITDEPTH
@@ -5259,17 +4744,17 @@ static void encode_frame_internal(AV1_COMP *cpi) {
convert_model_to_params(params_this_motion, &tmp_wm_params);
if (tmp_wm_params.wmtype != IDENTITY) {
- const double erroradv_this_motion = refine_integerized_param(
+ const int64_t warp_error = refine_integerized_param(
&tmp_wm_params, tmp_wm_params.wmtype,
#if CONFIG_HIGHBITDEPTH
xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
#endif // CONFIG_HIGHBITDEPTH
- ref_buf->y_buffer, ref_buf->y_width, ref_buf->y_height,
- ref_buf->y_stride, cpi->source->y_buffer,
- cpi->source->y_width, cpi->source->y_height,
- cpi->source->y_stride, 3);
- if (erroradv_this_motion < best_erroradvantage) {
- best_erroradvantage = erroradv_this_motion;
+ ref_buf[frame]->y_buffer, ref_buf[frame]->y_width,
+ ref_buf[frame]->y_height, ref_buf[frame]->y_stride,
+ cpi->source->y_buffer, cpi->source->y_width,
+ cpi->source->y_height, cpi->source->y_stride, 3);
+ if (warp_error < best_warp_error) {
+ best_warp_error = warp_error;
// Save the wm_params modified by refine_integerized_param()
// rather than motion index to avoid rerunning refine() below.
memcpy(&(cm->global_motion[frame]), &tmp_wm_params,
@@ -5295,17 +4780,17 @@ static void encode_frame_internal(AV1_COMP *cpi) {
// If the best error advantage found doesn't meet the threshold for
// this motion type, revert to IDENTITY.
if (!is_enough_erroradvantage(
- best_erroradvantage,
+ (double)best_warp_error / ref_frame_error,
gm_get_params_cost(&cm->global_motion[frame],
&cm->prev_frame->global_motion[frame],
cm->allow_high_precision_mv))) {
set_default_warp_params(&cm->global_motion[frame]);
}
-
if (cm->global_motion[frame].wmtype != IDENTITY) break;
}
aom_clear_system_state();
}
+ if (cm->global_motion[frame].wmtype != IDENTITY) num_refs_using_gm++;
cpi->gmparams_cost[frame] =
gm_get_params_cost(&cm->global_motion[frame],
&cm->prev_frame->global_motion[frame],
@@ -5352,21 +4837,8 @@ static void encode_frame_internal(AV1_COMP *cpi) {
av1_initialize_rd_consts(cpi);
av1_initialize_me_consts(cpi, x, cm->base_qindex);
init_encode_frame_mb_context(cpi);
-#if CONFIG_TEMPMV_SIGNALING
- if (last_fb_buf_idx != INVALID_IDX) {
- cm->prev_frame = &cm->buffer_pool->frame_bufs[last_fb_buf_idx];
- cm->use_prev_frame_mvs &= !cm->error_resilient_mode &&
- cm->width == cm->prev_frame->buf.y_width &&
- cm->height == cm->prev_frame->buf.y_height &&
- !cm->intra_only && !cm->prev_frame->intra_only;
- }
-#else
- cm->use_prev_frame_mvs =
- !cm->error_resilient_mode && cm->width == cm->last_width &&
- cm->height == cm->last_height && !cm->intra_only && cm->last_show_frame;
-#endif
-#if CONFIG_EXT_REFS
+#if CONFIG_EXT_REFS || CONFIG_TEMPMV_SIGNALING
// NOTE(zoeliu): As cm->prev_frame can take neither a frame of
// show_exisiting_frame=1, nor can it take a frame not used as
// a reference, it is probable that by the time it is being
@@ -5377,11 +4849,29 @@ static void encode_frame_internal(AV1_COMP *cpi) {
// (1) Simply disable the use of previous frame mvs; or
// (2) Have cm->prev_frame point to one reference frame buffer,
// e.g. LAST_FRAME.
- if (cm->use_prev_frame_mvs && !enc_is_ref_frame_buf(cpi, cm->prev_frame)) {
+ if (!enc_is_ref_frame_buf(cpi, cm->prev_frame)) {
// Reassign the LAST_FRAME buffer to cm->prev_frame.
- cm->prev_frame = &cm->buffer_pool->frame_bufs[last_fb_buf_idx];
+ cm->prev_frame = last_fb_buf_idx != INVALID_IDX
+ ? &cm->buffer_pool->frame_bufs[last_fb_buf_idx]
+ : NULL;
}
-#endif // CONFIG_EXT_REFS
+#endif // CONFIG_EXT_REFS || CONFIG_TEMPMV_SIGNALING
+
+#if CONFIG_TEMPMV_SIGNALING
+ if (cm->prev_frame) {
+ cm->use_prev_frame_mvs &= !cm->error_resilient_mode &&
+ cm->width == cm->prev_frame->buf.y_width &&
+ cm->height == cm->prev_frame->buf.y_height &&
+ !cm->intra_only && !cm->prev_frame->intra_only;
+ } else {
+ cm->use_prev_frame_mvs = 0;
+ }
+#else
+ cm->use_prev_frame_mvs = !cm->error_resilient_mode && cm->prev_frame &&
+ cm->width == cm->prev_frame->buf.y_crop_width &&
+ cm->height == cm->prev_frame->buf.y_crop_height &&
+ !cm->intra_only && cm->last_show_frame;
+#endif // CONFIG_TEMPMV_SIGNALING
// Special case: set prev_mi to NULL when the previous mode info
// context cannot be used.
@@ -5390,14 +4880,8 @@ static void encode_frame_internal(AV1_COMP *cpi) {
#if CONFIG_VAR_TX
x->txb_split_count = 0;
-#if CONFIG_REF_MV
av1_zero(x->blk_skip_drl);
#endif
-#endif
-
- if (cpi->sf.partition_search_type == VAR_BASED_PARTITION &&
- cpi->td.var_root[0] == NULL)
- av1_setup_var_tree(&cpi->common, &cpi->td);
{
struct aom_usec_timer emr_timer;
@@ -5429,6 +4913,20 @@ static void encode_frame_internal(AV1_COMP *cpi) {
#endif
}
+#if CONFIG_EXT_INTER
+static void make_consistent_compound_tools(AV1_COMMON *cm) {
+ (void)cm;
+#if CONFIG_INTERINTRA
+ if (frame_is_intra_only(cm) || cm->reference_mode == COMPOUND_REFERENCE)
+ cm->allow_interintra_compound = 0;
+#endif // CONFIG_INTERINTRA
+#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
+ if (frame_is_intra_only(cm) || cm->reference_mode == SINGLE_REFERENCE)
+ cm->allow_masked_compound = 0;
+#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
+}
+#endif // CONFIG_EXT_INTER
+
void av1_encode_frame(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
#if CONFIG_EXT_TX
@@ -5444,29 +4942,15 @@ void av1_encode_frame(AV1_COMP *cpi) {
// side behavior is where the ALT ref buffer has opposite sign bias to
// the other two.
if (!frame_is_intra_only(cm)) {
-#if CONFIG_LOWDELAY_COMPOUND // Normative in encoder
- cpi->allow_comp_inter_inter = 1;
-#if CONFIG_EXT_REFS
- cm->comp_fwd_ref[0] = LAST_FRAME;
- cm->comp_fwd_ref[1] = LAST2_FRAME;
- cm->comp_fwd_ref[2] = LAST3_FRAME;
- cm->comp_fwd_ref[3] = GOLDEN_FRAME;
- cm->comp_bwd_ref[0] = BWDREF_FRAME;
- cm->comp_bwd_ref[1] = ALTREF_FRAME;
-#else
- cm->comp_fixed_ref = ALTREF_FRAME;
- cm->comp_var_ref[0] = LAST_FRAME;
- cm->comp_var_ref[1] = GOLDEN_FRAME;
-#endif // CONFIG_EXT_REFS
-#else
+#if !CONFIG_ONE_SIDED_COMPOUND
if ((cm->ref_frame_sign_bias[ALTREF_FRAME] ==
cm->ref_frame_sign_bias[GOLDEN_FRAME]) ||
(cm->ref_frame_sign_bias[ALTREF_FRAME] ==
cm->ref_frame_sign_bias[LAST_FRAME])) {
cpi->allow_comp_inter_inter = 0;
} else {
+#endif
cpi->allow_comp_inter_inter = 1;
-
#if CONFIG_EXT_REFS
cm->comp_fwd_ref[0] = LAST_FRAME;
cm->comp_fwd_ref[1] = LAST2_FRAME;
@@ -5475,10 +4959,11 @@ void av1_encode_frame(AV1_COMP *cpi) {
cm->comp_bwd_ref[0] = BWDREF_FRAME;
cm->comp_bwd_ref[1] = ALTREF_FRAME;
#else
- cm->comp_fixed_ref = ALTREF_FRAME;
- cm->comp_var_ref[0] = LAST_FRAME;
- cm->comp_var_ref[1] = GOLDEN_FRAME;
-#endif // CONFIG_EXT_REFS
+ cm->comp_fixed_ref = ALTREF_FRAME;
+ cm->comp_var_ref[0] = LAST_FRAME;
+ cm->comp_var_ref[1] = GOLDEN_FRAME;
+#endif // CONFIG_EXT_REFS
+#if !CONFIG_ONE_SIDED_COMPOUND // Normative in encoder
}
#endif
} else {
@@ -5529,6 +5014,9 @@ void av1_encode_frame(AV1_COMP *cpi) {
cm->interp_filter = SWITCHABLE;
#endif
+#if CONFIG_EXT_INTER
+ make_consistent_compound_tools(cm);
+#endif // CONFIG_EXT_INTER
encode_frame_internal(cpi);
for (i = 0; i < REFERENCE_MODES; ++i)
@@ -5553,12 +5041,19 @@ void av1_encode_frame(AV1_COMP *cpi) {
#endif // !CONFIG_REF_ADAPT
}
}
+#if CONFIG_EXT_INTER
+ make_consistent_compound_tools(cm);
+#endif // CONFIG_EXT_INTER
#if CONFIG_VAR_TX
if (cm->tx_mode == TX_MODE_SELECT && cpi->td.mb.txb_split_count == 0)
cm->tx_mode = ALLOW_32X32 + CONFIG_TX64X64;
#else
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+ if (cm->tx_mode == TX_MODE_SELECT && counts->quarter_tx_size[1] == 0) {
+#else
if (cm->tx_mode == TX_MODE_SELECT) {
+#endif
#if CONFIG_TX64X64
int count4x4 = 0;
int count8x8_8x8p = 0, count8x8_lp = 0;
@@ -5566,41 +5061,50 @@ void av1_encode_frame(AV1_COMP *cpi) {
int count32x32_32x32p = 0, count32x32_lp = 0;
int count64x64_64x64p = 0;
for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
+ int depth;
// counts->tx_size[max_depth][context_idx][this_depth_level]
- count4x4 += counts->tx_size[0][i][0];
- count4x4 += counts->tx_size[1][i][0];
- count4x4 += counts->tx_size[2][i][0];
- count4x4 += counts->tx_size[3][i][0];
-
- count8x8_8x8p += counts->tx_size[0][i][1];
- count8x8_lp += counts->tx_size[1][i][1];
- count8x8_lp += counts->tx_size[2][i][1];
- count8x8_lp += counts->tx_size[3][i][1];
-
- count16x16_16x16p += counts->tx_size[1][i][2];
- count16x16_lp += counts->tx_size[2][i][2];
- count16x16_lp += counts->tx_size[3][i][2];
-
- count32x32_32x32p += counts->tx_size[2][i][3];
- count32x32_lp += counts->tx_size[3][i][3];
-
- count64x64_64x64p += counts->tx_size[3][i][4];
+ depth = tx_size_to_depth(TX_4X4);
+ count4x4 += counts->tx_size[TX_8X8 - TX_SIZE_CTX_MIN][i][depth];
+ count4x4 += counts->tx_size[TX_16X16 - TX_SIZE_CTX_MIN][i][depth];
+ count4x4 += counts->tx_size[TX_32X32 - TX_SIZE_CTX_MIN][i][depth];
+ count4x4 += counts->tx_size[TX_64X64 - TX_SIZE_CTX_MIN][i][depth];
+
+ depth = tx_size_to_depth(TX_8X8);
+ count8x8_8x8p += counts->tx_size[TX_8X8 - TX_SIZE_CTX_MIN][i][depth];
+ count8x8_lp += counts->tx_size[TX_16X16 - TX_SIZE_CTX_MIN][i][depth];
+ count8x8_lp += counts->tx_size[TX_32X32 - TX_SIZE_CTX_MIN][i][depth];
+ count8x8_lp += counts->tx_size[TX_64X64 - TX_SIZE_CTX_MIN][i][depth];
+
+ depth = tx_size_to_depth(TX_16X16);
+ count16x16_16x16p +=
+ counts->tx_size[TX_16X16 - TX_SIZE_CTX_MIN][i][depth];
+ count16x16_lp += counts->tx_size[TX_32X32 - TX_SIZE_CTX_MIN][i][depth];
+ count16x16_lp += counts->tx_size[TX_64X64 - TX_SIZE_CTX_MIN][i][depth];
+
+ depth = tx_size_to_depth(TX_32X32);
+ count32x32_32x32p +=
+ counts->tx_size[TX_32X32 - TX_SIZE_CTX_MIN][i][depth];
+ count32x32_lp += counts->tx_size[TX_64X64 - TX_SIZE_CTX_MIN][i][depth];
+
+ depth = tx_size_to_depth(TX_64X64);
+ count64x64_64x64p +=
+ counts->tx_size[TX_64X64 - TX_SIZE_CTX_MIN][i][depth];
}
#if CONFIG_EXT_TX && CONFIG_RECT_TX
- count4x4 += counts->tx_size_implied[0][TX_4X4];
- count4x4 += counts->tx_size_implied[1][TX_4X4];
- count4x4 += counts->tx_size_implied[2][TX_4X4];
- count4x4 += counts->tx_size_implied[3][TX_4X4];
- count8x8_8x8p += counts->tx_size_implied[1][TX_8X8];
- count8x8_lp += counts->tx_size_implied[2][TX_8X8];
- count8x8_lp += counts->tx_size_implied[3][TX_8X8];
- count8x8_lp += counts->tx_size_implied[4][TX_8X8];
- count16x16_16x16p += counts->tx_size_implied[2][TX_16X16];
- count16x16_lp += counts->tx_size_implied[3][TX_16X16];
- count16x16_lp += counts->tx_size_implied[4][TX_16X16];
- count32x32_32x32p += counts->tx_size_implied[3][TX_32X32];
- count32x32_lp += counts->tx_size_implied[4][TX_32X32];
- count64x64_64x64p += counts->tx_size_implied[4][TX_64X64];
+ count4x4 += counts->tx_size_implied[TX_4X4][TX_4X4];
+ count4x4 += counts->tx_size_implied[TX_8X8][TX_4X4];
+ count4x4 += counts->tx_size_implied[TX_16X16][TX_4X4];
+ count4x4 += counts->tx_size_implied[TX_32X32][TX_4X4];
+ count8x8_8x8p += counts->tx_size_implied[TX_8X8][TX_8X8];
+ count8x8_lp += counts->tx_size_implied[TX_16X16][TX_8X8];
+ count8x8_lp += counts->tx_size_implied[TX_32X32][TX_8X8];
+ count8x8_lp += counts->tx_size_implied[TX_64X64][TX_8X8];
+ count16x16_16x16p += counts->tx_size_implied[TX_16X16][TX_16X16];
+ count16x16_lp += counts->tx_size_implied[TX_32X32][TX_16X16];
+ count16x16_lp += counts->tx_size_implied[TX_64X64][TX_16X16];
+ count32x32_32x32p += counts->tx_size_implied[TX_32X32][TX_32X32];
+ count32x32_lp += counts->tx_size_implied[TX_64X64][TX_32X32];
+ count64x64_64x64p += counts->tx_size_implied[TX_64X64][TX_64X64];
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
count32x32_lp == 0 && count32x32_32x32p == 0 &&
@@ -5652,30 +5156,37 @@ void av1_encode_frame(AV1_COMP *cpi) {
int count16x16_16x16p = 0, count16x16_lp = 0;
int count32x32 = 0;
for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
+ int depth;
// counts->tx_size[max_depth][context_idx][this_depth_level]
- count4x4 += counts->tx_size[0][i][0];
- count4x4 += counts->tx_size[1][i][0];
- count4x4 += counts->tx_size[2][i][0];
-
- count8x8_8x8p += counts->tx_size[0][i][1];
- count8x8_lp += counts->tx_size[1][i][1];
- count8x8_lp += counts->tx_size[2][i][1];
-
- count16x16_16x16p += counts->tx_size[1][i][2];
- count16x16_lp += counts->tx_size[2][i][2];
- count32x32 += counts->tx_size[2][i][3];
+ depth = tx_size_to_depth(TX_4X4);
+ count4x4 += counts->tx_size[TX_8X8 - TX_SIZE_CTX_MIN][i][depth];
+ count4x4 += counts->tx_size[TX_16X16 - TX_SIZE_CTX_MIN][i][depth];
+ count4x4 += counts->tx_size[TX_32X32 - TX_SIZE_CTX_MIN][i][depth];
+
+ depth = tx_size_to_depth(TX_8X8);
+ count8x8_8x8p += counts->tx_size[TX_8X8 - TX_SIZE_CTX_MIN][i][depth];
+ count8x8_lp += counts->tx_size[TX_16X16 - TX_SIZE_CTX_MIN][i][depth];
+ count8x8_lp += counts->tx_size[TX_32X32 - TX_SIZE_CTX_MIN][i][depth];
+
+ depth = tx_size_to_depth(TX_16X16);
+ count16x16_16x16p +=
+ counts->tx_size[TX_16X16 - TX_SIZE_CTX_MIN][i][depth];
+ count16x16_lp += counts->tx_size[TX_32X32 - TX_SIZE_CTX_MIN][i][depth];
+
+ depth = tx_size_to_depth(TX_32X32);
+ count32x32 += counts->tx_size[TX_32X32 - TX_SIZE_CTX_MIN][i][depth];
}
#if CONFIG_EXT_TX && CONFIG_RECT_TX
- count4x4 += counts->tx_size_implied[0][TX_4X4];
- count4x4 += counts->tx_size_implied[1][TX_4X4];
- count4x4 += counts->tx_size_implied[2][TX_4X4];
- count4x4 += counts->tx_size_implied[3][TX_4X4];
- count8x8_8x8p += counts->tx_size_implied[1][TX_8X8];
- count8x8_lp += counts->tx_size_implied[2][TX_8X8];
- count8x8_lp += counts->tx_size_implied[3][TX_8X8];
- count16x16_lp += counts->tx_size_implied[3][TX_16X16];
- count16x16_16x16p += counts->tx_size_implied[2][TX_16X16];
- count32x32 += counts->tx_size_implied[3][TX_32X32];
+ count4x4 += counts->tx_size_implied[TX_4X4][TX_4X4];
+ count4x4 += counts->tx_size_implied[TX_8X8][TX_4X4];
+ count4x4 += counts->tx_size_implied[TX_16X16][TX_4X4];
+ count4x4 += counts->tx_size_implied[TX_32X32][TX_4X4];
+ count8x8_8x8p += counts->tx_size_implied[TX_8X8][TX_8X8];
+ count8x8_lp += counts->tx_size_implied[TX_16X16][TX_8X8];
+ count8x8_lp += counts->tx_size_implied[TX_32X32][TX_8X8];
+ count16x16_16x16p += counts->tx_size_implied[TX_16X16][TX_16X16];
+ count16x16_lp += counts->tx_size_implied[TX_32X32][TX_16X16];
+ count32x32 += counts->tx_size_implied[TX_32X32][TX_32X32];
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
#if CONFIG_SUPERTX
@@ -5709,6 +5220,9 @@ void av1_encode_frame(AV1_COMP *cpi) {
}
#endif
} else {
+#if CONFIG_EXT_INTER
+ make_consistent_compound_tools(cm);
+#endif // CONFIG_EXT_INTER
encode_frame_internal(cpi);
}
}
@@ -5758,6 +5272,11 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd,
++counts->filter_intra[0][use_filter_intra_mode];
}
if (mbmi->uv_mode == DC_PRED
+#if CONFIG_CB4X4
+ &&
+ is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
+ xd->plane[1].subsampling_y)
+#endif
#if CONFIG_PALETTE
&& mbmi->palette_mode_info.palette_size[1] == 0
#endif // CONFIG_PALETTE
@@ -5799,8 +5318,8 @@ static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
const int tx_col = blk_col >> 1;
const int max_blocks_high = max_block_high(xd, mbmi->sb_type, 0);
const int max_blocks_wide = max_block_wide(xd, mbmi->sb_type, 0);
- int ctx = txfm_partition_context(xd->above_txfm_context + tx_col,
- xd->left_txfm_context + tx_row,
+ int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
+ xd->left_txfm_context + blk_row,
mbmi->sb_type, tx_size);
const TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_row][tx_col];
@@ -5809,8 +5328,8 @@ static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
if (tx_size == plane_tx_size) {
++counts->txfm_partition[ctx][0];
mbmi->tx_size = tx_size;
- txfm_partition_update(xd->above_txfm_context + tx_col,
- xd->left_txfm_context + tx_row, tx_size, tx_size);
+ txfm_partition_update(xd->above_txfm_context + blk_col,
+ xd->left_txfm_context + blk_row, tx_size, tx_size);
} else {
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
const int bs = tx_size_wide_unit[sub_txs];
@@ -5822,8 +5341,8 @@ static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
if (tx_size == TX_8X8) {
mbmi->inter_tx_size[tx_row][tx_col] = TX_4X4;
mbmi->tx_size = TX_4X4;
- txfm_partition_update(xd->above_txfm_context + tx_col,
- xd->left_txfm_context + tx_row, TX_4X4, tx_size);
+ txfm_partition_update(xd->above_txfm_context + blk_col,
+ xd->left_txfm_context + blk_row, TX_4X4, tx_size);
return;
}
@@ -5847,9 +5366,10 @@ static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x,
const int bw = tx_size_wide_unit[max_tx_size];
int idx, idy;
- xd->above_txfm_context = cm->above_txfm_context + mi_col;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
+ xd->above_txfm_context =
+ cm->above_txfm_context + (mi_col << TX_UNIT_WIDE_LOG2);
+ xd->left_txfm_context = xd->left_txfm_context_buffer +
+ ((mi_row & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2);
for (idy = 0; idy < mi_height; idy += bh)
for (idx = 0; idx < mi_width; idx += bw)
@@ -5870,8 +5390,8 @@ static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row,
if (tx_size == plane_tx_size) {
mbmi->tx_size = tx_size;
- txfm_partition_update(xd->above_txfm_context + tx_col,
- xd->left_txfm_context + tx_row, tx_size, tx_size);
+ txfm_partition_update(xd->above_txfm_context + blk_col,
+ xd->left_txfm_context + blk_row, tx_size, tx_size);
} else {
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
@@ -5881,8 +5401,8 @@ static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row,
if (tx_size == TX_8X8) {
mbmi->inter_tx_size[tx_row][tx_col] = TX_4X4;
mbmi->tx_size = TX_4X4;
- txfm_partition_update(xd->above_txfm_context + tx_col,
- xd->left_txfm_context + tx_row, TX_4X4, tx_size);
+ txfm_partition_update(xd->above_txfm_context + blk_col,
+ xd->left_txfm_context + blk_row, TX_4X4, tx_size);
return;
}
@@ -5905,9 +5425,10 @@ static void tx_partition_set_contexts(const AV1_COMMON *const cm,
const int bw = tx_size_wide_unit[max_tx_size];
int idx, idy;
- xd->above_txfm_context = cm->above_txfm_context + mi_col;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
+ xd->above_txfm_context =
+ cm->above_txfm_context + (mi_col << TX_UNIT_WIDE_LOG2);
+ xd->left_txfm_context = xd->left_txfm_context_buffer +
+ ((mi_row & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2);
for (idy = 0; idy < mi_height; idy += bh)
for (idx = 0; idx < mi_width; idx += bw)
@@ -5964,8 +5485,7 @@ void av1_update_tx_type_count(const AV1_COMMON *cm, MACROBLOCKD *xd,
static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row,
- int mi_col, BLOCK_SIZE bsize,
- PICK_MODE_CONTEXT *ctx, int *rate) {
+ int mi_col, BLOCK_SIZE bsize, int *rate) {
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -6039,10 +5559,9 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
&xd->block_refs[ref]->sf);
}
- if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip)
- av1_build_inter_predictors_sby(xd, mi_row, mi_col, NULL, block_size);
+ av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, block_size);
- av1_build_inter_predictors_sbuv(xd, mi_row, mi_col, NULL, block_size);
+ av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, NULL, block_size);
#if CONFIG_MOTION_VAR
if (mbmi->motion_mode == OBMC_CAUSAL) {
#if CONFIG_NCOBMC
@@ -6068,6 +5587,13 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
#endif
}
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ if (bsize < BLOCK_8X8) {
+ daala_dist_set_sub8x8_dst(x, x->decoded_8x8, bsize, block_size_wide[bsize],
+ block_size_high[bsize], mi_row, mi_col);
+ }
+#endif
+
if (!dry_run) {
#if CONFIG_VAR_TX
TX_SIZE tx_size =
@@ -6092,7 +5618,7 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
const int depth = tx_size_to_depth(coded_tx_size);
++td->counts->tx_size[tx_size_cat][tx_size_ctx][depth];
- if (tx_size != max_txsize_lookup[bsize]) ++x->txb_split_count;
+ if (tx_size != max_txsize_rect_lookup[bsize]) ++x->txb_split_count;
}
#else
const int tx_size_ctx = get_tx_size_context(xd);
@@ -6103,6 +5629,13 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
++td->counts->tx_size[tx_size_cat][tx_size_ctx][depth];
#endif
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+ if (is_quarter_tx_allowed(xd, mbmi, is_inter) &&
+ mbmi->tx_size != txsize_sqr_up_map[mbmi->tx_size]) {
+ ++td->counts->quarter_tx_size[mbmi->tx_size ==
+ quarter_txsize_lookup[mbmi->sb_type]];
+ }
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
#if CONFIG_EXT_TX && CONFIG_RECT_TX
assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(xd, mbmi)));
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -6135,7 +5668,7 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_VAR_TX
mbmi->min_tx_size = get_min_tx_size(intra_tx_size);
- if (intra_tx_size != max_txsize_lookup[bsize]) ++x->txb_split_count;
+ if (intra_tx_size != max_txsize_rect_lookup[bsize]) ++x->txb_split_count;
#endif
}
@@ -6327,13 +5860,13 @@ static void predict_superblock(const AV1_COMP *const cpi, ThreadData *td,
}
if (!b_sub8x8)
- av1_build_inter_predictors_sb_extend(xd,
+ av1_build_inter_predictors_sb_extend(cm, xd,
#if CONFIG_EXT_INTER
mi_row_ori, mi_col_ori,
#endif // CONFIG_EXT_INTER
mi_row_pred, mi_col_pred, bsize_pred);
else
- av1_build_inter_predictors_sb_sub8x8_extend(xd,
+ av1_build_inter_predictors_sb_sub8x8_extend(cm, xd,
#if CONFIG_EXT_INTER
mi_row_ori, mi_col_ori,
#endif // CONFIG_EXT_INTER
diff --git a/third_party/aom/av1/encoder/encodeframe.h b/third_party/aom/av1/encoder/encodeframe.h
index 08d6d20de..46a99e1cf 100644
--- a/third_party/aom/av1/encoder/encodeframe.h
+++ b/third_party/aom/av1/encoder/encodeframe.h
@@ -25,13 +25,6 @@ struct yv12_buffer_config;
struct AV1_COMP;
struct ThreadData;
-// Constants used in SOURCE_VAR_BASED_PARTITION
-#define VAR_HIST_MAX_BG_VAR 1000
-#define VAR_HIST_FACTOR 10
-#define VAR_HIST_BINS (VAR_HIST_MAX_BG_VAR / VAR_HIST_FACTOR + 1)
-#define VAR_HIST_LARGE_CUT_OFF 75
-#define VAR_HIST_SMALL_CUT_OFF 45
-
void av1_setup_src_planes(struct macroblock *x,
const struct yv12_buffer_config *src, int mi_row,
int mi_col);
@@ -42,8 +35,6 @@ void av1_init_tile_data(struct AV1_COMP *cpi);
void av1_encode_tile(struct AV1_COMP *cpi, struct ThreadData *td, int tile_row,
int tile_col);
-void av1_set_variance_partition_thresholds(struct AV1_COMP *cpi, int q);
-
void av1_update_tx_type_count(const struct AV1Common *cm, MACROBLOCKD *xd,
#if CONFIG_TXK_SEL
int block, int plane,
diff --git a/third_party/aom/av1/encoder/encodemb.c b/third_party/aom/av1/encoder/encodemb.c
index c450244b1..7c9781533 100644
--- a/third_party/aom/av1/encoder/encodemb.c
+++ b/third_party/aom/av1/encoder/encodemb.c
@@ -115,7 +115,7 @@ static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
#if CONFIG_EC_ADAPT
{ 10, 7 }, { 8, 5 },
#else
- { 10, 6 }, { 8, 5 },
+ { 10, 6 }, { 8, 6 },
#endif
};
@@ -125,35 +125,31 @@ static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); \
}
-static INLINE int64_t
-get_token_bit_costs(unsigned int token_costs[2][COEFF_CONTEXTS][ENTROPY_TOKENS],
- int skip_eob, int ctx, int token) {
-#if CONFIG_NEW_TOKENSET
+static INLINE unsigned int get_token_bit_costs(
+ unsigned int token_costs[2][COEFF_CONTEXTS][ENTROPY_TOKENS], int skip_eob,
+ int ctx, int token) {
(void)skip_eob;
return token_costs[token == ZERO_TOKEN || token == EOB_TOKEN][ctx][token];
-#else
- return token_costs[skip_eob][ctx][token];
-#endif
}
+#if !CONFIG_LV_MAP
#define USE_GREEDY_OPTIMIZE_B 0
#if USE_GREEDY_OPTIMIZE_B
-typedef struct av1_token_state {
+typedef struct av1_token_state_greedy {
int16_t token;
tran_low_t qc;
tran_low_t dqc;
-} av1_token_state;
+} av1_token_state_greedy;
-int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
- TX_SIZE tx_size, int ctx) {
-#if !CONFIG_PVQ
+static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
+ int block, TX_SIZE tx_size, int ctx) {
MACROBLOCKD *const xd = &mb->e_mbd;
struct macroblock_plane *const p = &mb->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
const int ref = is_inter_block(&xd->mi[0]->mbmi);
- av1_token_state tokens[MAX_TX_SQUARE + 1][2];
+ av1_token_state_greedy tokens[MAX_TX_SQUARE + 1][2];
uint8_t token_cache[MAX_TX_SQUARE];
const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
@@ -176,38 +172,23 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
#if CONFIG_NEW_QUANT
int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type);
const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
-#elif !CONFIG_AOM_QM
- const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift };
#endif // CONFIG_NEW_QUANT
int sz = 0;
const int64_t rddiv = mb->rddiv;
int64_t rd_cost0, rd_cost1;
int16_t t0, t1;
int i, final_eob;
-#if CONFIG_HIGHBITDEPTH
const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
-#else
- const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, 8);
-#endif
unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
mb->token_costs[txsize_sqr_map[tx_size]][plane_type][ref];
const int default_eob = tx_size_2d[tx_size];
- assert((mb->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+ assert(mb->qindex > 0);
assert((!plane_type && !plane) || (plane_type && plane));
assert(eob <= default_eob);
int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1;
-/* CpuSpeedTest uses "--min-q=0 --max-q=0" and expects 100dB psnr
-* This creates conflict with search for a better EOB position
-* The line below is to make sure EOB search is disabled at this corner case.
-*/
-#if !CONFIG_NEW_QUANT && !CONFIG_AOM_QM
- if (dq_step[1] <= 4) {
- rdmult = 1;
- }
-#endif
int64_t rate0, rate1;
for (i = 0; i < eob; i++) {
@@ -402,22 +383,10 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
dqc_a = shift ? ROUND_POWER_OF_TWO(dqc_a, shift) : dqc_a;
if (sz) dqc_a = -dqc_a;
#else
-// The 32x32 transform coefficient uses half quantization step size.
-// Account for the rounding difference in the dequantized coefficeint
-// value when the quantization index is dropped from an even number
-// to an odd number.
-
-#if CONFIG_AOM_QM
- tran_low_t offset = dqv >> shift;
-#else
- tran_low_t offset = dq_step[rc != 0];
-#endif
- if (shift & x_a) offset += (dqv & 0x01);
-
- if (sz == 0)
- dqc_a = dqcoeff[rc] - offset;
+ if (x_a < 0)
+ dqc_a = -((-x_a * dqv) >> shift);
else
- dqc_a = dqcoeff[rc] + offset;
+ dqc_a = (x_a * dqv) >> shift;
#endif // CONFIG_NEW_QUANT
} else {
dqc_a = 0;
@@ -483,19 +452,11 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
mb->plane[plane].eobs[block] = final_eob;
return final_eob;
-
-#else // !CONFIG_PVQ
- (void)cm;
- (void)tx_size;
- (void)ctx;
- struct macroblock_plane *const p = &mb->plane[plane];
- return p->eobs[block];
-#endif // !CONFIG_PVQ
}
#else // USE_GREEDY_OPTIMIZE_B
-typedef struct av1_token_state {
+typedef struct av1_token_state_org {
int64_t error;
int rate;
int16_t next;
@@ -503,16 +464,15 @@ typedef struct av1_token_state {
tran_low_t qc;
tran_low_t dqc;
uint8_t best_index;
-} av1_token_state;
+} av1_token_state_org;
-int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
- TX_SIZE tx_size, int ctx) {
-#if !CONFIG_PVQ
+static int optimize_b_org(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
+ int block, TX_SIZE tx_size, int ctx) {
MACROBLOCKD *const xd = &mb->e_mbd;
struct macroblock_plane *const p = &mb->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
const int ref = is_inter_block(&xd->mi[0]->mbmi);
- av1_token_state tokens[MAX_TX_SQUARE + 1][2];
+ av1_token_state_org tokens[MAX_TX_SQUARE + 1][2];
uint8_t token_cache[MAX_TX_SQUARE];
const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
@@ -536,8 +496,6 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
#if CONFIG_NEW_QUANT
int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type);
const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
-#elif !CONFIG_AOM_QM
- const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift };
#endif // CONFIG_NEW_QUANT
int next = eob, sz = 0;
const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1;
@@ -549,11 +507,7 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
int best, band = (eob < default_eob) ? band_translate[eob]
: band_translate[eob - 1];
int pt, i, final_eob;
-#if CONFIG_HIGHBITDEPTH
const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
-#else
- const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, 8);
-#endif
unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
mb->token_costs[txsize_sqr_map[tx_size]][plane_type][ref];
const uint16_t *band_counts = &band_count_table[tx_size][band];
@@ -566,11 +520,10 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
? av1_get_qindex(&cm->seg, xd->mi[0]->mbmi.segment_id,
cm->base_qindex)
: cm->base_qindex;
- if (qindex == 0) {
- assert((qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
- }
+ assert(qindex > 0);
+ (void)qindex;
#else
- assert((mb->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+ assert(mb->qindex > 0);
#endif
token_costs += band;
@@ -777,22 +730,10 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
: tokens[i][1].dqc;
if (sz) tokens[i][1].dqc = -tokens[i][1].dqc;
#else
-// The 32x32 transform coefficient uses half quantization step size.
-// Account for the rounding difference in the dequantized coefficeint
-// value when the quantization index is dropped from an even number
-// to an odd number.
-
-#if CONFIG_AOM_QM
- tran_low_t offset = dqv >> shift;
-#else
- tran_low_t offset = dq_step[rc != 0];
-#endif
- if (shift & x) offset += (dqv & 0x01);
-
- if (sz == 0)
- tokens[i][1].dqc = dqcoeff[rc] - offset;
+ if (x < 0)
+ tokens[i][1].dqc = -((-x * dqv) >> shift);
else
- tokens[i][1].dqc = dqcoeff[rc] + offset;
+ tokens[i][1].dqc = (x * dqv) >> shift;
#endif // CONFIG_NEW_QUANT
} else {
tokens[i][1].dqc = 0;
@@ -858,16 +799,47 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
mb->plane[plane].eobs[block] = final_eob;
assert(final_eob <= default_eob);
return final_eob;
-#else // !CONFIG_PVQ
+}
+
+#endif // USE_GREEDY_OPTIMIZE_B
+#endif // !CONFIG_LV_MAP
+
+int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ struct macroblock_plane *const p = &mb->plane[plane];
+ const int eob = p->eobs[block];
+ assert((mb->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+ if (eob == 0) return eob;
+ if (xd->lossless[xd->mi[0]->mbmi.segment_id]) return eob;
+#if CONFIG_PVQ
(void)cm;
(void)tx_size;
- (void)ctx;
- struct macroblock_plane *const p = &mb->plane[plane];
- return p->eobs[block];
-#endif // !CONFIG_PVQ
-}
+ (void)a;
+ (void)l;
+ return eob;
+#endif
+
+#if !CONFIG_LV_MAP
+ (void)plane_bsize;
+#if CONFIG_VAR_TX
+ int ctx = get_entropy_context(tx_size, a, l);
+#else
+ int ctx = combine_entropy_contexts(*a, *l);
+#endif
+#if USE_GREEDY_OPTIMIZE_B
+ return optimize_b_greedy(cm, mb, plane, block, tx_size, ctx);
+#else // USE_GREEDY_OPTIMIZE_B
+ return optimize_b_org(cm, mb, plane, block, tx_size, ctx);
#endif // USE_GREEDY_OPTIMIZE_B
+#else // !CONFIG_LV_MAP
+ TXB_CTX txb_ctx;
+ get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
+ return av1_optimize_txb(cm, mb, plane, block, tx_size, &txb_ctx);
+#endif // !CONFIG_LV_MAP
+}
#if !CONFIG_PVQ
#if CONFIG_HIGHBITDEPTH
@@ -1158,8 +1130,7 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
#endif
#if !CONFIG_PVQ
- if (p->eobs[block] && !xd->lossless[xd->mi[0]->mbmi.segment_id])
- av1_optimize_b(cm, x, plane, block, tx_size, ctx);
+ av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
av1_set_txb_context(x, plane, block, tx_size, a, l);
@@ -1202,12 +1173,13 @@ static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
if (tx_size == plane_tx_size) {
encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
} else {
+ assert(tx_size < TX_SIZES_ALL);
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
+ assert(sub_txs < tx_size);
// This is the square transform block partition entry point.
int bsl = tx_size_wide_unit[sub_txs];
int i;
assert(bsl > 0);
- assert(tx_size < TX_SIZES_ALL);
for (i = 0; i < 4; ++i) {
const int offsetr = blk_row + ((i >> 1) * bsl);
@@ -1301,8 +1273,8 @@ void av1_encode_sby_pass1(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize) {
encode_block_pass1, &args);
}
-void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize,
- const int mi_row, const int mi_col) {
+void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
+ int mi_col) {
MACROBLOCKD *const xd = &x->e_mbd;
struct optimize_ctx ctx;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
@@ -1433,6 +1405,301 @@ static void encode_block_intra_and_set_context(int plane, int block,
#endif
}
+#if CONFIG_DPCM_INTRA
+static int get_eob(const tran_low_t *qcoeff, intptr_t n_coeffs,
+ const int16_t *scan) {
+ int eob = -1;
+ for (int i = (int)n_coeffs - 1; i >= 0; i--) {
+ const int rc = scan[i];
+ if (qcoeff[rc]) {
+ eob = i;
+ break;
+ }
+ }
+ return eob + 1;
+}
+
+static void quantize_scaler(int coeff, int16_t zbin, int16_t round_value,
+ int16_t quant, int16_t quant_shift, int16_t dequant,
+ int log_scale, tran_low_t *const qcoeff,
+ tran_low_t *const dqcoeff) {
+ zbin = ROUND_POWER_OF_TWO(zbin, log_scale);
+ round_value = ROUND_POWER_OF_TWO(round_value, log_scale);
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ if (abs_coeff >= zbin) {
+ int tmp = clamp(abs_coeff + round_value, INT16_MIN, INT16_MAX);
+ tmp = ((((tmp * quant) >> 16) + tmp) * quant_shift) >> (16 - log_scale);
+ *qcoeff = (tmp ^ coeff_sign) - coeff_sign;
+ *dqcoeff = (*qcoeff * dequant) / (1 << log_scale);
+ }
+}
+
+typedef void (*dpcm_fwd_tx_func)(const int16_t *input, int stride,
+ TX_TYPE_1D tx_type, tran_low_t *output);
+
+static dpcm_fwd_tx_func get_dpcm_fwd_tx_func(int tx_length) {
+ switch (tx_length) {
+ case 4: return av1_dpcm_ft4_c;
+ case 8: return av1_dpcm_ft8_c;
+ case 16: return av1_dpcm_ft16_c;
+ case 32:
+ return av1_dpcm_ft32_c;
+ // TODO(huisu): add support for TX_64X64.
+ default: assert(0); return NULL;
+ }
+}
+
+static void process_block_dpcm_vert(TX_SIZE tx_size, TX_TYPE_1D tx_type_1d,
+ struct macroblockd_plane *const pd,
+ struct macroblock_plane *const p,
+ uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int16_t *src_diff,
+ int diff_stride, tran_low_t *coeff,
+ tran_low_t *qcoeff, tran_low_t *dqcoeff) {
+ const int tx1d_width = tx_size_wide[tx_size];
+ dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_width);
+ dpcm_inv_txfm_add_func inverse_tx =
+ av1_get_dpcm_inv_txfm_add_func(tx1d_width);
+ const int tx1d_height = tx_size_high[tx_size];
+ const int log_scale = av1_get_tx_scale(tx_size);
+ int q_idx = 0;
+ for (int r = 0; r < tx1d_height; ++r) {
+ // Update prediction.
+ if (r > 0) memcpy(dst, dst - dst_stride, tx1d_width * sizeof(dst[0]));
+ // Subtraction.
+ for (int c = 0; c < tx1d_width; ++c) src_diff[c] = src[c] - dst[c];
+ // Forward transform.
+ forward_tx(src_diff, 1, tx_type_1d, coeff);
+ // Quantization.
+ for (int c = 0; c < tx1d_width; ++c) {
+ quantize_scaler(coeff[c], p->zbin[q_idx], p->round[q_idx],
+ p->quant[q_idx], p->quant_shift[q_idx],
+ pd->dequant[q_idx], log_scale, &qcoeff[c], &dqcoeff[c]);
+ q_idx = 1;
+ }
+ // Inverse transform.
+ inverse_tx(dqcoeff, 1, tx_type_1d, dst);
+ // Move to the next row.
+ coeff += tx1d_width;
+ qcoeff += tx1d_width;
+ dqcoeff += tx1d_width;
+ src_diff += diff_stride;
+ dst += dst_stride;
+ src += src_stride;
+ }
+}
+
+static void process_block_dpcm_horz(TX_SIZE tx_size, TX_TYPE_1D tx_type_1d,
+ struct macroblockd_plane *const pd,
+ struct macroblock_plane *const p,
+ uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int16_t *src_diff,
+ int diff_stride, tran_low_t *coeff,
+ tran_low_t *qcoeff, tran_low_t *dqcoeff) {
+ const int tx1d_height = tx_size_high[tx_size];
+ dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_height);
+ dpcm_inv_txfm_add_func inverse_tx =
+ av1_get_dpcm_inv_txfm_add_func(tx1d_height);
+ const int tx1d_width = tx_size_wide[tx_size];
+ const int log_scale = av1_get_tx_scale(tx_size);
+ int q_idx = 0;
+ for (int c = 0; c < tx1d_width; ++c) {
+ for (int r = 0; r < tx1d_height; ++r) {
+ // Update prediction.
+ if (c > 0) dst[r * dst_stride] = dst[r * dst_stride - 1];
+ // Subtraction.
+ src_diff[r * diff_stride] = src[r * src_stride] - dst[r * dst_stride];
+ }
+ // Forward transform.
+ tran_low_t tx_buff[64];
+ forward_tx(src_diff, diff_stride, tx_type_1d, tx_buff);
+ for (int r = 0; r < tx1d_height; ++r) coeff[r * tx1d_width] = tx_buff[r];
+ // Quantization.
+ for (int r = 0; r < tx1d_height; ++r) {
+ quantize_scaler(coeff[r * tx1d_width], p->zbin[q_idx], p->round[q_idx],
+ p->quant[q_idx], p->quant_shift[q_idx],
+ pd->dequant[q_idx], log_scale, &qcoeff[r * tx1d_width],
+ &dqcoeff[r * tx1d_width]);
+ q_idx = 1;
+ }
+ // Inverse transform.
+ for (int r = 0; r < tx1d_height; ++r) tx_buff[r] = dqcoeff[r * tx1d_width];
+ inverse_tx(tx_buff, dst_stride, tx_type_1d, dst);
+ // Move to the next column.
+ ++coeff, ++qcoeff, ++dqcoeff, ++src_diff, ++dst, ++src;
+ }
+}
+
+#if CONFIG_HIGHBITDEPTH
+static void hbd_process_block_dpcm_vert(
+ TX_SIZE tx_size, TX_TYPE_1D tx_type_1d, int bd,
+ struct macroblockd_plane *const pd, struct macroblock_plane *const p,
+ uint8_t *src8, int src_stride, uint8_t *dst8, int dst_stride,
+ int16_t *src_diff, int diff_stride, tran_low_t *coeff, tran_low_t *qcoeff,
+ tran_low_t *dqcoeff) {
+ const int tx1d_width = tx_size_wide[tx_size];
+ dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_width);
+ hbd_dpcm_inv_txfm_add_func inverse_tx =
+ av1_get_hbd_dpcm_inv_txfm_add_func(tx1d_width);
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const int tx1d_height = tx_size_high[tx_size];
+ const int log_scale = av1_get_tx_scale(tx_size);
+ int q_idx = 0;
+ for (int r = 0; r < tx1d_height; ++r) {
+ // Update prediction.
+ if (r > 0) memcpy(dst, dst - dst_stride, tx1d_width * sizeof(dst[0]));
+ // Subtraction.
+ for (int c = 0; c < tx1d_width; ++c) src_diff[c] = src[c] - dst[c];
+ // Forward transform.
+ forward_tx(src_diff, 1, tx_type_1d, coeff);
+ // Quantization.
+ for (int c = 0; c < tx1d_width; ++c) {
+ quantize_scaler(coeff[c], p->zbin[q_idx], p->round[q_idx],
+ p->quant[q_idx], p->quant_shift[q_idx],
+ pd->dequant[q_idx], log_scale, &qcoeff[c], &dqcoeff[c]);
+ q_idx = 1;
+ }
+ // Inverse transform.
+ inverse_tx(dqcoeff, 1, tx_type_1d, bd, dst);
+ // Move to the next row.
+ coeff += tx1d_width;
+ qcoeff += tx1d_width;
+ dqcoeff += tx1d_width;
+ src_diff += diff_stride;
+ dst += dst_stride;
+ src += src_stride;
+ }
+}
+
+static void hbd_process_block_dpcm_horz(
+ TX_SIZE tx_size, TX_TYPE_1D tx_type_1d, int bd,
+ struct macroblockd_plane *const pd, struct macroblock_plane *const p,
+ uint8_t *src8, int src_stride, uint8_t *dst8, int dst_stride,
+ int16_t *src_diff, int diff_stride, tran_low_t *coeff, tran_low_t *qcoeff,
+ tran_low_t *dqcoeff) {
+ const int tx1d_height = tx_size_high[tx_size];
+ dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_height);
+ hbd_dpcm_inv_txfm_add_func inverse_tx =
+ av1_get_hbd_dpcm_inv_txfm_add_func(tx1d_height);
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const int tx1d_width = tx_size_wide[tx_size];
+ const int log_scale = av1_get_tx_scale(tx_size);
+ int q_idx = 0;
+ for (int c = 0; c < tx1d_width; ++c) {
+ for (int r = 0; r < tx1d_height; ++r) {
+ // Update prediction.
+ if (c > 0) dst[r * dst_stride] = dst[r * dst_stride - 1];
+ // Subtraction.
+ src_diff[r * diff_stride] = src[r * src_stride] - dst[r * dst_stride];
+ }
+ // Forward transform.
+ tran_low_t tx_buff[64];
+ forward_tx(src_diff, diff_stride, tx_type_1d, tx_buff);
+ for (int r = 0; r < tx1d_height; ++r) coeff[r * tx1d_width] = tx_buff[r];
+ // Quantization.
+ for (int r = 0; r < tx1d_height; ++r) {
+ quantize_scaler(coeff[r * tx1d_width], p->zbin[q_idx], p->round[q_idx],
+ p->quant[q_idx], p->quant_shift[q_idx],
+ pd->dequant[q_idx], log_scale, &qcoeff[r * tx1d_width],
+ &dqcoeff[r * tx1d_width]);
+ q_idx = 1;
+ }
+ // Inverse transform.
+ for (int r = 0; r < tx1d_height; ++r) tx_buff[r] = dqcoeff[r * tx1d_width];
+ inverse_tx(tx_buff, dst_stride, tx_type_1d, bd, dst);
+ // Move to the next column.
+ ++coeff, ++qcoeff, ++dqcoeff, ++src_diff, ++dst, ++src;
+ }
+}
+#endif // CONFIG_HIGHBITDEPTH
+
+void av1_encode_block_intra_dpcm(const AV1_COMMON *cm, MACROBLOCK *x,
+ PREDICTION_MODE mode, int plane, int block,
+ int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ TX_TYPE tx_type, ENTROPY_CONTEXT *ta,
+ ENTROPY_CONTEXT *tl, int8_t *skip) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ struct macroblock_plane *const p = &x->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ const int diff_stride = block_size_wide[plane_bsize];
+ const int src_stride = p->src.stride;
+ const int dst_stride = pd->dst.stride;
+ const int tx1d_width = tx_size_wide[tx_size];
+ const int tx1d_height = tx_size_high[tx_size];
+ const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, 0);
+ tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
+ tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+ uint8_t *dst =
+ &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
+ uint8_t *src =
+ &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
+ int16_t *src_diff =
+ &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
+ uint16_t *eob = &p->eobs[block];
+ *eob = 0;
+ memset(qcoeff, 0, tx1d_height * tx1d_width * sizeof(*qcoeff));
+ memset(dqcoeff, 0, tx1d_height * tx1d_width * sizeof(*dqcoeff));
+
+ if (LIKELY(!x->skip_block)) {
+ TX_TYPE_1D tx_type_1d = DCT_1D;
+ switch (tx_type) {
+ case IDTX: tx_type_1d = IDTX_1D; break;
+ case V_DCT:
+ assert(mode == H_PRED);
+ tx_type_1d = DCT_1D;
+ break;
+ case H_DCT:
+ assert(mode == V_PRED);
+ tx_type_1d = DCT_1D;
+ break;
+ default: assert(0);
+ }
+ switch (mode) {
+ case V_PRED:
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ hbd_process_block_dpcm_vert(tx_size, tx_type_1d, xd->bd, pd, p, src,
+ src_stride, dst, dst_stride, src_diff,
+ diff_stride, coeff, qcoeff, dqcoeff);
+ } else {
+#endif // CONFIG_HIGHBITDEPTH
+ process_block_dpcm_vert(tx_size, tx_type_1d, pd, p, src, src_stride,
+ dst, dst_stride, src_diff, diff_stride, coeff,
+ qcoeff, dqcoeff);
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ break;
+ case H_PRED:
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ hbd_process_block_dpcm_horz(tx_size, tx_type_1d, xd->bd, pd, p, src,
+ src_stride, dst, dst_stride, src_diff,
+ diff_stride, coeff, qcoeff, dqcoeff);
+ } else {
+#endif // CONFIG_HIGHBITDEPTH
+ process_block_dpcm_horz(tx_size, tx_type_1d, pd, p, src, src_stride,
+ dst, dst_stride, src_diff, diff_stride, coeff,
+ qcoeff, dqcoeff);
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ break;
+ default: assert(0);
+ }
+ *eob = get_eob(qcoeff, tx1d_height * tx1d_width, scan_order->scan);
+ }
+
+ ta[blk_col] = tl[blk_row] = *eob > 0;
+ if (*eob) *skip = 0;
+}
+#endif // CONFIG_DPCM_INTRA
+
void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *arg) {
@@ -1449,7 +1716,33 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
const int dst_stride = pd->dst.stride;
uint8_t *dst =
&pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
+#if CONFIG_CFL
+
+#if CONFIG_EC_ADAPT
+ FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
+#else
+ FRAME_CONTEXT *const ec_ctx = cm->fc;
+#endif // CONFIG_EC_ADAPT
+
+ av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col,
+ blk_row, tx_size, plane_bsize);
+#else
av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
+#endif
+
+#if CONFIG_DPCM_INTRA
+ const int block_raster_idx = av1_block_index_to_raster_order(tx_size, block);
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const PREDICTION_MODE mode =
+ (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode;
+ if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) {
+ av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col,
+ plane_bsize, tx_size, tx_type, args->ta,
+ args->tl, args->skip);
+ return;
+ }
+#endif // CONFIG_DPCM_INTRA
+
av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
const ENTROPY_CONTEXT *a = &args->ta[blk_col];
@@ -1458,9 +1751,7 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
if (args->enable_optimize_b) {
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
ctx, AV1_XFORM_QUANT_FP);
- if (p->eobs[block]) {
- av1_optimize_b(cm, x, plane, block, tx_size, ctx);
- }
+ av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
} else {
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
ctx, AV1_XFORM_QUANT_B);
@@ -1480,16 +1771,216 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
// Note : *(args->skip) == mbmi->skip
#endif
#if CONFIG_CFL
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
if (plane == AOM_PLANE_Y && x->cfl_store_y) {
cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size);
}
+
+ if (mbmi->uv_mode == DC_PRED) {
+ // TODO(ltrudeau) find a cleaner way to detect last transform block
+ if (plane == AOM_PLANE_U) {
+ xd->cfl->num_tx_blk[CFL_PRED_U] =
+ (blk_row == 0 && blk_col == 0) ? 1
+ : xd->cfl->num_tx_blk[CFL_PRED_U] + 1;
+ }
+
+ if (plane == AOM_PLANE_V) {
+ xd->cfl->num_tx_blk[CFL_PRED_V] =
+ (blk_row == 0 && blk_col == 0) ? 1
+ : xd->cfl->num_tx_blk[CFL_PRED_V] + 1;
+
+ if (mbmi->skip &&
+ xd->cfl->num_tx_blk[CFL_PRED_U] == xd->cfl->num_tx_blk[CFL_PRED_V]) {
+ assert(plane_bsize != BLOCK_INVALID);
+ const int block_width = block_size_wide[plane_bsize];
+ const int block_height = block_size_high[plane_bsize];
+
+ // if SKIP is chosen at the block level, and ind != 0, we must change
+ // the prediction
+ if (mbmi->cfl_alpha_idx != 0) {
+ const struct macroblockd_plane *const pd_cb = &xd->plane[AOM_PLANE_U];
+ uint8_t *const dst_cb = pd_cb->dst.buf;
+ const int dst_stride_cb = pd_cb->dst.stride;
+ uint8_t *const dst_cr = pd->dst.buf;
+ const int dst_stride_cr = pd->dst.stride;
+ for (int j = 0; j < block_height; j++) {
+ for (int i = 0; i < block_width; i++) {
+ dst_cb[dst_stride_cb * j + i] =
+ (uint8_t)(xd->cfl->dc_pred[CFL_PRED_U] + 0.5);
+ dst_cr[dst_stride_cr * j + i] =
+ (uint8_t)(xd->cfl->dc_pred[CFL_PRED_V] + 0.5);
+ }
+ }
+ mbmi->cfl_alpha_idx = 0;
+ mbmi->cfl_alpha_signs[CFL_PRED_U] = CFL_SIGN_POS;
+ mbmi->cfl_alpha_signs[CFL_PRED_V] = CFL_SIGN_POS;
+ }
+ }
+ }
+ }
#endif
}
+#if CONFIG_CFL
+static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg,
+ const uint8_t *src, int src_stride, int blk_width,
+ int blk_height, double dc_pred, double alpha,
+ int *dist_neg_out) {
+ const double dc_pred_bias = dc_pred + 0.5;
+ int dist = 0;
+ int diff;
+
+ if (alpha == 0.0) {
+ const int dc_pred_i = (int)dc_pred_bias;
+ for (int j = 0; j < blk_height; j++) {
+ for (int i = 0; i < blk_width; i++) {
+ diff = src[i] - dc_pred_i;
+ dist += diff * diff;
+ }
+ src += src_stride;
+ }
+
+ if (dist_neg_out) *dist_neg_out = dist;
+
+ return dist;
+ }
+
+ int dist_neg = 0;
+ for (int j = 0; j < blk_height; j++) {
+ for (int i = 0; i < blk_width; i++) {
+ const double scaled_luma = alpha * (y_pix[i] - y_avg);
+ const int uv = src[i];
+ diff = uv - (int)(scaled_luma + dc_pred_bias);
+ dist += diff * diff;
+ diff = uv + (int)(scaled_luma - dc_pred_bias);
+ dist_neg += diff * diff;
+ }
+ y_pix += y_stride;
+ src += src_stride;
+ }
+
+ if (dist_neg_out) *dist_neg_out = dist_neg;
+
+ return dist;
+}
+
+static int cfl_compute_alpha_ind(MACROBLOCK *const x, const CFL_CTX *const cfl,
+ BLOCK_SIZE bsize,
+ CFL_SIGN_TYPE signs_out[CFL_SIGNS]) {
+ const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U];
+ const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V];
+ const uint8_t *const src_u = p_u->src.buf;
+ const uint8_t *const src_v = p_v->src.buf;
+ const int src_stride_u = p_u->src.stride;
+ const int src_stride_v = p_v->src.stride;
+ const int block_width = block_size_wide[bsize];
+ const int block_height = block_size_high[bsize];
+ const double dc_pred_u = cfl->dc_pred[CFL_PRED_U];
+ const double dc_pred_v = cfl->dc_pred[CFL_PRED_V];
+
+ // Temporary pixel buffer used to store the CfL prediction when we compute the
+ // alpha index.
+ uint8_t tmp_pix[MAX_SB_SQUARE];
+ // Load CfL Prediction over the entire block
+ const double y_avg =
+ cfl_load(cfl, tmp_pix, MAX_SB_SIZE, 0, 0, block_width, block_height);
+
+ int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
+ sse[CFL_PRED_U][0] =
+ cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u,
+ block_width, block_height, dc_pred_u, 0, NULL);
+ sse[CFL_PRED_V][0] =
+ cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v,
+ block_width, block_height, dc_pred_v, 0, NULL);
+ for (int m = 1; m < CFL_MAGS_SIZE; m += 2) {
+ assert(cfl_alpha_mags[m + 1] == -cfl_alpha_mags[m]);
+ sse[CFL_PRED_U][m] = cfl_alpha_dist(
+ tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, block_width,
+ block_height, dc_pred_u, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]);
+ sse[CFL_PRED_V][m] = cfl_alpha_dist(
+ tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, block_width,
+ block_height, dc_pred_v, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]);
+ }
+
+ int dist;
+ int64_t cost;
+ int64_t best_cost;
+
+ // Compute least squares parameter of the entire block
+ // IMPORTANT: We assume that the first code is 0,0
+ int ind = 0;
+ signs_out[CFL_PRED_U] = CFL_SIGN_POS;
+ signs_out[CFL_PRED_V] = CFL_SIGN_POS;
+
+ dist = sse[CFL_PRED_U][0] + sse[CFL_PRED_V][0];
+ dist *= 16;
+ best_cost = RDCOST(x->rdmult, x->rddiv, cfl->costs[0], dist);
+
+ for (int c = 1; c < CFL_ALPHABET_SIZE; c++) {
+ const int idx_u = cfl_alpha_codes[c][CFL_PRED_U];
+ const int idx_v = cfl_alpha_codes[c][CFL_PRED_V];
+ for (CFL_SIGN_TYPE sign_u = idx_u == 0; sign_u < CFL_SIGNS; sign_u++) {
+ for (CFL_SIGN_TYPE sign_v = idx_v == 0; sign_v < CFL_SIGNS; sign_v++) {
+ dist = sse[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] +
+ sse[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)];
+ dist *= 16;
+ cost = RDCOST(x->rdmult, x->rddiv, cfl->costs[c], dist);
+ if (cost < best_cost) {
+ best_cost = cost;
+ ind = c;
+ signs_out[CFL_PRED_U] = sign_u;
+ signs_out[CFL_PRED_V] = sign_v;
+ }
+ }
+ }
+ }
+
+ return ind;
+}
+
+static inline void cfl_update_costs(CFL_CTX *cfl, FRAME_CONTEXT *ec_ctx) {
+ assert(ec_ctx->cfl_alpha_cdf[CFL_ALPHABET_SIZE - 1] ==
+ AOM_ICDF(CDF_PROB_TOP));
+ const int prob_den = CDF_PROB_TOP;
+
+ int prob_num = AOM_ICDF(ec_ctx->cfl_alpha_cdf[0]);
+ cfl->costs[0] = av1_cost_zero(get_prob(prob_num, prob_den));
+
+ for (int c = 1; c < CFL_ALPHABET_SIZE; c++) {
+ int sign_bit_cost = (cfl_alpha_codes[c][CFL_PRED_U] != 0) +
+ (cfl_alpha_codes[c][CFL_PRED_V] != 0);
+ prob_num = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]) -
+ AOM_ICDF(ec_ctx->cfl_alpha_cdf[c - 1]);
+ cfl->costs[c] = av1_cost_zero(get_prob(prob_num, prob_den)) +
+ av1_cost_literal(sign_bit_cost);
+ }
+}
+
+void av1_predict_intra_block_encoder_facade(MACROBLOCK *x,
+ FRAME_CONTEXT *ec_ctx, int plane,
+ int block_idx, int blk_col,
+ int blk_row, TX_SIZE tx_size,
+ BLOCK_SIZE plane_bsize) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ if (plane != AOM_PLANE_Y && mbmi->uv_mode == DC_PRED) {
+ if (blk_col == 0 && blk_row == 0 && plane == AOM_PLANE_U) {
+ CFL_CTX *const cfl = xd->cfl;
+ cfl_update_costs(cfl, ec_ctx);
+ cfl_dc_pred(xd, plane_bsize, tx_size);
+ mbmi->cfl_alpha_idx =
+ cfl_compute_alpha_ind(x, cfl, plane_bsize, mbmi->cfl_alpha_signs);
+ }
+ }
+ av1_predict_intra_block_facade(xd, plane, block_idx, blk_col, blk_row,
+ tx_size);
+}
+#endif
+
void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
BLOCK_SIZE bsize, int plane,
- int enable_optimize_b, const int mi_row,
- const int mi_col) {
+ int enable_optimize_b, int mi_row,
+ int mi_col) {
const MACROBLOCKD *const xd = &x->e_mbd;
ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE] = { 0 };
ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE] = { 0 };
@@ -1545,9 +2036,7 @@ PVQ_SKIP_TYPE av1_pvq_encode_helper(MACROBLOCK *x, tran_low_t *const coeff,
DECLARE_ALIGNED(16, int32_t, ref_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
DECLARE_ALIGNED(16, int32_t, out_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
-#if CONFIG_HIGHBITDEPTH
hbd_downshift = x->e_mbd.bd - 8;
-#endif
assert(OD_COEFF_SHIFT >= 4);
// DC quantizer for PVQ
@@ -1563,10 +2052,10 @@ PVQ_SKIP_TYPE av1_pvq_encode_helper(MACROBLOCK *x, tran_low_t *const coeff,
*eob = 0;
-#if CONFIG_DAALA_EC
+#if !CONFIG_ANS
tell = od_ec_enc_tell_frac(&daala_enc->w.ec);
#else
-#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
+#error "CONFIG_PVQ currently requires !CONFIG_ANS."
#endif
// Change coefficient ordering for pvq encoding.
@@ -1635,11 +2124,11 @@ PVQ_SKIP_TYPE av1_pvq_encode_helper(MACROBLOCK *x, tran_low_t *const coeff,
*eob = tx_blk_size * tx_blk_size;
-#if CONFIG_DAALA_EC
+#if !CONFIG_ANS
*rate = (od_ec_enc_tell_frac(&daala_enc->w.ec) - tell)
<< (AV1_PROB_COST_SHIFT - OD_BITRES);
#else
-#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
+#error "CONFIG_PVQ currently requires !CONFIG_ANS."
#endif
assert(*rate >= 0);
diff --git a/third_party/aom/av1/encoder/encodemb.h b/third_party/aom/av1/encoder/encodemb.h
index 73fde1d88..35a2c1570 100644
--- a/third_party/aom/av1/encoder/encodemb.h
+++ b/third_party/aom/av1/encoder/encodemb.h
@@ -54,7 +54,8 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
TX_SIZE tx_size, int ctx, AV1_XFORM_QUANT xform_quant_idx);
int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
- TX_SIZE tx_size, int ctx);
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l);
void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize,
int blk_col, int blk_row, TX_SIZE tx_size);
@@ -85,6 +86,23 @@ void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta, int *k,
int *size, int skip_rest, int skip_dir, int bs);
#endif
+#if CONFIG_CFL
+void av1_predict_intra_block_encoder_facade(MACROBLOCK *x,
+ FRAME_CONTEXT *ec_ctx, int plane,
+ int block_idx, int blk_col,
+ int blk_row, TX_SIZE tx_size,
+ BLOCK_SIZE plane_bsize);
+#endif
+
+#if CONFIG_DPCM_INTRA
+void av1_encode_block_intra_dpcm(const AV1_COMMON *cm, MACROBLOCK *x,
+ PREDICTION_MODE mode, int plane, int block,
+ int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ TX_TYPE tx_type, ENTROPY_CONTEXT *ta,
+ ENTROPY_CONTEXT *tl, int8_t *skip);
+#endif // CONFIG_DPCM_INTRA
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/encoder/encodemv.c b/third_party/aom/av1/encoder/encodemv.c
index a2a53f840..eb0ff88c4 100644
--- a/third_party/aom/av1/encoder/encodemv.c
+++ b/third_party/aom/av1/encoder/encodemv.c
@@ -45,13 +45,8 @@ static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp,
// Sign
aom_write(w, sign, mvcomp->sign);
-// Class
-#if CONFIG_EC_MULTISYMBOL
+ // Class
aom_write_symbol(w, mv_class, mvcomp->class_cdf, MV_CLASSES);
-#else
- av1_write_token(w, av1_mv_class_tree, mvcomp->classes,
- &mv_class_encodings[mv_class]);
-#endif
// Integer bits
if (mv_class == MV_CLASS_0) {
@@ -62,16 +57,10 @@ static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp,
for (i = 0; i < n; ++i) aom_write(w, (d >> i) & 1, mvcomp->bits[i]);
}
-// Fractional bits
-#if CONFIG_EC_MULTISYMBOL
+ // Fractional bits
aom_write_symbol(
w, fr, mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf,
MV_FP_SIZE);
-#else
- av1_write_token(w, av1_mv_fp_tree,
- mv_class == MV_CLASS_0 ? mvcomp->class0_fp[d] : mvcomp->fp,
- &mv_fp_encodings[fr]);
-#endif
// High precision bit
if (usehp)
@@ -171,7 +160,6 @@ static void write_mv_update(const aom_tree_index *tree,
void av1_write_nmv_probs(AV1_COMMON *cm, int usehp, aom_writer *w,
nmv_context_counts *const nmv_counts) {
int i;
-#if CONFIG_REF_MV
int nmv_ctx = 0;
for (nmv_ctx = 0; nmv_ctx < NMV_CONTEXTS; ++nmv_ctx) {
nmv_context *const mvc = &cm->fc->nmvc[nmv_ctx];
@@ -213,57 +201,13 @@ void av1_write_nmv_probs(AV1_COMMON *cm, int usehp, aom_writer *w,
}
}
}
-#else
- nmv_context *const mvc = &cm->fc->nmvc;
- nmv_context_counts *const counts = nmv_counts;
-
-#if !CONFIG_EC_ADAPT
- write_mv_update(av1_mv_joint_tree, mvc->joints, counts->joints, MV_JOINTS, w);
-
- for (i = 0; i < 2; ++i) {
- int j;
- nmv_component *comp = &mvc->comps[i];
- nmv_component_counts *comp_counts = &counts->comps[i];
-
- update_mv(w, comp_counts->sign, &comp->sign, MV_UPDATE_PROB);
- write_mv_update(av1_mv_class_tree, comp->classes, comp_counts->classes,
- MV_CLASSES, w);
- write_mv_update(av1_mv_class0_tree, comp->class0, comp_counts->class0,
- CLASS0_SIZE, w);
- for (j = 0; j < MV_OFFSET_BITS; ++j)
- update_mv(w, comp_counts->bits[j], &comp->bits[j], MV_UPDATE_PROB);
- }
-
- for (i = 0; i < 2; ++i) {
- int j;
- for (j = 0; j < CLASS0_SIZE; ++j) {
- write_mv_update(av1_mv_fp_tree, mvc->comps[i].class0_fp[j],
- counts->comps[i].class0_fp[j], MV_FP_SIZE, w);
- }
- write_mv_update(av1_mv_fp_tree, mvc->comps[i].fp, counts->comps[i].fp,
- MV_FP_SIZE, w);
- }
-#endif // !CONFIG_EC_ADAPT
-
- if (usehp) {
- for (i = 0; i < 2; ++i) {
- update_mv(w, counts->comps[i].class0_hp, &mvc->comps[i].class0_hp,
- MV_UPDATE_PROB);
- update_mv(w, counts->comps[i].hp, &mvc->comps[i].hp, MV_UPDATE_PROB);
- }
- }
-#endif
}
void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, const MV *mv, const MV *ref,
nmv_context *mvctx, int usehp) {
const MV diff = { mv->row - ref->row, mv->col - ref->col };
const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(w, j, mvctx->joint_cdf, MV_JOINTS);
-#else
- av1_write_token(w, av1_mv_joint_tree, mvctx->joints, &mv_joint_encodings[j]);
-#endif
if (mv_joint_vertical(j))
encode_mv_component(w, diff.row, &mvctx->comps[0], usehp);
@@ -284,11 +228,7 @@ void av1_encode_dv(aom_writer *w, const MV *mv, const MV *ref,
const MV diff = { mv->row - ref->row, mv->col - ref->col };
const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);
-#if CONFIG_EC_MULTISYMBOL
aom_write_symbol(w, j, mvctx->joint_cdf, MV_JOINTS);
-#else
- av1_write_token(w, av1_mv_joint_tree, mvctx->joints, &mv_joint_encodings[j]);
-#endif
if (mv_joint_vertical(j))
encode_mv_component(w, diff.row, &mvctx->comps[0], 0);
@@ -306,135 +246,101 @@ void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
#if CONFIG_EXT_INTER
static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
- const int_mv mvs[2],
-#if CONFIG_REF_MV
- const int_mv pred_mvs[2],
-#endif
+ const int_mv mvs[2], const int_mv pred_mvs[2],
nmv_context_counts *nmv_counts) {
int i;
PREDICTION_MODE mode = mbmi->mode;
-#if !CONFIG_REF_MV
- nmv_context_counts *counts = nmv_counts;
-#endif
if (mode == NEWMV || mode == NEW_NEWMV) {
for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_mv;
const MV diff = { mvs[i].as_mv.row - ref->row,
mvs[i].as_mv.col - ref->col };
-#if CONFIG_REF_MV
int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
int nmv_ctx =
av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], i, mbmi->ref_mv_idx);
nmv_context_counts *counts = &nmv_counts[nmv_ctx];
(void)pred_mvs;
-#endif
av1_inc_mv(&diff, counts, 1);
}
} else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_mv;
const MV diff = { mvs[1].as_mv.row - ref->row,
mvs[1].as_mv.col - ref->col };
-#if CONFIG_REF_MV
int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
int nmv_ctx =
av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], 1, mbmi->ref_mv_idx);
nmv_context_counts *counts = &nmv_counts[nmv_ctx];
-#endif
av1_inc_mv(&diff, counts, 1);
} else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv;
const MV diff = { mvs[0].as_mv.row - ref->row,
mvs[0].as_mv.col - ref->col };
-#if CONFIG_REF_MV
int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
int nmv_ctx =
av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx);
nmv_context_counts *counts = &nmv_counts[nmv_ctx];
-#endif
av1_inc_mv(&diff, counts, 1);
}
}
static void inc_mvs_sub8x8(const MODE_INFO *mi, int block, const int_mv mvs[2],
-#if CONFIG_REF_MV
const MB_MODE_INFO_EXT *mbmi_ext,
-#endif
nmv_context_counts *nmv_counts) {
int i;
PREDICTION_MODE mode = mi->bmi[block].as_mode;
-#if CONFIG_REF_MV
const MB_MODE_INFO *mbmi = &mi->mbmi;
-#else
- nmv_context_counts *counts = nmv_counts;
-#endif
if (mode == NEWMV || mode == NEW_NEWMV) {
for (i = 0; i < 1 + has_second_ref(&mi->mbmi); ++i) {
const MV *ref = &mi->bmi[block].ref_mv[i].as_mv;
const MV diff = { mvs[i].as_mv.row - ref->row,
mvs[i].as_mv.col - ref->col };
-#if CONFIG_REF_MV
int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
int nmv_ctx =
av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], i, mbmi->ref_mv_idx);
nmv_context_counts *counts = &nmv_counts[nmv_ctx];
-#endif
av1_inc_mv(&diff, counts, 1);
}
} else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
const MV *ref = &mi->bmi[block].ref_mv[1].as_mv;
const MV diff = { mvs[1].as_mv.row - ref->row,
mvs[1].as_mv.col - ref->col };
-#if CONFIG_REF_MV
int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
int nmv_ctx =
av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], 1, mbmi->ref_mv_idx);
nmv_context_counts *counts = &nmv_counts[nmv_ctx];
-#endif
av1_inc_mv(&diff, counts, 1);
} else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
const MV *ref = &mi->bmi[block].ref_mv[0].as_mv;
const MV diff = { mvs[0].as_mv.row - ref->row,
mvs[0].as_mv.col - ref->col };
-#if CONFIG_REF_MV
int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
int nmv_ctx =
av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx);
nmv_context_counts *counts = &nmv_counts[nmv_ctx];
-#endif
av1_inc_mv(&diff, counts, 1);
}
}
#else
static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
- const int_mv mvs[2],
-#if CONFIG_REF_MV
- const int_mv pred_mvs[2],
-#endif
+ const int_mv mvs[2], const int_mv pred_mvs[2],
nmv_context_counts *nmv_counts) {
int i;
-#if !CONFIG_REF_MV
- nmv_context_counts *counts = nmv_counts;
-#endif
for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
-#if CONFIG_REF_MV
int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
int nmv_ctx =
av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], i, mbmi->ref_mv_idx);
nmv_context_counts *counts = &nmv_counts[nmv_ctx];
const MV *ref = &pred_mvs[i].as_mv;
-#else
- const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_mv;
-#endif
const MV diff = { mvs[i].as_mv.row - ref->row,
mvs[i].as_mv.col - ref->col };
av1_inc_mv(&diff, counts, 1);
@@ -464,20 +370,11 @@ void av1_update_mv_count(ThreadData *td) {
#if CONFIG_EXT_INTER
if (have_newmv_in_inter_mode(mi->bmi[i].as_mode))
- inc_mvs_sub8x8(mi, i, mi->bmi[i].as_mv,
-#if CONFIG_REF_MV
- mbmi_ext, td->counts->mv);
-#else
- &td->counts->mv);
-#endif
+ inc_mvs_sub8x8(mi, i, mi->bmi[i].as_mv, mbmi_ext, td->counts->mv);
#else
if (mi->bmi[i].as_mode == NEWMV)
- inc_mvs(mbmi, mbmi_ext, mi->bmi[i].as_mv,
-#if CONFIG_REF_MV
- mi->bmi[i].pred_mv, td->counts->mv);
-#else
- &td->counts->mv);
-#endif
+ inc_mvs(mbmi, mbmi_ext, mi->bmi[i].as_mv, mi->bmi[i].pred_mv,
+ td->counts->mv);
#endif // CONFIG_EXT_INTER
}
}
@@ -487,11 +384,6 @@ void av1_update_mv_count(ThreadData *td) {
#else
if (mbmi->mode == NEWMV)
#endif // CONFIG_EXT_INTER
- inc_mvs(mbmi, mbmi_ext, mbmi->mv,
-#if CONFIG_REF_MV
- mbmi->pred_mv, td->counts->mv);
-#else
- &td->counts->mv);
-#endif
+ inc_mvs(mbmi, mbmi_ext, mbmi->mv, mbmi->pred_mv, td->counts->mv);
}
}
diff --git a/third_party/aom/av1/encoder/encoder.c b/third_party/aom/av1/encoder/encoder.c
index 027109151..4782ce2b7 100644
--- a/third_party/aom/av1/encoder/encoder.c
+++ b/third_party/aom/av1/encoder/encoder.c
@@ -246,29 +246,17 @@ void av1_set_high_precision_mv(AV1_COMP *cpi, int allow_high_precision_mv) {
MACROBLOCK *const mb = &cpi->td.mb;
cpi->common.allow_high_precision_mv = allow_high_precision_mv;
-#if CONFIG_REF_MV
if (cpi->common.allow_high_precision_mv) {
int i;
for (i = 0; i < NMV_CONTEXTS; ++i) {
mb->mv_cost_stack[i] = mb->nmvcost_hp[i];
- mb->mvsadcost = mb->nmvsadcost_hp;
}
} else {
int i;
for (i = 0; i < NMV_CONTEXTS; ++i) {
mb->mv_cost_stack[i] = mb->nmvcost[i];
- mb->mvsadcost = mb->nmvsadcost;
}
}
-#else
- if (cpi->common.allow_high_precision_mv) {
- mb->mvcost = mb->nmvcost_hp;
- mb->mvsadcost = mb->nmvcost_hp;
- } else {
- mb->mvcost = mb->nmvcost;
- mb->mvsadcost = mb->nmvcost;
- }
-#endif
}
static BLOCK_SIZE select_sb_size(const AV1_COMP *const cpi) {
@@ -334,13 +322,14 @@ static void setup_frame(AV1_COMP *cpi) {
av1_zero(cpi->interp_filter_selected[0]);
}
#if CONFIG_EXT_REFS
-#if CONFIG_LOWDELAY_COMPOUND // No change to bitstream
+#if CONFIG_ONE_SIDED_COMPOUND // No change to bitstream
if (cpi->sf.recode_loop == DISALLOW_RECODE) {
cpi->refresh_bwd_ref_frame = cpi->refresh_last_frame;
cpi->rc.is_bipred_frame = 1;
}
#endif
#endif
+ cm->pre_fc = &cm->frame_contexts[cm->frame_context_idx];
cpi->vaq_refresh = 0;
@@ -464,6 +453,20 @@ static void dealloc_compressor_data(AV1_COMP *cpi) {
aom_free(cpi->active_map.map);
cpi->active_map.map = NULL;
+#if CONFIG_MOTION_VAR
+ aom_free(cpi->td.mb.above_pred_buf);
+ cpi->td.mb.above_pred_buf = NULL;
+
+ aom_free(cpi->td.mb.left_pred_buf);
+ cpi->td.mb.left_pred_buf = NULL;
+
+ aom_free(cpi->td.mb.wsrc_buf);
+ cpi->td.mb.wsrc_buf = NULL;
+
+ aom_free(cpi->td.mb.mask_buf);
+ cpi->td.mb.mask_buf = NULL;
+#endif
+
// Free up-sampled reference buffers.
for (i = 0; i < (REF_FRAMES + 1); i++)
aom_free_frame_buffer(&cpi->upsampled_ref_bufs[i].buf);
@@ -492,17 +495,12 @@ static void dealloc_compressor_data(AV1_COMP *cpi) {
cpi->tile_tok[0][0] = 0;
av1_free_pc_tree(&cpi->td);
- av1_free_var_tree(&cpi->td);
#if CONFIG_PALETTE
if (cpi->common.allow_screen_content_tools)
aom_free(cpi->td.mb.palette_buffer);
#endif // CONFIG_PALETTE
- if (cpi->source_diff_var != NULL) {
- aom_free(cpi->source_diff_var);
- cpi->source_diff_var = NULL;
- }
#if CONFIG_ANS
aom_buf_ans_free(&cpi->buf_ans);
#endif // CONFIG_ANS
@@ -511,26 +509,17 @@ static void dealloc_compressor_data(AV1_COMP *cpi) {
static void save_coding_context(AV1_COMP *cpi) {
CODING_CONTEXT *const cc = &cpi->coding_context;
AV1_COMMON *cm = &cpi->common;
-#if CONFIG_REF_MV
int i;
-#endif
-// Stores a snapshot of key state variables which can subsequently be
-// restored with a call to av1_restore_coding_context. These functions are
-// intended for use in a re-code loop in av1_compress_frame where the
-// quantizer value is adjusted between loop iterations.
-#if CONFIG_REF_MV
+ // Stores a snapshot of key state variables which can subsequently be
+ // restored with a call to av1_restore_coding_context. These functions are
+ // intended for use in a re-code loop in av1_compress_frame where the
+ // quantizer value is adjusted between loop iterations.
for (i = 0; i < NMV_CONTEXTS; ++i) {
av1_copy(cc->nmv_vec_cost[i], cpi->td.mb.nmv_vec_cost[i]);
av1_copy(cc->nmv_costs, cpi->nmv_costs);
av1_copy(cc->nmv_costs_hp, cpi->nmv_costs_hp);
}
-#else
- av1_copy(cc->nmvjointcost, cpi->td.mb.nmvjointcost);
-#endif
-
- av1_copy(cc->nmvcosts, cpi->nmvcosts);
- av1_copy(cc->nmvcosts_hp, cpi->nmvcosts_hp);
av1_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas);
av1_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas);
@@ -541,24 +530,15 @@ static void save_coding_context(AV1_COMP *cpi) {
static void restore_coding_context(AV1_COMP *cpi) {
CODING_CONTEXT *const cc = &cpi->coding_context;
AV1_COMMON *cm = &cpi->common;
-#if CONFIG_REF_MV
int i;
-#endif
-// Restore key state variables to the snapshot state stored in the
-// previous call to av1_save_coding_context.
-#if CONFIG_REF_MV
+ // Restore key state variables to the snapshot state stored in the
+ // previous call to av1_save_coding_context.
for (i = 0; i < NMV_CONTEXTS; ++i) {
av1_copy(cpi->td.mb.nmv_vec_cost[i], cc->nmv_vec_cost[i]);
av1_copy(cpi->nmv_costs, cc->nmv_costs);
av1_copy(cpi->nmv_costs_hp, cc->nmv_costs_hp);
}
-#else
- av1_copy(cpi->td.mb.nmvjointcost, cc->nmvjointcost);
-#endif
-
- av1_copy(cpi->nmvcosts, cc->nmvcosts);
- av1_copy(cpi->nmvcosts_hp, cc->nmvcosts_hp);
av1_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas);
av1_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas);
@@ -795,14 +775,12 @@ static void alloc_util_frame_buffers(AV1_COMP *cpi) {
"Failed to allocate scaled last source buffer");
}
-static int alloc_context_buffers_ext(AV1_COMP *cpi) {
+static void alloc_context_buffers_ext(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
int mi_size = cm->mi_cols * cm->mi_rows;
- cpi->mbmi_ext_base = aom_calloc(mi_size, sizeof(*cpi->mbmi_ext_base));
- if (!cpi->mbmi_ext_base) return 1;
-
- return 0;
+ CHECK_MEM_ERROR(cm, cpi->mbmi_ext_base,
+ aom_calloc(mi_size, sizeof(*cpi->mbmi_ext_base)));
}
void av1_alloc_compressor_data(AV1_COMP *cpi) {
@@ -902,7 +880,11 @@ static void set_tile_info(AV1_COMP *cpi) {
#if CONFIG_DEPENDENT_HORZTILES
cm->dependent_horz_tiles = cpi->oxcf.dependent_horz_tiles;
+#if CONFIG_EXT_TILE
+ if (cm->tile_rows <= 1) cm->dependent_horz_tiles = 0;
+#else
if (cm->log2_tile_rows == 0) cm->dependent_horz_tiles = 0;
+#endif
#if CONFIG_TILE_GROUPS
if (cpi->oxcf.mtu == 0) {
cm->num_tg = cpi->oxcf.num_tile_groups;
@@ -1194,48 +1176,53 @@ MAKE_BFP_SAD8_WRAPPER(aom_highbd_sad4x4x8)
MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x4x4d)
#if CONFIG_EXT_INTER
-#define HIGHBD_MBFP(BT, MSDF, MVF, MSVF) \
- cpi->fn_ptr[BT].msdf = MSDF; \
- cpi->fn_ptr[BT].mvf = MVF; \
- cpi->fn_ptr[BT].msvf = MSVF;
-
-#define MAKE_MBFP_SAD_WRAPPER(fnname) \
- static unsigned int fnname##_bits8( \
- const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
- int ref_stride, const uint8_t *m, int m_stride) { \
- return fnname(src_ptr, source_stride, ref_ptr, ref_stride, m, m_stride); \
- } \
- static unsigned int fnname##_bits10( \
- const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
- int ref_stride, const uint8_t *m, int m_stride) { \
- return fnname(src_ptr, source_stride, ref_ptr, ref_stride, m, m_stride) >> \
- 2; \
- } \
- static unsigned int fnname##_bits12( \
- const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
- int ref_stride, const uint8_t *m, int m_stride) { \
- return fnname(src_ptr, source_stride, ref_ptr, ref_stride, m, m_stride) >> \
- 4; \
+#define HIGHBD_MBFP(BT, MCSDF, MCSVF) \
+ cpi->fn_ptr[BT].msdf = MCSDF; \
+ cpi->fn_ptr[BT].msvf = MCSVF;
+
+#define MAKE_MBFP_COMPOUND_SAD_WRAPPER(fnname) \
+ static unsigned int fnname##_bits8( \
+ const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
+ int ref_stride, const uint8_t *second_pred_ptr, const uint8_t *m, \
+ int m_stride, int invert_mask) { \
+ return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \
+ second_pred_ptr, m, m_stride, invert_mask); \
+ } \
+ static unsigned int fnname##_bits10( \
+ const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
+ int ref_stride, const uint8_t *second_pred_ptr, const uint8_t *m, \
+ int m_stride, int invert_mask) { \
+ return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \
+ second_pred_ptr, m, m_stride, invert_mask) >> \
+ 2; \
+ } \
+ static unsigned int fnname##_bits12( \
+ const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
+ int ref_stride, const uint8_t *second_pred_ptr, const uint8_t *m, \
+ int m_stride, int invert_mask) { \
+ return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \
+ second_pred_ptr, m, m_stride, invert_mask) >> \
+ 4; \
}
#if CONFIG_EXT_PARTITION
-MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad128x128)
-MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad128x64)
-MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad64x128)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad128x128)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad128x64)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x128)
#endif // CONFIG_EXT_PARTITION
-MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad64x64)
-MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad64x32)
-MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad32x64)
-MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad32x32)
-MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad32x16)
-MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad16x32)
-MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad16x16)
-MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad16x8)
-MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad8x16)
-MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad8x8)
-MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad8x4)
-MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad4x8)
-MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad4x4)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x64)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x32)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x64)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x32)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x16)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x32)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x16)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x8)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x16)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x8)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x4)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x8)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x4)
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR
@@ -1401,54 +1388,38 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
#if CONFIG_EXT_INTER
#if CONFIG_EXT_PARTITION
HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits8,
- aom_highbd_masked_variance128x128,
- aom_highbd_masked_sub_pixel_variance128x128)
+ aom_highbd_8_masked_sub_pixel_variance128x128)
HIGHBD_MBFP(BLOCK_128X64, aom_highbd_masked_sad128x64_bits8,
- aom_highbd_masked_variance128x64,
- aom_highbd_masked_sub_pixel_variance128x64)
+ aom_highbd_8_masked_sub_pixel_variance128x64)
HIGHBD_MBFP(BLOCK_64X128, aom_highbd_masked_sad64x128_bits8,
- aom_highbd_masked_variance64x128,
- aom_highbd_masked_sub_pixel_variance64x128)
+ aom_highbd_8_masked_sub_pixel_variance64x128)
#endif // CONFIG_EXT_PARTITION
HIGHBD_MBFP(BLOCK_64X64, aom_highbd_masked_sad64x64_bits8,
- aom_highbd_masked_variance64x64,
- aom_highbd_masked_sub_pixel_variance64x64)
+ aom_highbd_8_masked_sub_pixel_variance64x64)
HIGHBD_MBFP(BLOCK_64X32, aom_highbd_masked_sad64x32_bits8,
- aom_highbd_masked_variance64x32,
- aom_highbd_masked_sub_pixel_variance64x32)
+ aom_highbd_8_masked_sub_pixel_variance64x32)
HIGHBD_MBFP(BLOCK_32X64, aom_highbd_masked_sad32x64_bits8,
- aom_highbd_masked_variance32x64,
- aom_highbd_masked_sub_pixel_variance32x64)
+ aom_highbd_8_masked_sub_pixel_variance32x64)
HIGHBD_MBFP(BLOCK_32X32, aom_highbd_masked_sad32x32_bits8,
- aom_highbd_masked_variance32x32,
- aom_highbd_masked_sub_pixel_variance32x32)
+ aom_highbd_8_masked_sub_pixel_variance32x32)
HIGHBD_MBFP(BLOCK_32X16, aom_highbd_masked_sad32x16_bits8,
- aom_highbd_masked_variance32x16,
- aom_highbd_masked_sub_pixel_variance32x16)
+ aom_highbd_8_masked_sub_pixel_variance32x16)
HIGHBD_MBFP(BLOCK_16X32, aom_highbd_masked_sad16x32_bits8,
- aom_highbd_masked_variance16x32,
- aom_highbd_masked_sub_pixel_variance16x32)
+ aom_highbd_8_masked_sub_pixel_variance16x32)
HIGHBD_MBFP(BLOCK_16X16, aom_highbd_masked_sad16x16_bits8,
- aom_highbd_masked_variance16x16,
- aom_highbd_masked_sub_pixel_variance16x16)
+ aom_highbd_8_masked_sub_pixel_variance16x16)
HIGHBD_MBFP(BLOCK_8X16, aom_highbd_masked_sad8x16_bits8,
- aom_highbd_masked_variance8x16,
- aom_highbd_masked_sub_pixel_variance8x16)
+ aom_highbd_8_masked_sub_pixel_variance8x16)
HIGHBD_MBFP(BLOCK_16X8, aom_highbd_masked_sad16x8_bits8,
- aom_highbd_masked_variance16x8,
- aom_highbd_masked_sub_pixel_variance16x8)
+ aom_highbd_8_masked_sub_pixel_variance16x8)
HIGHBD_MBFP(BLOCK_8X8, aom_highbd_masked_sad8x8_bits8,
- aom_highbd_masked_variance8x8,
- aom_highbd_masked_sub_pixel_variance8x8)
+ aom_highbd_8_masked_sub_pixel_variance8x8)
HIGHBD_MBFP(BLOCK_4X8, aom_highbd_masked_sad4x8_bits8,
- aom_highbd_masked_variance4x8,
- aom_highbd_masked_sub_pixel_variance4x8)
+ aom_highbd_8_masked_sub_pixel_variance4x8)
HIGHBD_MBFP(BLOCK_8X4, aom_highbd_masked_sad8x4_bits8,
- aom_highbd_masked_variance8x4,
- aom_highbd_masked_sub_pixel_variance8x4)
+ aom_highbd_8_masked_sub_pixel_variance8x4)
HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits8,
- aom_highbd_masked_variance4x4,
- aom_highbd_masked_sub_pixel_variance4x4)
+ aom_highbd_8_masked_sub_pixel_variance4x4)
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR
#if CONFIG_EXT_PARTITION
@@ -1624,53 +1595,37 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
#if CONFIG_EXT_INTER
#if CONFIG_EXT_PARTITION
HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits10,
- aom_highbd_10_masked_variance128x128,
aom_highbd_10_masked_sub_pixel_variance128x128)
HIGHBD_MBFP(BLOCK_128X64, aom_highbd_masked_sad128x64_bits10,
- aom_highbd_10_masked_variance128x64,
aom_highbd_10_masked_sub_pixel_variance128x64)
HIGHBD_MBFP(BLOCK_64X128, aom_highbd_masked_sad64x128_bits10,
- aom_highbd_10_masked_variance64x128,
aom_highbd_10_masked_sub_pixel_variance64x128)
#endif // CONFIG_EXT_PARTITION
HIGHBD_MBFP(BLOCK_64X64, aom_highbd_masked_sad64x64_bits10,
- aom_highbd_10_masked_variance64x64,
aom_highbd_10_masked_sub_pixel_variance64x64)
HIGHBD_MBFP(BLOCK_64X32, aom_highbd_masked_sad64x32_bits10,
- aom_highbd_10_masked_variance64x32,
aom_highbd_10_masked_sub_pixel_variance64x32)
HIGHBD_MBFP(BLOCK_32X64, aom_highbd_masked_sad32x64_bits10,
- aom_highbd_10_masked_variance32x64,
aom_highbd_10_masked_sub_pixel_variance32x64)
HIGHBD_MBFP(BLOCK_32X32, aom_highbd_masked_sad32x32_bits10,
- aom_highbd_10_masked_variance32x32,
aom_highbd_10_masked_sub_pixel_variance32x32)
HIGHBD_MBFP(BLOCK_32X16, aom_highbd_masked_sad32x16_bits10,
- aom_highbd_10_masked_variance32x16,
aom_highbd_10_masked_sub_pixel_variance32x16)
HIGHBD_MBFP(BLOCK_16X32, aom_highbd_masked_sad16x32_bits10,
- aom_highbd_10_masked_variance16x32,
aom_highbd_10_masked_sub_pixel_variance16x32)
HIGHBD_MBFP(BLOCK_16X16, aom_highbd_masked_sad16x16_bits10,
- aom_highbd_10_masked_variance16x16,
aom_highbd_10_masked_sub_pixel_variance16x16)
HIGHBD_MBFP(BLOCK_8X16, aom_highbd_masked_sad8x16_bits10,
- aom_highbd_10_masked_variance8x16,
aom_highbd_10_masked_sub_pixel_variance8x16)
HIGHBD_MBFP(BLOCK_16X8, aom_highbd_masked_sad16x8_bits10,
- aom_highbd_10_masked_variance16x8,
aom_highbd_10_masked_sub_pixel_variance16x8)
HIGHBD_MBFP(BLOCK_8X8, aom_highbd_masked_sad8x8_bits10,
- aom_highbd_10_masked_variance8x8,
aom_highbd_10_masked_sub_pixel_variance8x8)
HIGHBD_MBFP(BLOCK_4X8, aom_highbd_masked_sad4x8_bits10,
- aom_highbd_10_masked_variance4x8,
aom_highbd_10_masked_sub_pixel_variance4x8)
HIGHBD_MBFP(BLOCK_8X4, aom_highbd_masked_sad8x4_bits10,
- aom_highbd_10_masked_variance8x4,
aom_highbd_10_masked_sub_pixel_variance8x4)
HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits10,
- aom_highbd_10_masked_variance4x4,
aom_highbd_10_masked_sub_pixel_variance4x4)
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR
@@ -1847,53 +1802,37 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
#if CONFIG_EXT_INTER
#if CONFIG_EXT_PARTITION
HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits12,
- aom_highbd_12_masked_variance128x128,
aom_highbd_12_masked_sub_pixel_variance128x128)
HIGHBD_MBFP(BLOCK_128X64, aom_highbd_masked_sad128x64_bits12,
- aom_highbd_12_masked_variance128x64,
aom_highbd_12_masked_sub_pixel_variance128x64)
HIGHBD_MBFP(BLOCK_64X128, aom_highbd_masked_sad64x128_bits12,
- aom_highbd_12_masked_variance64x128,
aom_highbd_12_masked_sub_pixel_variance64x128)
#endif // CONFIG_EXT_PARTITION
HIGHBD_MBFP(BLOCK_64X64, aom_highbd_masked_sad64x64_bits12,
- aom_highbd_12_masked_variance64x64,
aom_highbd_12_masked_sub_pixel_variance64x64)
HIGHBD_MBFP(BLOCK_64X32, aom_highbd_masked_sad64x32_bits12,
- aom_highbd_12_masked_variance64x32,
aom_highbd_12_masked_sub_pixel_variance64x32)
HIGHBD_MBFP(BLOCK_32X64, aom_highbd_masked_sad32x64_bits12,
- aom_highbd_12_masked_variance32x64,
aom_highbd_12_masked_sub_pixel_variance32x64)
HIGHBD_MBFP(BLOCK_32X32, aom_highbd_masked_sad32x32_bits12,
- aom_highbd_12_masked_variance32x32,
aom_highbd_12_masked_sub_pixel_variance32x32)
HIGHBD_MBFP(BLOCK_32X16, aom_highbd_masked_sad32x16_bits12,
- aom_highbd_12_masked_variance32x16,
aom_highbd_12_masked_sub_pixel_variance32x16)
HIGHBD_MBFP(BLOCK_16X32, aom_highbd_masked_sad16x32_bits12,
- aom_highbd_12_masked_variance16x32,
aom_highbd_12_masked_sub_pixel_variance16x32)
HIGHBD_MBFP(BLOCK_16X16, aom_highbd_masked_sad16x16_bits12,
- aom_highbd_12_masked_variance16x16,
aom_highbd_12_masked_sub_pixel_variance16x16)
HIGHBD_MBFP(BLOCK_8X16, aom_highbd_masked_sad8x16_bits12,
- aom_highbd_12_masked_variance8x16,
aom_highbd_12_masked_sub_pixel_variance8x16)
HIGHBD_MBFP(BLOCK_16X8, aom_highbd_masked_sad16x8_bits12,
- aom_highbd_12_masked_variance16x8,
aom_highbd_12_masked_sub_pixel_variance16x8)
HIGHBD_MBFP(BLOCK_8X8, aom_highbd_masked_sad8x8_bits12,
- aom_highbd_12_masked_variance8x8,
aom_highbd_12_masked_sub_pixel_variance8x8)
HIGHBD_MBFP(BLOCK_4X8, aom_highbd_masked_sad4x8_bits12,
- aom_highbd_12_masked_variance4x8,
aom_highbd_12_masked_sub_pixel_variance4x8)
HIGHBD_MBFP(BLOCK_8X4, aom_highbd_masked_sad8x4_bits12,
- aom_highbd_12_masked_variance8x4,
aom_highbd_12_masked_sub_pixel_variance8x4)
HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits12,
- aom_highbd_12_masked_variance4x4,
aom_highbd_12_masked_sub_pixel_variance4x4)
#endif // CONFIG_EXT_INTER
@@ -1979,6 +1918,18 @@ static void realloc_segmentation_maps(AV1_COMP *cpi) {
aom_calloc(cm->mi_rows * cm->mi_cols, 1));
}
+#if CONFIG_EXT_INTER
+void set_compound_tools(AV1_COMMON *cm) {
+ (void)cm;
+#if CONFIG_INTERINTRA
+ cm->allow_interintra_compound = 1;
+#endif // CONFIG_INTERINTRA
+#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
+ cm->allow_masked_compound = 1;
+#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
+}
+#endif // CONFIG_EXT_INTER
+
void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
AV1_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
@@ -1994,9 +1945,7 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
assert(cm->bit_depth > AOM_BITS_8);
cpi->oxcf = *oxcf;
-#if CONFIG_HIGHBITDEPTH
cpi->td.mb.e_mbd.bd = (int)cm->bit_depth;
-#endif // CONFIG_HIGHBITDEPTH
#if CONFIG_GLOBAL_MOTION
cpi->td.mb.e_mbd.global_motion = cm->global_motion;
#endif // CONFIG_GLOBAL_MOTION
@@ -2033,7 +1982,9 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
av1_setup_pc_tree(&cpi->common, &cpi->td);
}
#endif // CONFIG_PALETTE
-
+#if CONFIG_EXT_INTER
+ set_compound_tools(cm);
+#endif // CONFIG_EXT_INTER
av1_reset_segment_features(cm);
av1_set_high_precision_mv(cpi, 0);
@@ -2107,50 +2058,6 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
#endif // CONFIG_ANS && ANS_MAX_SYMBOLS
}
-#ifndef M_LOG2_E
-#define M_LOG2_E 0.693147180559945309417
-#endif
-#define log2f(x) (log(x) / (float)M_LOG2_E)
-
-#if !CONFIG_REF_MV
-static void cal_nmvjointsadcost(int *mvjointsadcost) {
- mvjointsadcost[0] = 600;
- mvjointsadcost[1] = 300;
- mvjointsadcost[2] = 300;
- mvjointsadcost[3] = 300;
-}
-#endif
-
-static void cal_nmvsadcosts(int *mvsadcost[2]) {
- int i = 1;
-
- mvsadcost[0][0] = 0;
- mvsadcost[1][0] = 0;
-
- do {
- double z = 256 * (2 * (log2f(8 * i) + .6));
- mvsadcost[0][i] = (int)z;
- mvsadcost[1][i] = (int)z;
- mvsadcost[0][-i] = (int)z;
- mvsadcost[1][-i] = (int)z;
- } while (++i <= MV_MAX);
-}
-
-static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
- int i = 1;
-
- mvsadcost[0][0] = 0;
- mvsadcost[1][0] = 0;
-
- do {
- double z = 256 * (2 * (log2f(8 * i) + .6));
- mvsadcost[0][i] = (int)z;
- mvsadcost[1][i] = (int)z;
- mvsadcost[0][-i] = (int)z;
- mvsadcost[1][-i] = (int)z;
- } while (++i <= MV_MAX);
-}
-
static INLINE void init_upsampled_ref_frame_bufs(AV1_COMP *cpi) {
int i;
@@ -2192,6 +2099,11 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
cpi->resize_state = 0;
cpi->resize_avg_qp = 0;
cpi->resize_buffer_underflow = 0;
+ cpi->resize_scale_num = 16;
+ cpi->resize_scale_den = 16;
+ cpi->resize_next_scale_num = 16;
+ cpi->resize_next_scale_den = 16;
+
cpi->common.buffer_pool = pool;
init_config(cpi, oxcf);
@@ -2223,17 +2135,10 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
realloc_segmentation_maps(cpi);
-#if CONFIG_REF_MV
for (i = 0; i < NMV_CONTEXTS; ++i) {
memset(cpi->nmv_costs, 0, sizeof(cpi->nmv_costs));
memset(cpi->nmv_costs_hp, 0, sizeof(cpi->nmv_costs_hp));
}
-#endif
-
- memset(cpi->nmvcosts, 0, sizeof(cpi->nmvcosts));
- memset(cpi->nmvcosts_hp, 0, sizeof(cpi->nmvcosts_hp));
- memset(cpi->nmvsadcosts, 0, sizeof(cpi->nmvsadcosts));
- memset(cpi->nmvsadcosts_hp, 0, sizeof(cpi->nmvsadcosts_hp));
for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]));
i++) {
@@ -2296,27 +2201,12 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
cpi->first_time_stamp_ever = INT64_MAX;
-#if CONFIG_REF_MV
for (i = 0; i < NMV_CONTEXTS; ++i) {
cpi->td.mb.nmvcost[i][0] = &cpi->nmv_costs[i][0][MV_MAX];
cpi->td.mb.nmvcost[i][1] = &cpi->nmv_costs[i][1][MV_MAX];
cpi->td.mb.nmvcost_hp[i][0] = &cpi->nmv_costs_hp[i][0][MV_MAX];
cpi->td.mb.nmvcost_hp[i][1] = &cpi->nmv_costs_hp[i][1][MV_MAX];
}
-#else
- cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost);
- cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX];
- cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX];
- cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX];
- cpi->td.mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX];
-#endif
- cpi->td.mb.nmvsadcost[0] = &cpi->nmvsadcosts[0][MV_MAX];
- cpi->td.mb.nmvsadcost[1] = &cpi->nmvsadcosts[1][MV_MAX];
- cal_nmvsadcosts(cpi->td.mb.nmvsadcost);
-
- cpi->td.mb.nmvsadcost_hp[0] = &cpi->nmvsadcosts_hp[0][MV_MAX];
- cpi->td.mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX];
- cal_nmvsadcosts_hp(cpi->td.mb.nmvsadcost_hp);
#ifdef OUTPUT_YUV_SKINMAP
yuv_skinmap_file = fopen("skinmap.yuv", "ab");
@@ -2363,17 +2253,36 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
}
#endif
+#if CONFIG_MOTION_VAR
+#if CONFIG_HIGHBITDEPTH
+ int buf_scaler = 2;
+#else
+ int buf_scaler = 1;
+#endif
+ CHECK_MEM_ERROR(
+ cm, cpi->td.mb.above_pred_buf,
+ (uint8_t *)aom_memalign(16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE *
+ sizeof(*cpi->td.mb.above_pred_buf)));
+ CHECK_MEM_ERROR(
+ cm, cpi->td.mb.left_pred_buf,
+ (uint8_t *)aom_memalign(16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE *
+ sizeof(*cpi->td.mb.left_pred_buf)));
+
+ CHECK_MEM_ERROR(cm, cpi->td.mb.wsrc_buf,
+ (int32_t *)aom_memalign(
+ 16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.wsrc_buf)));
+
+ CHECK_MEM_ERROR(cm, cpi->td.mb.mask_buf,
+ (int32_t *)aom_memalign(
+ 16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.mask_buf)));
+
+#endif
+
init_upsampled_ref_frame_bufs(cpi);
av1_set_speed_features_framesize_independent(cpi);
av1_set_speed_features_framesize_dependent(cpi);
- // Allocate memory to store variances for a frame.
- CHECK_MEM_ERROR(cm, cpi->source_diff_var,
- aom_calloc(cm->MBs, sizeof(*cpi->source_diff_var)));
- cpi->source_var_thresh = 0;
- cpi->frames_till_next_var_check = 0;
-
#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF) \
cpi->fn_ptr[BT].sdf = SDF; \
cpi->fn_ptr[BT].sdaf = SDAF; \
@@ -2499,45 +2408,29 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
#endif // CONFIG_MOTION_VAR
#if CONFIG_EXT_INTER
-#define MBFP(BT, MSDF, MVF, MSVF) \
- cpi->fn_ptr[BT].msdf = MSDF; \
- cpi->fn_ptr[BT].mvf = MVF; \
- cpi->fn_ptr[BT].msvf = MSVF;
+#define MBFP(BT, MCSDF, MCSVF) \
+ cpi->fn_ptr[BT].msdf = MCSDF; \
+ cpi->fn_ptr[BT].msvf = MCSVF;
#if CONFIG_EXT_PARTITION
- MBFP(BLOCK_128X128, aom_masked_sad128x128, aom_masked_variance128x128,
+ MBFP(BLOCK_128X128, aom_masked_sad128x128,
aom_masked_sub_pixel_variance128x128)
- MBFP(BLOCK_128X64, aom_masked_sad128x64, aom_masked_variance128x64,
- aom_masked_sub_pixel_variance128x64)
- MBFP(BLOCK_64X128, aom_masked_sad64x128, aom_masked_variance64x128,
- aom_masked_sub_pixel_variance64x128)
+ MBFP(BLOCK_128X64, aom_masked_sad128x64, aom_masked_sub_pixel_variance128x64)
+ MBFP(BLOCK_64X128, aom_masked_sad64x128, aom_masked_sub_pixel_variance64x128)
#endif // CONFIG_EXT_PARTITION
- MBFP(BLOCK_64X64, aom_masked_sad64x64, aom_masked_variance64x64,
- aom_masked_sub_pixel_variance64x64)
- MBFP(BLOCK_64X32, aom_masked_sad64x32, aom_masked_variance64x32,
- aom_masked_sub_pixel_variance64x32)
- MBFP(BLOCK_32X64, aom_masked_sad32x64, aom_masked_variance32x64,
- aom_masked_sub_pixel_variance32x64)
- MBFP(BLOCK_32X32, aom_masked_sad32x32, aom_masked_variance32x32,
- aom_masked_sub_pixel_variance32x32)
- MBFP(BLOCK_32X16, aom_masked_sad32x16, aom_masked_variance32x16,
- aom_masked_sub_pixel_variance32x16)
- MBFP(BLOCK_16X32, aom_masked_sad16x32, aom_masked_variance16x32,
- aom_masked_sub_pixel_variance16x32)
- MBFP(BLOCK_16X16, aom_masked_sad16x16, aom_masked_variance16x16,
- aom_masked_sub_pixel_variance16x16)
- MBFP(BLOCK_16X8, aom_masked_sad16x8, aom_masked_variance16x8,
- aom_masked_sub_pixel_variance16x8)
- MBFP(BLOCK_8X16, aom_masked_sad8x16, aom_masked_variance8x16,
- aom_masked_sub_pixel_variance8x16)
- MBFP(BLOCK_8X8, aom_masked_sad8x8, aom_masked_variance8x8,
- aom_masked_sub_pixel_variance8x8)
- MBFP(BLOCK_4X8, aom_masked_sad4x8, aom_masked_variance4x8,
- aom_masked_sub_pixel_variance4x8)
- MBFP(BLOCK_8X4, aom_masked_sad8x4, aom_masked_variance8x4,
- aom_masked_sub_pixel_variance8x4)
- MBFP(BLOCK_4X4, aom_masked_sad4x4, aom_masked_variance4x4,
- aom_masked_sub_pixel_variance4x4)
+ MBFP(BLOCK_64X64, aom_masked_sad64x64, aom_masked_sub_pixel_variance64x64)
+ MBFP(BLOCK_64X32, aom_masked_sad64x32, aom_masked_sub_pixel_variance64x32)
+ MBFP(BLOCK_32X64, aom_masked_sad32x64, aom_masked_sub_pixel_variance32x64)
+ MBFP(BLOCK_32X32, aom_masked_sad32x32, aom_masked_sub_pixel_variance32x32)
+ MBFP(BLOCK_32X16, aom_masked_sad32x16, aom_masked_sub_pixel_variance32x16)
+ MBFP(BLOCK_16X32, aom_masked_sad16x32, aom_masked_sub_pixel_variance16x32)
+ MBFP(BLOCK_16X16, aom_masked_sad16x16, aom_masked_sub_pixel_variance16x16)
+ MBFP(BLOCK_16X8, aom_masked_sad16x8, aom_masked_sub_pixel_variance16x8)
+ MBFP(BLOCK_8X16, aom_masked_sad8x16, aom_masked_sub_pixel_variance8x16)
+ MBFP(BLOCK_8X8, aom_masked_sad8x8, aom_masked_sub_pixel_variance8x8)
+ MBFP(BLOCK_4X8, aom_masked_sad4x8, aom_masked_sub_pixel_variance4x8)
+ MBFP(BLOCK_8X4, aom_masked_sad8x4, aom_masked_sub_pixel_variance8x4)
+ MBFP(BLOCK_4X4, aom_masked_sad4x4, aom_masked_sub_pixel_variance4x4)
#endif // CONFIG_EXT_INTER
#if CONFIG_HIGHBITDEPTH
@@ -2555,6 +2448,9 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
#endif
av1_loop_filter_init(cm);
+#if CONFIG_FRAME_SUPERRES
+ cm->superres_scale_numerator = SUPERRES_SCALE_DENOMINATOR;
+#endif // CONFIG_FRAME_SUPERRES
#if CONFIG_LOOP_RESTORATION
av1_loop_restoration_precal();
#endif // CONFIG_LOOP_RESTORATION
@@ -2671,11 +2567,16 @@ void av1_remove_compressor(AV1_COMP *cpi) {
if (t < cpi->num_workers - 1) {
#if CONFIG_PALETTE
if (cpi->common.allow_screen_content_tools)
- aom_free(thread_data->td->mb.palette_buffer);
+ aom_free(thread_data->td->palette_buffer);
#endif // CONFIG_PALETTE
+#if CONFIG_MOTION_VAR
+ aom_free(thread_data->td->above_pred_buf);
+ aom_free(thread_data->td->left_pred_buf);
+ aom_free(thread_data->td->wsrc_buf);
+ aom_free(thread_data->td->mask_buf);
+#endif // CONFIG_MOTION_VAR
aom_free(thread_data->td->counts);
av1_free_pc_tree(thread_data->td);
- av1_free_var_tree(thread_data->td);
aom_free(thread_data->td);
}
}
@@ -2935,48 +2836,6 @@ void aom_write_one_yuv_frame(AV1_COMMON *cm, YV12_BUFFER_CONFIG *s) {
#endif // OUTPUT_YUV_REC
#if CONFIG_HIGHBITDEPTH
-static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst,
- int bd) {
-#else
-static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst) {
-#endif // CONFIG_HIGHBITDEPTH
- // TODO(dkovalev): replace YV12_BUFFER_CONFIG with aom_image_t
- int i;
- const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
- src->v_buffer };
- const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
- const int src_widths[3] = { src->y_crop_width, src->uv_crop_width,
- src->uv_crop_width };
- const int src_heights[3] = { src->y_crop_height, src->uv_crop_height,
- src->uv_crop_height };
- uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
- const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
- const int dst_widths[3] = { dst->y_crop_width, dst->uv_crop_width,
- dst->uv_crop_width };
- const int dst_heights[3] = { dst->y_crop_height, dst->uv_crop_height,
- dst->uv_crop_height };
-
- for (i = 0; i < MAX_MB_PLANE; ++i) {
-#if CONFIG_HIGHBITDEPTH
- if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
- av1_highbd_resize_plane(srcs[i], src_heights[i], src_widths[i],
- src_strides[i], dsts[i], dst_heights[i],
- dst_widths[i], dst_strides[i], bd);
- } else {
- av1_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
- dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
- }
-#else
- av1_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
- dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
-#endif // CONFIG_HIGHBITDEPTH
- }
- aom_extend_frame_borders(dst);
-}
-
-#if CONFIG_HIGHBITDEPTH
static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst, int planes,
int bd) {
@@ -3041,22 +2900,6 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
aom_extend_frame_borders(dst);
}
-static int scale_down(AV1_COMP *cpi, int q) {
- RATE_CONTROL *const rc = &cpi->rc;
- GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- int scale = 0;
- assert(frame_is_kf_gf_arf(cpi));
-
- if (rc->frame_size_selector == UNSCALED &&
- q >= rc->rf_level_maxq[gf_group->rf_level[gf_group->index]]) {
- const int max_size_thresh =
- (int)(rate_thresh_mult[SCALE_STEP1] *
- AOMMAX(rc->this_frame_target, rc->avg_frame_bandwidth));
- scale = rc->projected_frame_size > max_size_thresh ? 1 : 0;
- }
- return scale;
-}
-
#if CONFIG_GLOBAL_MOTION
#define GM_RECODE_LOOP_NUM4X4_FACTOR 192
static int recode_loop_test_global_motion(AV1_COMP *cpi) {
@@ -3070,11 +2913,8 @@ static int recode_loop_test_global_motion(AV1_COMP *cpi) {
cpi->gmparams_cost[i]) {
set_default_warp_params(&cm->global_motion[i]);
cpi->gmparams_cost[i] = 0;
-#if CONFIG_REF_MV
recode = 1;
-#else
recode |= (rdc->global_motion_used[i] > 0);
-#endif
}
}
return recode;
@@ -3093,13 +2933,6 @@ static int recode_loop_test(AV1_COMP *cpi, int high_limit, int low_limit, int q,
if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
(cpi->sf.recode_loop == ALLOW_RECODE) ||
(frame_is_kfgfarf && (cpi->sf.recode_loop == ALLOW_RECODE_KFARFGF))) {
- if (frame_is_kfgfarf && (oxcf->resize_mode == RESIZE_DYNAMIC) &&
- scale_down(cpi, q)) {
- // Code this group at a lower resolution.
- cpi->resize_pending = 1;
- return 1;
- }
-
// TODO(agrange) high_limit could be greater than the scale-down threshold.
if ((rc->projected_frame_size > high_limit && q < maxq) ||
(rc->projected_frame_size < low_limit && q > minq)) {
@@ -3863,6 +3696,9 @@ static void set_size_independent_vars(AV1_COMP *cpi) {
av1_set_rd_speed_thresholds(cpi);
av1_set_rd_speed_thresholds_sub8x8(cpi);
cpi->common.interp_filter = cpi->sf.default_interp_filter;
+#if CONFIG_EXT_INTER
+ if (!frame_is_intra_only(&cpi->common)) set_compound_tools(&cpi->common);
+#endif // CONFIG_EXT_INTER
}
static void set_size_dependent_vars(AV1_COMP *cpi, int *q, int *bottom_index,
@@ -3916,43 +3752,52 @@ static void set_restoration_tilesize(int width, int height,
}
#endif // CONFIG_LOOP_RESTORATION
-static void set_frame_size(AV1_COMP *cpi) {
- int ref_frame;
+static void set_scaled_size(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
AV1EncoderConfig *const oxcf = &cpi->oxcf;
- MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
- if (oxcf->pass == 2 && oxcf->rc_mode == AOM_VBR &&
- ((oxcf->resize_mode == RESIZE_FIXED && cm->current_video_frame == 0) ||
- (oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending))) {
- av1_calculate_coded_size(cpi, &oxcf->scaled_frame_width,
- &oxcf->scaled_frame_height);
-
- // There has been a change in frame size.
- av1_set_size_literal(cpi, oxcf->scaled_frame_width,
- oxcf->scaled_frame_height);
+ // TODO(afergs): Replace with call to av1_resize_pending? Could replace
+ // scaled_size_set as well.
+ // TODO(afergs): Realistically, if resize_pending is true, then the other
+ // conditions must already be satisfied.
+ // Try this first:
+ // av1_resize_pending &&
+ // (DYNAMIC && (1 Pass CBR || 2 Pass VBR)
+ // STATIC && FIRST_FRAME)
+ // Really, av1_resize_pending should just reflect the above.
+ // TODO(afergs): Allow fixed resizing in AOM_CBR mode?
+ // 2 Pass VBR: Resize if fixed resize and first frame, or dynamic resize and
+ // a resize is pending.
+ // 1 Pass CBR: Resize if dynamic resize and resize pending.
+ if ((oxcf->pass == 2 && oxcf->rc_mode == AOM_VBR &&
+ ((oxcf->resize_mode == RESIZE_FIXED && cm->current_video_frame == 0) ||
+ (oxcf->resize_mode == RESIZE_DYNAMIC && av1_resize_pending(cpi)))) ||
+ (oxcf->pass == 0 && oxcf->rc_mode == AOM_CBR &&
+ oxcf->resize_mode == RESIZE_DYNAMIC && av1_resize_pending(cpi))) {
+ // TODO(afergs): This feels hacky... Should it just set? Should
+ // av1_set_next_scaled_size be a library function?
+ av1_calculate_next_scaled_size(cpi, &oxcf->scaled_frame_width,
+ &oxcf->scaled_frame_height);
}
+}
- if (oxcf->pass == 0 && oxcf->rc_mode == AOM_CBR &&
- oxcf->resize_mode == RESIZE_DYNAMIC) {
- if (cpi->resize_pending == 1) {
- oxcf->scaled_frame_width =
- (cm->width * cpi->resize_scale_num) / cpi->resize_scale_den;
- oxcf->scaled_frame_height =
- (cm->height * cpi->resize_scale_num) / cpi->resize_scale_den;
- } else if (cpi->resize_pending == -1) {
- // Go back up to original size.
- oxcf->scaled_frame_width = oxcf->width;
- oxcf->scaled_frame_height = oxcf->height;
- }
- if (cpi->resize_pending != 0) {
- // There has been a change in frame size.
- av1_set_size_literal(cpi, oxcf->scaled_frame_width,
- oxcf->scaled_frame_height);
+static void set_frame_size(AV1_COMP *cpi, int width, int height) {
+ int ref_frame;
+ AV1_COMMON *const cm = &cpi->common;
+ AV1EncoderConfig *const oxcf = &cpi->oxcf;
+ MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
- // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
- set_mv_search_params(cpi);
- }
+ if (width != cm->width || height != cm->height) {
+ // There has been a change in the encoded frame size
+ av1_set_size_literal(cpi, width, height);
+
+ // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
+ // TODO(afergs): Make condition just (pass == 0) or (rc_mode == CBR) -
+ // UNLESS CBR starts allowing FIXED resizing. Then the resize
+ // mode will need to get checked too.
+ if (oxcf->pass == 0 && oxcf->rc_mode == AOM_CBR &&
+ oxcf->resize_mode == RESIZE_DYNAMIC)
+ set_mv_search_params(cpi); // TODO(afergs): Needed? Caller calls after...
}
#if !CONFIG_XIPHRC
@@ -4012,10 +3857,33 @@ static void set_frame_size(AV1_COMP *cpi) {
ref_buf->buf = NULL;
}
}
+#if CONFIG_INTRABC
+#if CONFIG_HIGHBITDEPTH
+ av1_setup_scale_factors_for_frame(&xd->sf_identity, cm->width, cm->height,
+ cm->width, cm->height,
+ cm->use_highbitdepth);
+#else
+ av1_setup_scale_factors_for_frame(&xd->sf_identity, cm->width, cm->height,
+ cm->width, cm->height);
+#endif // CONFIG_HIGHBITDEPTH
+#endif // CONFIG_INTRABC
set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
}
+static void setup_frame_size(AV1_COMP *cpi) { // Picks the size to encode at and applies it via set_frame_size().
+ set_scaled_size(cpi); // may refresh cpi->oxcf.scaled_frame_width/height first
+#if CONFIG_FRAME_SUPERRES
+ int encode_width;
+ int encode_height;
+ av1_calculate_superres_size(cpi, &encode_width, &encode_height); // expected to fill both out-params; presumably the superres-adjusted size -- TODO confirm
+ set_frame_size(cpi, encode_width, encode_height);
+#else
+ set_frame_size(cpi, cpi->oxcf.scaled_frame_width,
+ cpi->oxcf.scaled_frame_height); // no superres: use the scaled size directly
+#endif // CONFIG_FRAME_SUPERRES
+}
+
static void reset_use_upsampled_references(AV1_COMP *cpi) {
MV_REFERENCE_FRAME ref_frame;
@@ -4039,7 +3907,15 @@ static void encode_without_recode_loop(AV1_COMP *cpi) {
aom_clear_system_state();
- set_frame_size(cpi);
+#if CONFIG_FRAME_SUPERRES
+ // TODO(afergs): Figure out when is actually a good time to do superres
+ cm->superres_scale_numerator = SUPERRES_SCALE_DENOMINATOR;
+ // (uint8_t)(rand() % 9 + SUPERRES_SCALE_NUMERATOR_MIN);
+ cpi->superres_pending = cpi->oxcf.superres_enabled && 0;
+#endif // CONFIG_FRAME_SUPERRES
+
+ setup_frame_size(cpi);
+ av1_resize_step(cpi);
// For 1 pass CBR under dynamic resize mode: use faster scaling for source.
// Only for 2x2 scaling for now.
@@ -4075,19 +3951,9 @@ static void encode_without_recode_loop(AV1_COMP *cpi) {
reset_use_upsampled_references(cpi);
av1_set_quantizer(cm, q);
- av1_set_variance_partition_thresholds(cpi, q);
-
setup_frame(cpi);
-
-#if CONFIG_SUBFRAME_PROB_UPDATE
- cm->do_subframe_update = cm->tile_cols == 1 && cm->tile_rows == 1;
- av1_copy(cm->starting_coef_probs, cm->fc->coef_probs);
- av1_copy(cpi->subframe_stats.enc_starting_coef_probs, cm->fc->coef_probs);
- cm->coef_probs_update_idx = 0;
- av1_copy(cpi->subframe_stats.coef_probs_buf[0], cm->fc->coef_probs);
-#endif // CONFIG_SUBFRAME_PROB_UPDATE
-
suppress_active_map(cpi);
+
// Variance adaptive and in frame q adjustment experiments are mutually
// exclusive.
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
@@ -4102,6 +3968,11 @@ static void encode_without_recode_loop(AV1_COMP *cpi) {
// transform / motion compensation build reconstruction frame
av1_encode_frame(cpi);
+#if CONFIG_FRAME_SUPERRES
+ // TODO(afergs): Upscale the frame to show
+ cpi->superres_pending = 0;
+#endif // CONFIG_FRAME_SUPERRES
+
// Update some stats from cyclic refresh, and check if we should not update
// golden reference, for 1 pass CBR.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->frame_type != KEY_FRAME &&
@@ -4136,9 +4007,13 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
do {
aom_clear_system_state();
- set_frame_size(cpi);
+ setup_frame_size(cpi);
- if (loop_count == 0 || cpi->resize_pending != 0) {
+#if CONFIG_FRAME_SUPERRES
+ if (loop_count == 0 || av1_resize_pending(cpi) || cpi->superres_pending) {
+#else
+ if (loop_count == 0 || av1_resize_pending(cpi)) {
+#endif // CONFIG_FRAME_SUPERRES
set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
// cpi->sf.use_upsampled_references can be different from frame to frame.
@@ -4159,8 +4034,8 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
undershoot_seen = 0;
#endif
- // Reconfiguration for change in frame size has concluded.
- cpi->resize_pending = 0;
+ // Advance resize to next state now that updates are done
+ av1_resize_step(cpi);
q_low = bottom_index;
q_high = top_index;
@@ -4208,26 +4083,6 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
}
#endif // CONFIG_Q_ADAPT_PROBS
-#if CONFIG_SUBFRAME_PROB_UPDATE
- cm->do_subframe_update = cm->tile_cols == 1 && cm->tile_rows == 1;
- if (loop_count == 0 || frame_is_intra_only(cm) ||
- cm->error_resilient_mode) {
- av1_copy(cm->starting_coef_probs, cm->fc->coef_probs);
- av1_copy(cpi->subframe_stats.enc_starting_coef_probs, cm->fc->coef_probs);
- } else {
- if (cm->do_subframe_update) {
- av1_copy(cm->fc->coef_probs,
- cpi->subframe_stats.enc_starting_coef_probs);
- av1_copy(cm->starting_coef_probs,
- cpi->subframe_stats.enc_starting_coef_probs);
- av1_zero(cpi->subframe_stats.coef_counts_buf);
- av1_zero(cpi->subframe_stats.eob_counts_buf);
- }
- }
- cm->coef_probs_update_idx = 0;
- av1_copy(cpi->subframe_stats.coef_probs_buf[0], cm->fc->coef_probs);
-#endif // CONFIG_SUBFRAME_PROB_UPDATE
-
// Variance adaptive and in frame q adjustment experiments are mutually
// exclusive.
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
@@ -4318,23 +4173,9 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
int last_q = q;
#if !CONFIG_XIPHRC
int retries = 0;
-#endif
- if (cpi->resize_pending == 1) {
- // Change in frame size so go back around the recode loop.
- cpi->rc.frame_size_selector =
- SCALE_STEP1 - cpi->rc.frame_size_selector;
- cpi->rc.next_frame_size_selector = cpi->rc.frame_size_selector;
+ // TODO(afergs): Replace removed recode when av1_resize_pending is true
-#if CONFIG_INTERNAL_STATS
- ++cpi->tot_recode_hits;
-#endif
- ++loop_count;
- loop = 1;
- continue;
- }
-
-#if !CONFIG_XIPHRC
// Frame size out of permitted range:
// Update correction factor & compute new Q to try...
// Frame is too large
@@ -4438,7 +4279,7 @@ static int get_ref_frame_flags(const AV1_COMP *cpi) {
const int last3_is_last =
map[cpi->lst_fb_idxes[2]] == map[cpi->lst_fb_idxes[0]];
const int gld_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[0]];
-#if CONFIG_LOWDELAY_COMPOUND
+#if CONFIG_ONE_SIDED_COMPOUND
const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idxes[0]];
const int last3_is_last2 =
map[cpi->lst_fb_idxes[2]] == map[cpi->lst_fb_idxes[1]];
@@ -4491,7 +4332,7 @@ static int get_ref_frame_flags(const AV1_COMP *cpi) {
if (gld_is_last2 || gld_is_last3) flags &= ~AOM_GOLD_FLAG;
-#if CONFIG_LOWDELAY_COMPOUND // Changes LL & HL bitstream
+#if CONFIG_ONE_SIDED_COMPOUND // Changes LL & HL bitstream
/* Allow biprediction between two identical frames (e.g. bwd_is_last = 1) */
if (bwd_is_alt && (flags & AOM_BWD_FLAG)) flags &= ~AOM_BWD_FLAG;
#else
@@ -4522,36 +4363,6 @@ static void set_ext_overrides(AV1_COMP *cpi) {
}
}
-YV12_BUFFER_CONFIG *av1_scale_if_required_fast(AV1_COMMON *cm,
- YV12_BUFFER_CONFIG *unscaled,
- YV12_BUFFER_CONFIG *scaled) {
- if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
- cm->mi_rows * MI_SIZE != unscaled->y_height) {
- // For 2x2 scaling down.
- aom_scale_frame(unscaled, scaled, unscaled->y_buffer, 9, 2, 1, 2, 1, 0);
- aom_extend_frame_borders(scaled);
- return scaled;
- } else {
- return unscaled;
- }
-}
-
-YV12_BUFFER_CONFIG *av1_scale_if_required(AV1_COMMON *cm,
- YV12_BUFFER_CONFIG *unscaled,
- YV12_BUFFER_CONFIG *scaled) {
- if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
- cm->mi_rows * MI_SIZE != unscaled->y_height) {
-#if CONFIG_HIGHBITDEPTH
- scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth);
-#else
- scale_and_extend_frame_nonnormative(unscaled, scaled);
-#endif // CONFIG_HIGHBITDEPTH
- return scaled;
- } else {
- return unscaled;
- }
-}
-
static void set_arf_sign_bias(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
int arf_sign_bias;
@@ -5014,9 +4825,6 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
av1_accumulate_frame_counts(&aggregate_fc, &cm->counts);
#endif // CONFIG_ENTROPY_STATS
if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
-#if CONFIG_SUBFRAME_PROB_UPDATE
- cm->partial_prob_update = 0;
-#endif // CONFIG_SUBFRAME_PROB_UPDATE
av1_adapt_coef_probs(cm);
av1_adapt_intra_frame_probs(cm);
#if CONFIG_EC_ADAPT
@@ -5767,7 +5575,8 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
#else
av1_rc_get_second_pass_params(cpi);
} else if (oxcf->pass == 1) {
- set_frame_size(cpi);
+ setup_frame_size(cpi);
+ av1_resize_step(cpi);
}
#endif
@@ -5900,8 +5709,7 @@ int av1_set_internal_size(AV1_COMP *cpi, AOM_SCALING horiz_mode,
return 0;
}
-int av1_set_size_literal(AV1_COMP *cpi, unsigned int width,
- unsigned int height) {
+int av1_set_size_literal(AV1_COMP *cpi, int width, int height) {
AV1_COMMON *cm = &cpi->common;
#if CONFIG_HIGHBITDEPTH
check_initial_width(cpi, cm->use_highbitdepth, 1, 1);
@@ -5909,21 +5717,20 @@ int av1_set_size_literal(AV1_COMP *cpi, unsigned int width,
check_initial_width(cpi, 1, 1);
#endif // CONFIG_HIGHBITDEPTH
- if (width) {
- cm->width = width;
- if (cm->width > cpi->initial_width) {
- cm->width = cpi->initial_width;
- printf("Warning: Desired width too large, changed to %d\n", cm->width);
- }
+ if (width <= 0 || height <= 0) return 1;
+
+ cm->width = width;
+ if (cm->width > cpi->initial_width) {
+ cm->width = cpi->initial_width;
+ printf("Warning: Desired width too large, changed to %d\n", cm->width);
}
- if (height) {
- cm->height = height;
- if (cm->height > cpi->initial_height) {
- cm->height = cpi->initial_height;
- printf("Warning: Desired height too large, changed to %d\n", cm->height);
- }
+ cm->height = height;
+ if (cm->height > cpi->initial_height) {
+ cm->height = cpi->initial_height;
+ printf("Warning: Desired height too large, changed to %d\n", cm->height);
}
+
assert(cm->width <= cpi->initial_width);
assert(cm->height <= cpi->initial_height);
diff --git a/third_party/aom/av1/encoder/encoder.h b/third_party/aom/av1/encoder/encoder.h
index 4e7aef8fc..ee1257c2d 100644
--- a/third_party/aom/av1/encoder/encoder.h
+++ b/third_party/aom/av1/encoder/encoder.h
@@ -37,7 +37,6 @@
#include "av1/encoder/rd.h"
#include "av1/encoder/speed_features.h"
#include "av1/encoder/tokenize.h"
-#include "av1/encoder/variance_tree.h"
#if CONFIG_XIPHRC
#include "av1/encoder/ratectrl_xiph.h"
#endif
@@ -54,15 +53,9 @@ extern "C" {
#endif
typedef struct {
- int nmvjointcost[MV_JOINTS];
- int nmvcosts[2][MV_VALS];
- int nmvcosts_hp[2][MV_VALS];
-
-#if CONFIG_REF_MV
int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS];
int nmv_costs[NMV_CONTEXTS][2][MV_VALS];
int nmv_costs_hp[NMV_CONTEXTS][2][MV_VALS];
-#endif
// 0 = Intra, Last, GF, ARF
signed char last_ref_lf_deltas[TOTAL_REFS_PER_FRAME];
@@ -210,6 +203,11 @@ typedef struct AV1EncoderConfig {
int scaled_frame_width;
int scaled_frame_height;
+#if CONFIG_FRAME_SUPERRES
+ // Frame Super-Resolution size scaling
+ int superres_enabled;
+#endif // CONFIG_FRAME_SUPERRES
+
// Enable feature to reduce the frame quantization every x frames.
int frame_periodic_boost;
@@ -323,9 +321,16 @@ typedef struct ThreadData {
PICK_MODE_CONTEXT *leaf_tree;
PC_TREE *pc_tree;
PC_TREE *pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1];
+#if CONFIG_MOTION_VAR
+ int32_t *wsrc_buf;
+ int32_t *mask_buf;
+ uint8_t *above_pred_buf;
+ uint8_t *left_pred_buf;
+#endif
- VAR_TREE *var_tree;
- VAR_TREE *var_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1];
+#if CONFIG_PALETTE
+ PALETTE_BUFFER *palette_buffer;
+#endif // CONFIG_PALETTE
} ThreadData;
struct EncWorkerData;
@@ -350,16 +355,6 @@ typedef struct {
YV12_BUFFER_CONFIG buf;
} EncRefCntBuffer;
-#if CONFIG_SUBFRAME_PROB_UPDATE
-typedef struct SUBFRAME_STATS {
- av1_coeff_probs_model coef_probs_buf[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES];
- av1_coeff_count coef_counts_buf[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES];
- unsigned int eob_counts_buf[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES][REF_TYPES]
- [COEF_BANDS][COEFF_CONTEXTS];
- av1_coeff_probs_model enc_starting_coef_probs[TX_SIZES][PLANE_TYPES];
-} SUBFRAME_STATS;
-#endif // CONFIG_SUBFRAME_PROB_UPDATE
-
typedef struct TileBufferEnc {
uint8_t *data;
size_t size;
@@ -369,14 +364,7 @@ typedef struct AV1_COMP {
QUANTS quants;
ThreadData td;
MB_MODE_INFO_EXT *mbmi_ext_base;
- DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]); // 8: SIMD width
- DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]); // 8: SIMD width
-#if CONFIG_NEW_QUANT
- DECLARE_ALIGNED(16, dequant_val_type_nuq,
- y_dequant_val_nuq[QUANT_PROFILES][QINDEX_RANGE][COEF_BANDS]);
- DECLARE_ALIGNED(16, dequant_val_type_nuq,
- uv_dequant_val_nuq[QUANT_PROFILES][QINDEX_RANGE][COEF_BANDS]);
-#endif // CONFIG_NEW_QUANT
+ Dequants dequants;
AV1_COMMON common;
AV1EncoderConfig oxcf;
struct lookahead_ctx *lookahead;
@@ -443,15 +431,8 @@ typedef struct AV1_COMP {
CODING_CONTEXT coding_context;
-#if CONFIG_REF_MV
int nmv_costs[NMV_CONTEXTS][2][MV_VALS];
int nmv_costs_hp[NMV_CONTEXTS][2][MV_VALS];
-#endif
-
- int nmvcosts[2][MV_VALS];
- int nmvcosts_hp[2][MV_VALS];
- int nmvsadcosts[2][MV_VALS];
- int nmvsadcosts_hp[2][MV_VALS];
int64_t last_time_stamp_seen;
int64_t last_end_time_stamp_seen;
@@ -543,29 +524,23 @@ typedef struct AV1_COMP {
// number of MBs in the current frame when the frame is
// scaled.
- // Store frame variance info in SOURCE_VAR_BASED_PARTITION search type.
- DIFF *source_diff_var;
- // The threshold used in SOURCE_VAR_BASED_PARTITION search type.
- unsigned int source_var_thresh;
- int frames_till_next_var_check;
-
int frame_flags;
search_site_config ss_cfg;
int mbmode_cost[BLOCK_SIZE_GROUPS][INTRA_MODES];
-#if CONFIG_REF_MV
int newmv_mode_cost[NEWMV_MODE_CONTEXTS][2];
int zeromv_mode_cost[ZEROMV_MODE_CONTEXTS][2];
int refmv_mode_cost[REFMV_MODE_CONTEXTS][2];
int drl_mode_cost0[DRL_MODE_CONTEXTS][2];
-#endif
unsigned int inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
#if CONFIG_EXT_INTER
unsigned int inter_compound_mode_cost[INTER_MODE_CONTEXTS]
[INTER_COMPOUND_MODES];
+#if CONFIG_INTERINTRA
unsigned int interintra_mode_cost[BLOCK_SIZE_GROUPS][INTERINTRA_MODES];
+#endif // CONFIG_INTERINTRA
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
int motion_mode_cost[BLOCK_SIZES][MOTION_MODES];
@@ -625,24 +600,18 @@ typedef struct AV1_COMP {
TileBufferEnc tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS];
- int resize_pending;
int resize_state;
int resize_scale_num;
int resize_scale_den;
+ int resize_next_scale_num;
+ int resize_next_scale_den;
int resize_avg_qp;
int resize_buffer_underflow;
int resize_count;
- // VAR_BASED_PARTITION thresholds
- // 0 - threshold_128x128;
- // 1 - threshold_64x64;
- // 2 - threshold_32x32;
- // 3 - threshold_16x16;
- // 4 - threshold_8x8;
- int64_t vbp_thresholds[5];
- int64_t vbp_threshold_minmax;
- int64_t vbp_threshold_sad;
- BLOCK_SIZE vbp_bsize_min;
+#if CONFIG_FRAME_SUPERRES
+ int superres_pending;
+#endif // CONFIG_FRAME_SUPERRES
// VARIANCE_AQ segment map refresh
int vaq_refresh;
@@ -652,12 +621,6 @@ typedef struct AV1_COMP {
AVxWorker *workers;
struct EncWorkerData *tile_thr_data;
AV1LfSync lf_row_sync;
-#if CONFIG_SUBFRAME_PROB_UPDATE
- SUBFRAME_STATS subframe_stats;
- // TODO(yaowu): minimize the size of count buffers
- SUBFRAME_STATS wholeframe_stats;
- av1_coeff_stats branch_ct_buf[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES];
-#endif // CONFIG_SUBFRAME_PROB_UPDATE
#if CONFIG_ANS
struct BufAnsCoder buf_ans;
#endif
@@ -720,8 +683,8 @@ int av1_get_active_map(AV1_COMP *cpi, unsigned char *map, int rows, int cols);
int av1_set_internal_size(AV1_COMP *cpi, AOM_SCALING horiz_mode,
AOM_SCALING vert_mode);
-int av1_set_size_literal(AV1_COMP *cpi, unsigned int width,
- unsigned int height);
+// Returns 1 if the assigned width or height was <= 0.
+int av1_set_size_literal(AV1_COMP *cpi, int width, int height);
int av1_get_quantizer(struct AV1_COMP *cpi);
@@ -774,7 +737,7 @@ static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(
return &cpi->upsampled_ref_bufs[buf_idx].buf;
}
-#if CONFIG_EXT_REFS
+#if CONFIG_EXT_REFS || CONFIG_TEMPMV_SIGNALING
static INLINE int enc_is_ref_frame_buf(AV1_COMP *cpi, RefCntBuffer *frame_buf) {
MV_REFERENCE_FRAME ref_frame;
AV1_COMMON *const cm = &cpi->common;
@@ -819,14 +782,6 @@ void av1_set_high_precision_mv(AV1_COMP *cpi, int allow_high_precision_mv);
void av1_set_temporal_mv_prediction(AV1_COMP *cpi, int allow_tempmv_prediction);
#endif
-YV12_BUFFER_CONFIG *av1_scale_if_required_fast(AV1_COMMON *cm,
- YV12_BUFFER_CONFIG *unscaled,
- YV12_BUFFER_CONFIG *scaled);
-
-YV12_BUFFER_CONFIG *av1_scale_if_required(AV1_COMMON *cm,
- YV12_BUFFER_CONFIG *unscaled,
- YV12_BUFFER_CONFIG *scaled);
-
void av1_apply_encoding_flags(AV1_COMP *cpi, aom_enc_frame_flags_t flags);
static INLINE int is_altref_enabled(const AV1_COMP *const cpi) {
@@ -876,6 +831,25 @@ static INLINE void uref_cnt_fb(EncRefCntBuffer *ubufs, int *uidx,
ubufs[new_uidx].ref_count++;
}
+// Returns 1 if a resize is pending (next scale != current scale), else 0.
+static INLINE int av1_resize_pending(const struct AV1_COMP *cpi) {
+ return cpi->resize_scale_num != cpi->resize_next_scale_num ||
+ cpi->resize_scale_den != cpi->resize_next_scale_den;
+}
+
+// Returns 1 if the current resize scale is 1:1 (num == den) and 0 otherwise.
+static INLINE int av1_resize_unscaled(const struct AV1_COMP *cpi) {
+ return cpi->resize_scale_num == cpi->resize_scale_den;
+}
+
+// Moves resizing to the next state. This just copies the next numerator and
+// denominator into the current ones, causing av1_resize_pending to
+// subsequently return false.
+static INLINE void av1_resize_step(struct AV1_COMP *cpi) {
+ cpi->resize_scale_num = cpi->resize_next_scale_num;
+ cpi->resize_scale_den = cpi->resize_next_scale_den;
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/encoder/encodetxb.c b/third_party/aom/av1/encoder/encodetxb.c
index 3f71a4472..731642064 100644
--- a/third_party/aom/av1/encoder/encodetxb.c
+++ b/third_party/aom/av1/encoder/encodetxb.c
@@ -21,6 +21,8 @@
#include "av1/encoder/subexp.h"
#include "av1/encoder/tokenize.h"
+#define TEST_OPTIMIZE_TXB 0
+
void av1_alloc_txb_buf(AV1_COMP *cpi) {
#if 0
AV1_COMMON *cm = &cpi->common;
@@ -159,7 +161,7 @@ void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
}
// level is above 1.
- ctx = get_level_ctx(tcoeff, scan[c], bwl);
+ ctx = get_br_ctx(tcoeff, scan[c], bwl);
for (idx = 0; idx < COEFF_BASE_RANGE; ++idx) {
if (level == (idx + 1 + NUM_BASE_LEVELS)) {
aom_write(w, 1, cm->fc->coeff_lps[tx_size][plane_type][ctx]);
@@ -251,6 +253,32 @@ static INLINE void get_base_ctx_set(const tran_low_t *tcoeffs,
return;
}
+static INLINE int get_br_cost(tran_low_t abs_qc, int ctx,
+ const aom_prob *coeff_lps) { // Cost, under coeff_lps[ctx], of the bits charged for levels above NUM_BASE_LEVELS.
+ const tran_low_t min_level = 1 + NUM_BASE_LEVELS; // smallest level that is charged here
+ const tran_low_t max_level = 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE; // at or above this the cost saturates
+ if (abs_qc >= min_level) {
+ const int cost0 = av1_cost_bit(coeff_lps[ctx], 0);
+ const int cost1 = av1_cost_bit(coeff_lps[ctx], 1);
+ if (abs_qc >= max_level)
+ return COEFF_BASE_RANGE * cost0; // COEFF_BASE_RANGE zero-bits, no terminating one-bit
+ else
+ return (abs_qc - min_level) * cost0 + cost1; // (abs_qc - min_level) zero-bits, then a single one-bit
+ } else {
+ return 0; // below min_level: nothing is charged
+ }
+}
+
+static INLINE int get_base_cost(tran_low_t abs_qc, int ctx,
+ aom_prob (*coeff_base)[COEFF_BASE_CONTEXTS],
+ int base_idx) { // Cost of the base-level bit for level base_idx + 1, or 0 when abs_qc is below that level.
+ const int level = base_idx + 1; // base_idx i corresponds to level i + 1
+ if (abs_qc < level)
+ return 0; // coefficient never reaches this level: no cost
+ else
+ return av1_cost_bit(coeff_base[base_idx][ctx], abs_qc == level); // bit value is 1 only when abs_qc is exactly this level
+}
+
int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
int block, TXB_CTX *txb_ctx) {
const AV1_COMMON *const cm = &cpi->common;
@@ -331,7 +359,7 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
int idx;
int ctx;
- ctx = get_level_ctx(qcoeff, scan[c], bwl);
+ ctx = get_br_ctx(qcoeff, scan[c], bwl);
for (idx = 0; idx < COEFF_BASE_RANGE; ++idx) {
if (level == (idx + 1 + NUM_BASE_LEVELS)) {
@@ -373,12 +401,1085 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
return cost;
}
-typedef struct TxbParams {
- const AV1_COMP *cpi;
- ThreadData *td;
- int rate;
-} TxbParams;
+// Nonzero when |qc| reaches base level number base_idx, i.e. base_idx + 1.
+static INLINE int has_base(tran_low_t qc, int base_idx) {
+  return abs(qc) >= base_idx + 1;
+}
+
+// Visits every coefficient before eob in scan order. For each nonzero one it
+// stores the get_mag() result over the base_ref_offset neighborhood and, for
+// every base level the coefficient reaches, the get_level_count() result.
+// Entries belonging to coefficients that fail has_base() are not written.
+static void gen_base_count_mag_arr(int (*base_count_arr)[MAX_TX_SQUARE],
+                                   int (*base_mag_arr)[2],
+                                   const tran_low_t *qcoeff, int stride,
+                                   int eob, const int16_t *scan) {
+  for (int si = 0; si < eob; ++si) {
+    const int pos = scan[si];  // raster order
+    const tran_low_t qc = qcoeff[pos];
+    if (has_base(qc, 0)) {
+      const int row = pos / stride;
+      const int col = pos % stride;
+      get_mag(base_mag_arr[pos], qcoeff, stride, row, col, base_ref_offset,
+              BASE_CONTEXT_POSITION_NUM);
+      for (int base_idx = 0; base_idx < NUM_BASE_LEVELS; ++base_idx) {
+        if (has_base(qc, base_idx)) {
+          base_count_arr[base_idx][pos] =
+              get_level_count(qcoeff, stride, row, col, base_idx,
+                              base_ref_offset, BASE_CONTEXT_POSITION_NUM);
+        }
+      }
+    }
+  }
+}
+
+// Stores get_nz_count() for every coefficient position that precedes eob in
+// scan order. nz_count_arr is indexed by raster position.
+static void gen_nz_count_arr(int(*nz_count_arr), const tran_low_t *qcoeff,
+                             int stride, int eob,
+                             const SCAN_ORDER *scan_order) {
+  const int16_t *const iscan = scan_order->iscan;
+  for (int si = 0; si < eob; ++si) {
+    const int pos = scan_order->scan[si];  // raster order
+    nz_count_arr[pos] =
+        get_nz_count(qcoeff, stride, pos / stride, pos % stride, iscan);
+  }
+}
+
+// Converts the cached nonzero counts into nonzero-map contexts for every
+// position before eob. Only element [0] of each nz_ctx_arr entry is written.
+static void gen_nz_ctx_arr(int (*nz_ctx_arr)[2], int(*nz_count_arr),
+                           const tran_low_t *qcoeff, int bwl, int eob,
+                           const SCAN_ORDER *scan_order) {
+  const int16_t *const iscan = scan_order->iscan;
+  for (int si = 0; si < eob; ++si) {
+    const int pos = scan_order->scan[si];  // raster order
+    nz_ctx_arr[pos][0] = get_nz_map_ctx_from_count(nz_count_arr[pos], qcoeff,
+                                                   pos, bwl, iscan);
+  }
+}
+
+// For every base level and every coefficient before eob that reaches that
+// level, derives the base context from the cached count and the first
+// element of the cached magnitude pair. Only element [0] of each
+// base_ctx_arr entry is written.
+static void gen_base_ctx_arr(int (*base_ctx_arr)[MAX_TX_SQUARE][2],
+                             int (*base_count_arr)[MAX_TX_SQUARE],
+                             int (*base_mag_arr)[2], const tran_low_t *qcoeff,
+                             int stride, int eob, const int16_t *scan) {
+  for (int base_idx = 0; base_idx < NUM_BASE_LEVELS; ++base_idx) {
+    const int level = base_idx + 1;
+    for (int si = 0; si < eob; ++si) {
+      const int pos = scan[si];  // raster order
+      if (has_base(qcoeff[pos], base_idx)) {
+        base_ctx_arr[base_idx][pos][0] = get_base_ctx_from_count_mag(
+            pos / stride, pos % stride, base_count_arr[base_idx][pos],
+            base_mag_arr[pos][0], level);
+      }
+    }
+  }
+}
+
+// Nonzero when |qc| is large enough to need base-range coding, i.e. at least
+// 1 + NUM_BASE_LEVELS.
+static INLINE int has_br(tran_low_t qc) {
+  const int first_br_level = 1 + NUM_BASE_LEVELS;
+  return abs(qc) >= first_br_level;
+}
+
+// For every coefficient before eob that passes has_br(), caches the
+// get_level_count() and get_mag() results over the br_ref_offset
+// neighborhood. Entries of other coefficients are not written.
+static void gen_br_count_mag_arr(int *br_count_arr, int (*br_mag_arr)[2],
+                                 const tran_low_t *qcoeff, int stride, int eob,
+                                 const int16_t *scan) {
+  for (int si = 0; si < eob; ++si) {
+    const int pos = scan[si];  // raster order
+    if (has_br(qcoeff[pos])) {
+      const int row = pos / stride;
+      const int col = pos % stride;
+      br_count_arr[pos] =
+          get_level_count(qcoeff, stride, row, col, NUM_BASE_LEVELS,
+                          br_ref_offset, BR_CONTEXT_POSITION_NUM);
+      get_mag(br_mag_arr[pos], qcoeff, stride, row, col, br_ref_offset,
+              BR_CONTEXT_POSITION_NUM);
+    }
+  }
+}
+
+// For every coefficient before eob that passes has_br(), derives the
+// base-range context from the cached count and the first element of the
+// cached magnitude pair. Only element [0] of each br_ctx_arr entry is written.
+static void gen_br_ctx_arr(int (*br_ctx_arr)[2], const int *br_count_arr,
+                           int (*br_mag_arr)[2], const tran_low_t *qcoeff,
+                           int stride, int eob, const int16_t *scan) {
+  for (int si = 0; si < eob; ++si) {
+    const int pos = scan[si];  // raster order
+    if (has_br(qcoeff[pos])) {
+      br_ctx_arr[pos][0] = get_br_ctx_from_count_mag(
+          pos / stride, pos % stride, br_count_arr[pos], br_mag_arr[pos][0]);
+    }
+  }
+}
+
+// Returns the cost of coding the sign of qc (1 for negative, 0 otherwise).
+// The coefficient at index 0 uses dc_sign_prob[dc_sign_ctx]; every other
+// position is costed with the fixed probability 128.
+static INLINE int get_sign_bit_cost(tran_low_t qc, int coeff_idx,
+                                    const aom_prob *dc_sign_prob,
+                                    int dc_sign_ctx) {
+  const int is_negative = qc < 0;
+  if (coeff_idx != 0) return av1_cost_bit(128, is_negative);
+  return av1_cost_bit(dc_sign_prob[dc_sign_ctx], is_negative);
+}
+// Returns the cost of the residual part of abs_qc, charged as a literal of
+// 2 * length - 1 bits, where length is the bit length of
+// abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS. Magnitudes below
+// 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE have no residual and cost 0.
+static INLINE int get_golomb_cost(int abs_qc) {
+  if (abs_qc < 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) return 0;
+
+  // residual cost
+  const int residual = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
+  int length = 0;
+  for (int rest = residual; rest != 0; rest >>= 1) ++length;
+  return av1_cost_literal(2 * length - 1);
+}
+
+// TODO(angiebird): add static once this function is called
+// Populates every array of txb_cache from the current state of txb_info.
+// The order matters: each *_ctx_arr is derived from the count/mag arrays
+// filled by the call immediately before it.
+void gen_txb_cache(TxbCache *txb_cache, TxbInfo *txb_info) {
+  const SCAN_ORDER *const scan_order = txb_info->scan_order;
+  const int16_t *const scan = scan_order->scan;
+  const tran_low_t *const qcoeff = txb_info->qcoeff;
+  const int stride = txb_info->stride;
+  const int eob = txb_info->eob;
+
+  // Nonzero map: counts first, then contexts.
+  gen_nz_count_arr(txb_cache->nz_count_arr, qcoeff, stride, eob, scan_order);
+  gen_nz_ctx_arr(txb_cache->nz_ctx_arr, txb_cache->nz_count_arr, qcoeff,
+                 txb_info->bwl, eob, scan_order);
+
+  // Base levels.
+  gen_base_count_mag_arr(txb_cache->base_count_arr, txb_cache->base_mag_arr,
+                         qcoeff, stride, eob, scan);
+  gen_base_ctx_arr(txb_cache->base_ctx_arr, txb_cache->base_count_arr,
+                   txb_cache->base_mag_arr, qcoeff, stride, eob, scan);
+
+  // Base range.
+  gen_br_count_mag_arr(txb_cache->br_count_arr, txb_cache->br_mag_arr, qcoeff,
+                       stride, eob, scan);
+  gen_br_ctx_arr(txb_cache->br_ctx_arr, txb_cache->br_count_arr,
+                 txb_cache->br_mag_arr, qcoeff, stride, eob, scan);
+}
+
+// Looks up the probability used to code the given level at coeff_idx, using
+// the cached context for that level class:
+//   level 0            -> nz_map
+//   base levels        -> coeff_base[level - 1]
+//   base-range levels  -> coeff_lps
+// Golomb-coded levels and negative levels are not supported: both assert and
+// return 0 (the Golomb case also prints a message first).
+static INLINE aom_prob get_level_prob(int level, int coeff_idx,
+                                      const TxbCache *txb_cache,
+                                      const TxbProbs *txb_probs) {
+  const int first_br_level = 1 + NUM_BASE_LEVELS;
+  const int first_golomb_level = first_br_level + COEFF_BASE_RANGE;
+
+  if (level == 0) {
+    return txb_probs->nz_map[txb_cache->nz_ctx_arr[coeff_idx][0]];
+  }
+  if (level >= 1 && level < first_br_level) {
+    const int base_idx = level - 1;
+    const int base_ctx = txb_cache->base_ctx_arr[base_idx][coeff_idx][0];
+    return txb_probs->coeff_base[base_idx][base_ctx];
+  }
+  if (level >= first_br_level && level < first_golomb_level) {
+    return txb_probs->coeff_lps[txb_cache->br_ctx_arr[coeff_idx][0]];
+  }
+  if (level >= first_golomb_level) {
+    printf("get_level_prob does not support golomb\n");
+  }
+  assert(0);
+  return 0;
+}
+
+// Moves qc one step towards zero; zero stays zero.
+static INLINE tran_low_t get_lower_coeff(tran_low_t qc) {
+  if (qc > 0) return qc - 1;
+  if (qc < 0) return qc + 1;
+  return 0;
+}
+
+// When abs_qc equals the magnitude stored in mag_arr[0], decrements the
+// companion counter in mag_arr[1]; otherwise leaves mag_arr untouched.
+static INLINE void update_mag_arr(int *mag_arr, int abs_qc) {
+  if (mag_arr[0] != abs_qc) return;
+  --mag_arr[1];
+  assert(mag_arr[1] >= 0);
+}
+
+// Reduces a { magnitude, counter } pair to one magnitude: the stored
+// magnitude while the counter is positive, one less than it once the counter
+// has reached zero, and 0 when the stored magnitude is not positive.
+static INLINE int get_mag_from_mag_arr(const int *mag_arr) {
+  const int stored_mag = mag_arr[0];
+  const int remaining = mag_arr[1];
+  if (remaining > 0) return stored_mag;
+  if (stored_mag > 0) return stored_mag - 1;
+  // no neighbor
+  assert(stored_mag == 0 && remaining == 0);
+  return 0;
+}
+
+// Works out what the cached count and magnitude of the coefficient at
+// coeff_idx would become if the coefficient at nb_coeff_idx (current
+// magnitude abs_nb_coeff) were lowered by one. The results are written to
+// *new_count and *new_mag. Returns 1 if either differs from its current
+// value, 0 otherwise.
+static int neighbor_level_down_update(int *new_count, int *new_mag, int count,
+                                      const int *mag, int coeff_idx,
+                                      tran_low_t abs_nb_coeff, int nb_coeff_idx,
+                                      int level, const TxbInfo *txb_info) {
+  int changed = 0;
+
+  *new_count = count;
+  *new_mag = get_mag_from_mag_arr(mag);
+
+  // The count drops when the neighbor currently sits exactly at `level`.
+  if (abs_nb_coeff == level) {
+    --*new_count;
+    assert(*new_count >= 0);
+    changed = 1;
+  }
+
+  const int row = coeff_idx >> txb_info->bwl;
+  const int col = coeff_idx - (row << txb_info->bwl);
+  const int nb_row = nb_coeff_idx >> txb_info->bwl;
+  const int nb_col = nb_coeff_idx - (nb_row << txb_info->bwl);
+
+  // The magnitude is only examined for neighbors that are not above and not
+  // to the left of the coefficient.
+  if (nb_row >= row && nb_col >= col && abs_nb_coeff == mag[0]) {
+    assert(mag[1] > 0);
+    if (mag[1] == 1) {
+      // the nb is the only qc with max mag
+      --*new_mag;
+      assert(*new_mag >= 0);
+      changed = 1;
+    }
+  }
+  return changed;
+}
+
+// Returns the change in base-range cost of the coefficient at coeff_idx that
+// would result from lowering the coefficient at nb_coeff_idx by one. Returns
+// 0 when coeff_idx has no base-range part or its context inputs are
+// unaffected.
+static int try_neighbor_level_down_br(int coeff_idx, int nb_coeff_idx,
+                                      const TxbCache *txb_cache,
+                                      const TxbProbs *txb_probs,
+                                      const TxbInfo *txb_info) {
+  const int level = NUM_BASE_LEVELS + 1;
+  const tran_low_t abs_qc = abs(txb_info->qcoeff[coeff_idx]);
+  if (abs_qc < level) return 0;
+
+  const tran_low_t abs_nb_coeff = abs(txb_info->qcoeff[nb_coeff_idx]);
+  int new_count;
+  int new_mag;
+  const int changed = neighbor_level_down_update(
+      &new_count, &new_mag, txb_cache->br_count_arr[coeff_idx],
+      txb_cache->br_mag_arr[coeff_idx], coeff_idx, abs_nb_coeff, nb_coeff_idx,
+      level, txb_info);
+  if (!changed) return 0;
+
+  const int row = coeff_idx >> txb_info->bwl;
+  const int col = coeff_idx - (row << txb_info->bwl);
+  const int old_ctx = txb_cache->br_ctx_arr[coeff_idx][0];
+  const int old_cost = get_br_cost(abs_qc, old_ctx, txb_probs->coeff_lps);
+  const int new_ctx = get_br_ctx_from_count_mag(row, col, new_count, new_mag);
+  const int new_cost = get_br_cost(abs_qc, new_ctx, txb_probs->coeff_lps);
+  return new_cost - old_cost;
+}
+
+// Returns the summed change, over all base levels that the coefficient at
+// coeff_idx reaches, of its base-level cost if the coefficient at
+// nb_coeff_idx were lowered by one.
+static int try_neighbor_level_down_base(int coeff_idx, int nb_coeff_idx,
+                                        const TxbCache *txb_cache,
+                                        const TxbProbs *txb_probs,
+                                        const TxbInfo *txb_info) {
+  const tran_low_t abs_qc = abs(txb_info->qcoeff[coeff_idx]);
+  const tran_low_t abs_nb_coeff = abs(txb_info->qcoeff[nb_coeff_idx]);
+  const int *const mag = txb_cache->base_mag_arr[coeff_idx];
+  const int row = coeff_idx >> txb_info->bwl;
+  const int col = coeff_idx - (row << txb_info->bwl);
+
+  int total_diff = 0;
+  for (int base_idx = 0; base_idx < NUM_BASE_LEVELS; ++base_idx) {
+    const int level = base_idx + 1;
+    if (abs_qc < level) continue;
+
+    int new_count;
+    int new_mag;
+    const int changed = neighbor_level_down_update(
+        &new_count, &new_mag, txb_cache->base_count_arr[base_idx][coeff_idx],
+        mag, coeff_idx, abs_nb_coeff, nb_coeff_idx, level, txb_info);
+    if (!changed) continue;
+
+    const int old_ctx = txb_cache->base_ctx_arr[base_idx][coeff_idx][0];
+    const int old_cost =
+        get_base_cost(abs_qc, old_ctx, txb_probs->coeff_base, base_idx);
+    const int new_ctx =
+        get_base_ctx_from_count_mag(row, col, new_count, new_mag, level);
+    const int new_cost =
+        get_base_cost(abs_qc, new_ctx, txb_probs->coeff_base, base_idx);
+    total_diff += new_cost - old_cost;
+  }
+  return total_diff;
+}
+
+// Returns the change in the cost of coding the nonzero-map bit of the
+// coefficient at coeff_idx if the coefficient at nb_coeff_idx were lowered
+// by one. Only a neighbor of magnitude 1 that precedes coeff_idx in scan
+// order can have an effect; every other case returns 0.
+// txb_info->qcoeff[nb_coeff_idx] is modified temporarily while the new
+// context is computed and is restored before returning.
+static int try_neighbor_level_down_nz(int coeff_idx, int nb_coeff_idx,
+ const TxbCache *txb_cache,
+ const TxbProbs *txb_probs,
+ TxbInfo *txb_info) {
+ // assume eob doesn't change
+ const tran_low_t qc = txb_info->qcoeff[coeff_idx];
+ const tran_low_t abs_qc = abs(qc);
+ const tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx];
+ const tran_low_t abs_nb_coeff = abs(nb_coeff);
+ // Lowering the neighbor changes a nonzero count only if it becomes zero.
+ if (abs_nb_coeff != 1) return 0;
+ const int16_t *iscan = txb_info->scan_order->iscan;
+ const int scan_idx = iscan[coeff_idx];
+ // Other cost routines in this file only charge the nonzero-map bit when
+ // scan_idx < seg_eob.
+ if (scan_idx == txb_info->seg_eob) return 0;
+ const int nb_scan_idx = iscan[nb_coeff_idx];
+ if (nb_scan_idx < scan_idx) {
+ const int count = txb_cache->nz_count_arr[coeff_idx];
+ assert(count > 0);
+ // Temporarily lower the neighbor so the context helper sees the new state.
+ txb_info->qcoeff[nb_coeff_idx] = get_lower_coeff(nb_coeff);
+ const int new_ctx = get_nz_map_ctx_from_count(
+ count - 1, txb_info->qcoeff, coeff_idx, txb_info->bwl, iscan);
+ // Restore the neighbor; this function must not leave qcoeff modified.
+ txb_info->qcoeff[nb_coeff_idx] = nb_coeff;
+ const int ctx = txb_cache->nz_ctx_arr[coeff_idx][0];
+ const int is_nz = abs_qc > 0;
+ const int org_cost = av1_cost_bit(txb_probs->nz_map[ctx], is_nz);
+ const int new_cost = av1_cost_bit(txb_probs->nz_map[new_ctx], is_nz);
+ const int cost_diff = new_cost - org_cost;
+ return cost_diff;
+ } else {
+ return 0;
+ }
+}
+
+// Computes the change in the cost of coding the coefficient at coeff_idx
+// itself if it were moved one step towards zero. The lowered value is
+// written to *low_coeff. A zero coefficient yields *low_coeff = 0 and a cost
+// difference of 0. Only the terms computed below are covered; effects on
+// other coefficients' contexts are not part of this function.
+static int try_self_level_down(tran_low_t *low_coeff, int coeff_idx,
+ const TxbCache *txb_cache,
+ const TxbProbs *txb_probs, TxbInfo *txb_info) {
+ const tran_low_t qc = txb_info->qcoeff[coeff_idx];
+ if (qc == 0) {
+ *low_coeff = 0;
+ return 0;
+ }
+ const tran_low_t abs_qc = abs(qc);
+ *low_coeff = get_lower_coeff(qc);
+ int cost_diff;
+ if (*low_coeff == 0) {
+ // Case 1: magnitude 1 -> 0. The coefficient stops being coded as nonzero.
+ const int scan_idx = txb_info->scan_order->iscan[coeff_idx];
+ const aom_prob level_prob =
+ get_level_prob(abs_qc, coeff_idx, txb_cache, txb_probs);
+ const aom_prob low_level_prob =
+ get_level_prob(abs(*low_coeff), coeff_idx, txb_cache, txb_probs);
+ if (scan_idx < txb_info->seg_eob) {
+ // When level-0, we code the binary of abs_qc > level
+ // but when level-k k > 0 we code the binary of abs_qc == level
+ // That's why we need this special treatment for level-0 map
+ // TODO(angiebird): make level-0 consistent with other levels
+ cost_diff = -av1_cost_bit(level_prob, 1) +
+ av1_cost_bit(low_level_prob, 0) -
+ av1_cost_bit(low_level_prob, 1);
+ } else {
+ cost_diff = -av1_cost_bit(level_prob, 1);
+ }
+
+ // The eob flag that was charged for this nonzero coefficient goes away.
+ if (scan_idx < txb_info->seg_eob) {
+ const int eob_ctx =
+ get_eob_ctx(txb_info->qcoeff, coeff_idx, txb_info->bwl);
+ cost_diff -= av1_cost_bit(txb_probs->eob_flag[eob_ctx],
+ scan_idx == (txb_info->eob - 1));
+ }
+
+ // A zero coefficient carries no sign bit.
+ const int sign_cost = get_sign_bit_cost(
+ qc, coeff_idx, txb_probs->dc_sign_prob, txb_info->txb_ctx->dc_sign_ctx);
+ cost_diff -= sign_cost;
+ } else if (abs_qc < 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
+ // Case 2: both the old and the new magnitude are coded with base or
+ // base-range bits only.
+ const aom_prob level_prob =
+ get_level_prob(abs_qc, coeff_idx, txb_cache, txb_probs);
+ const aom_prob low_level_prob =
+ get_level_prob(abs(*low_coeff), coeff_idx, txb_cache, txb_probs);
+ cost_diff = -av1_cost_bit(level_prob, 1) + av1_cost_bit(low_level_prob, 1) -
+ av1_cost_bit(low_level_prob, 0);
+ } else if (abs_qc == 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
+ // Case 3: the old magnitude is the smallest one with a Golomb residual;
+ // the new magnitude has none.
+ const aom_prob low_level_prob =
+ get_level_prob(abs(*low_coeff), coeff_idx, txb_cache, txb_probs);
+ cost_diff = -get_golomb_cost(abs_qc) + av1_cost_bit(low_level_prob, 1) -
+ av1_cost_bit(low_level_prob, 0);
+ } else {
+ // Case 4: both magnitudes have a Golomb residual; only that part changes.
+ assert(abs_qc > 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE);
+ const tran_low_t abs_low_coeff = abs(*low_coeff);
+ cost_diff = -get_golomb_cost(abs_qc) + get_golomb_cost(abs_low_coeff);
+ }
+ return cost_diff;
+}
+
+#define COST_MAP_SIZE 5
+#define COST_MAP_OFFSET 2
+
+// Nonzero when qc has magnitude exactly 1.
+static INLINE int check_nz_neighbor(tran_low_t qc) {
+  const int magnitude = abs(qc);
+  return magnitude == 1;
+}
+
+// Nonzero when |qc| does not exceed 1 + NUM_BASE_LEVELS.
+static INLINE int check_base_neighbor(tran_low_t qc) {
+  const int highest_relevant_mag = 1 + NUM_BASE_LEVELS;
+  return abs(qc) <= highest_relevant_mag;
+}
+
+// Nonzero when |qc| is strictly greater than BR_MAG_OFFSET.
+static INLINE int check_br_neighbor(tran_low_t qc) {
+  const int magnitude = abs(qc);
+  return magnitude > BR_MAG_OFFSET;
+}
+
+// TODO(angiebird): add static to this function once it's called
+// Estimates the change in coding cost if the coefficient at coeff_idx were
+// moved one step towards zero. Nothing is committed: any temporary change
+// made by the helpers is undone before they return.
+//
+// The result is the sum of
+//   - the coefficient's own cost change (only when it lies before eob), and
+//   - the cost changes of neighboring coefficients whose nonzero-map, base
+//     or base-range contexts depend on it.
+//
+// cost_map may be NULL. When given, it is zeroed and then receives the
+// per-position breakdown; the centre cell
+// [COST_MAP_OFFSET][COST_MAP_OFFSET] holds the coefficient's own change.
+//
+// Returns 0 for a zero coefficient.
+int try_level_down(int coeff_idx, const TxbCache *txb_cache,
+                   const TxbProbs *txb_probs, TxbInfo *txb_info,
+                   int (*cost_map)[COST_MAP_SIZE]) {
+  if (cost_map) {
+    for (int i = 0; i < COST_MAP_SIZE; ++i) av1_zero(cost_map[i]);
+  }
+
+  const tran_low_t qc = txb_info->qcoeff[coeff_idx];
+  if (qc == 0) return 0;
+  int accu_cost_diff = 0;
+
+  const int16_t *iscan = txb_info->scan_order->iscan;
+  const int eob = txb_info->eob;
+  const int scan_idx = iscan[coeff_idx];
+  if (scan_idx < eob) {
+    tran_low_t low_coeff;  // required by the callee; not used here
+    const int cost_diff = try_self_level_down(&low_coeff, coeff_idx, txb_cache,
+                                              txb_probs, txb_info);
+    if (cost_map)
+      cost_map[0 + COST_MAP_OFFSET][0 + COST_MAP_OFFSET] = cost_diff;
+    accu_cost_diff += cost_diff;
+  }
+
+  const int row = coeff_idx >> txb_info->bwl;
+  const int col = coeff_idx - (row << txb_info->bwl);
+  // NOTE(review): the row bound below is txb_info->stride, as in the rest of
+  // this file; that is only exact for square blocks -- confirm.
+  if (check_nz_neighbor(qc)) {
+    for (int i = 0; i < SIG_REF_OFFSET_NUM; ++i) {
+      const int nb_row = row - sig_ref_offset[i][0];
+      const int nb_col = col - sig_ref_offset[i][1];
+      // Validate the position BEFORE indexing iscan with it; an out-of-block
+      // neighbor would otherwise cause an out-of-bounds read.
+      if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->stride &&
+            nb_col < txb_info->stride))
+        continue;
+      const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+      if (iscan[nb_coeff_idx] >= eob) continue;
+      const int cost_diff = try_neighbor_level_down_nz(
+          nb_coeff_idx, coeff_idx, txb_cache, txb_probs, txb_info);
+      if (cost_map)
+        cost_map[nb_row - row + COST_MAP_OFFSET]
+                [nb_col - col + COST_MAP_OFFSET] += cost_diff;
+      accu_cost_diff += cost_diff;
+    }
+  }
+
+  if (check_base_neighbor(qc)) {
+    for (int i = 0; i < BASE_CONTEXT_POSITION_NUM; ++i) {
+      const int nb_row = row - base_ref_offset[i][0];
+      const int nb_col = col - base_ref_offset[i][1];
+      if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->stride &&
+            nb_col < txb_info->stride))
+        continue;
+      const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+      if (iscan[nb_coeff_idx] >= eob) continue;
+      const int cost_diff = try_neighbor_level_down_base(
+          nb_coeff_idx, coeff_idx, txb_cache, txb_probs, txb_info);
+      if (cost_map)
+        cost_map[nb_row - row + COST_MAP_OFFSET]
+                [nb_col - col + COST_MAP_OFFSET] += cost_diff;
+      accu_cost_diff += cost_diff;
+    }
+  }
+
+  if (check_br_neighbor(qc)) {
+    for (int i = 0; i < BR_CONTEXT_POSITION_NUM; ++i) {
+      const int nb_row = row - br_ref_offset[i][0];
+      const int nb_col = col - br_ref_offset[i][1];
+      if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->stride &&
+            nb_col < txb_info->stride))
+        continue;
+      const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+      if (iscan[nb_coeff_idx] >= eob) continue;
+      const int cost_diff = try_neighbor_level_down_br(
+          nb_coeff_idx, coeff_idx, txb_cache, txb_probs, txb_info);
+      if (cost_map)
+        cost_map[nb_row - row + COST_MAP_OFFSET]
+                [nb_col - col + COST_MAP_OFFSET] += cost_diff;
+      accu_cost_diff += cost_diff;
+    }
+  }
+
+  return accu_cost_diff;
+}
+
+// Returns the cost of coding the coefficient at coeff_idx, which must have
+// magnitude 0 or 1 (asserted), using the contexts cached in txb_cache.
+static int get_low_coeff_cost(int coeff_idx, const TxbCache *txb_cache,
+ const TxbProbs *txb_probs,
+ const TxbInfo *txb_info) {
+ const tran_low_t qc = txb_info->qcoeff[coeff_idx];
+ const int abs_qc = abs(qc);
+ assert(abs_qc <= 1);
+ int cost = 0;
+ const int scan_idx = txb_info->scan_order->iscan[coeff_idx];
+ // The nonzero-map bit is only charged for positions before seg_eob.
+ if (scan_idx < txb_info->seg_eob) {
+ const aom_prob level_prob =
+ get_level_prob(0, coeff_idx, txb_cache, txb_probs);
+ cost += av1_cost_bit(level_prob, qc != 0);
+ }
+
+ if (qc != 0) {
+ // Magnitude 1: first base-level bit, then the eob flag, then the sign.
+ const int base_idx = 0;
+ const int ctx = txb_cache->base_ctx_arr[base_idx][coeff_idx][0];
+ cost += get_base_cost(abs_qc, ctx, txb_probs->coeff_base, base_idx);
+ if (scan_idx < txb_info->seg_eob) {
+ const int eob_ctx =
+ get_eob_ctx(txb_info->qcoeff, coeff_idx, txb_info->bwl);
+ cost += av1_cost_bit(txb_probs->eob_flag[eob_ctx],
+ scan_idx == (txb_info->eob - 1));
+ }
+ cost += get_sign_bit_cost(qc, coeff_idx, txb_probs->dc_sign_prob,
+ txb_info->txb_ctx->dc_sign_ctx);
+ }
+ return cost;
+}
+
+// Sets txb_info->eob and derives seg_eob from it: eob itself, capped at the
+// number of coefficients of the transform size minus one.
+static INLINE void set_eob(TxbInfo *txb_info, int eob) {
+  const int seg_eob_cap = tx_size_2d[txb_info->tx_size] - 1;
+  txb_info->eob = eob;
+  txb_info->seg_eob = AOMMIN(eob, seg_eob_cap);
+}
+
+// TODO(angiebird): add static to this function once it's called
+// Estimates the change in coding cost if the coefficient at coeff_idx, which
+// must have magnitude 1, were lowered to zero and eob moved back to just
+// after the previous nonzero coefficient in scan order.
+//
+// *new_eob receives the resulting eob (0 when no nonzero coefficient
+// precedes coeff_idx). When |qc| != 1 nothing is estimated: *new_eob is set
+// to -1 and 0 is returned.
+//
+// txb_info's eob/seg_eob are changed temporarily and restored before
+// returning.
+int try_change_eob(int *new_eob, int coeff_idx, const TxbCache *txb_cache,
+ const TxbProbs *txb_probs, TxbInfo *txb_info) {
+ assert(txb_info->eob > 0);
+ const tran_low_t qc = txb_info->qcoeff[coeff_idx];
+ const int abs_qc = abs(qc);
+ if (abs_qc != 1) {
+ *new_eob = -1;
+ return 0;
+ }
+ const int16_t *iscan = txb_info->scan_order->iscan;
+ const int16_t *scan = txb_info->scan_order->scan;
+ const int scan_idx = iscan[coeff_idx];
+ *new_eob = 0;
+ int cost_diff = 0;
+ // The coefficient itself is no longer coded at all.
+ cost_diff -= get_low_coeff_cost(coeff_idx, txb_cache, txb_probs, txb_info);
+ // int coeff_cost =
+ // get_coeff_cost(qc, scan_idx, txb_info, txb_probs);
+ // if (-cost_diff != coeff_cost) {
+ // printf("-cost_diff %d coeff_cost %d\n", -cost_diff, coeff_cost);
+ // get_low_coeff_cost(coeff_idx, txb_cache, txb_probs, txb_info);
+ // get_coeff_cost(qc, scan_idx, txb_info, txb_probs);
+ // }
+ // Walk backwards to the previous nonzero coefficient. Every zero passed on
+ // the way drops out of the coded range, so its cost is removed as well.
+ for (int si = scan_idx - 1; si >= 0; --si) {
+ const int ci = scan[si];
+ if (txb_info->qcoeff[ci] != 0) {
+ *new_eob = si + 1;
+ break;
+ } else {
+ cost_diff -= get_low_coeff_cost(ci, txb_cache, txb_probs, txb_info);
+ }
+ }
+
+ // Evaluate the neighbor effects with the shortened eob in place, then
+ // restore the original eob.
+ const int org_eob = txb_info->eob;
+ set_eob(txb_info, *new_eob);
+ cost_diff += try_level_down(coeff_idx, txb_cache, txb_probs, txb_info, NULL);
+ set_eob(txb_info, org_eob);
+
+ if (*new_eob > 0) {
+ // Note that get_eob_ctx does NOT actually account for qcoeff, so we don't
+ // need to lower down the qcoeff here
+ // The new last coefficient's eob flag flips from 0 to 1.
+ const int eob_ctx =
+ get_eob_ctx(txb_info->qcoeff, scan[*new_eob - 1], txb_info->bwl);
+ cost_diff -= av1_cost_bit(txb_probs->eob_flag[eob_ctx], 0);
+ cost_diff += av1_cost_bit(txb_probs->eob_flag[eob_ctx], 1);
+ } else {
+ // No coefficient is left: the txb_skip flag flips from 0 to 1.
+ const int txb_skip_ctx = txb_info->txb_ctx->txb_skip_ctx;
+ cost_diff -= av1_cost_bit(txb_probs->txb_skip[txb_skip_ctx], 0);
+ cost_diff += av1_cost_bit(txb_probs->txb_skip[txb_skip_ctx], 1);
+ }
+ return cost_diff;
+}
+
+// Dequantizes qc: scales its magnitude by dqv, shifts right by `shift`, and
+// re-applies the sign of qc.
+static INLINE tran_low_t qcoeff_to_dqcoeff(tran_low_t qc, int dqv, int shift) {
+  const int abs_dqc = (abs(qc) * dqv) >> shift;
+  return qc < 0 ? -abs_dqc : abs_dqc;
+}
+
+// TODO(angiebird): add static to this function once it's called
+//
+// Commits a one-step lowering of the coefficient at coeff_idx:
+//   1. writes the lowered value to qcoeff and the matching value to dqcoeff;
+//   2. updates the cached counts, magnitudes and contexts in txb_cache of
+//      every neighbor (before eob) whose nonzero-map, base or base-range
+//      context depends on this coefficient.
+// A zero coefficient is left alone.
+//
+// Neighbor positions are bounds-checked before they are used to index iscan
+// or qcoeff, so out-of-block neighbors are never read.
+// NOTE(review): the row bound is txb_info->stride, as in the rest of this
+// file; that is only exact for square blocks -- confirm.
+void update_level_down(int coeff_idx, TxbCache *txb_cache, TxbInfo *txb_info) {
+  const tran_low_t qc = txb_info->qcoeff[coeff_idx];
+  const int abs_qc = abs(qc);
+  if (qc == 0) return;
+  const tran_low_t low_coeff = get_lower_coeff(qc);
+  txb_info->qcoeff[coeff_idx] = low_coeff;
+  const int dqv = txb_info->dequant[coeff_idx != 0];
+  txb_info->dqcoeff[coeff_idx] =
+      qcoeff_to_dqcoeff(low_coeff, dqv, txb_info->shift);
+
+  const int row = coeff_idx >> txb_info->bwl;
+  const int col = coeff_idx - (row << txb_info->bwl);
+  const int eob = txb_info->eob;
+  const int16_t *iscan = txb_info->scan_order->iscan;
+  const int scan_idx = iscan[coeff_idx];
+
+  // Nonzero-map contexts of neighbors that come later in scan order.
+  for (int i = 0; i < SIG_REF_OFFSET_NUM; ++i) {
+    const int nb_row = row - sig_ref_offset[i][0];
+    const int nb_col = col - sig_ref_offset[i][1];
+    if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->stride &&
+          nb_col < txb_info->stride))
+      continue;
+    const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+    const int nb_scan_idx = iscan[nb_coeff_idx];
+    if (nb_scan_idx >= eob || scan_idx >= nb_scan_idx) continue;
+    // The neighbor loses a nonzero neighbor only when this coefficient
+    // dropped from magnitude 1 to 0.
+    if (abs_qc == 1) {
+      txb_cache->nz_count_arr[nb_coeff_idx] -= 1;
+      assert(txb_cache->nz_count_arr[nb_coeff_idx] >= 0);
+    }
+    const int count = txb_cache->nz_count_arr[nb_coeff_idx];
+    txb_cache->nz_ctx_arr[nb_coeff_idx][0] = get_nz_map_ctx_from_count(
+        count, txb_info->qcoeff, nb_coeff_idx, txb_info->bwl, iscan);
+  }
+
+  // Base-level contexts.
+  for (int i = 0; i < BASE_CONTEXT_POSITION_NUM; ++i) {
+    const int nb_row = row - base_ref_offset[i][0];
+    const int nb_col = col - base_ref_offset[i][1];
+    if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->stride &&
+          nb_col < txb_info->stride))
+      continue;
+    const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+    const tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx];
+    if (!has_base(nb_coeff, 0)) continue;
+    if (iscan[nb_coeff_idx] >= eob) continue;
+    // The magnitude cache is only touched when this coefficient is not
+    // above and not to the left of the neighbor.
+    if (row >= nb_row && col >= nb_col)
+      update_mag_arr(txb_cache->base_mag_arr[nb_coeff_idx], abs_qc);
+    const int mag = get_mag_from_mag_arr(txb_cache->base_mag_arr[nb_coeff_idx]);
+    for (int base_idx = 0; base_idx < NUM_BASE_LEVELS; ++base_idx) {
+      if (!has_base(nb_coeff, base_idx)) continue;
+      const int level = base_idx + 1;
+      if (abs_qc == level) {
+        txb_cache->base_count_arr[base_idx][nb_coeff_idx] -= 1;
+        assert(txb_cache->base_count_arr[base_idx][nb_coeff_idx] >= 0);
+      }
+      const int count = txb_cache->base_count_arr[base_idx][nb_coeff_idx];
+      txb_cache->base_ctx_arr[base_idx][nb_coeff_idx][0] =
+          get_base_ctx_from_count_mag(nb_row, nb_col, count, mag, level);
+    }
+  }
+
+  // Base-range contexts.
+  for (int i = 0; i < BR_CONTEXT_POSITION_NUM; ++i) {
+    const int nb_row = row - br_ref_offset[i][0];
+    const int nb_col = col - br_ref_offset[i][1];
+    if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->stride &&
+          nb_col < txb_info->stride))
+      continue;
+    const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+    const tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx];
+    if (!has_br(nb_coeff)) continue;
+    if (iscan[nb_coeff_idx] >= eob) continue;
+    const int level = 1 + NUM_BASE_LEVELS;
+    if (abs_qc == level) {
+      txb_cache->br_count_arr[nb_coeff_idx] -= 1;
+      assert(txb_cache->br_count_arr[nb_coeff_idx] >= 0);
+    }
+    if (row >= nb_row && col >= nb_col)
+      update_mag_arr(txb_cache->br_mag_arr[nb_coeff_idx], abs_qc);
+    const int count = txb_cache->br_count_arr[nb_coeff_idx];
+    const int mag = get_mag_from_mag_arr(txb_cache->br_mag_arr[nb_coeff_idx]);
+    txb_cache->br_ctx_arr[nb_coeff_idx][0] =
+        get_br_ctx_from_count_mag(nb_row, nb_col, count, mag);
+  }
+}
+
+// Returns the full cost of coding coefficient value qc at scan position
+// scan_idx. All contexts are recomputed from txb_info->qcoeff; no cache is
+// consulted.
+static int get_coeff_cost(tran_low_t qc, int scan_idx, TxbInfo *txb_info,
+ const TxbProbs *txb_probs) {
+ const TXB_CTX *txb_ctx = txb_info->txb_ctx;
+ const int is_nz = (qc != 0);
+ const tran_low_t abs_qc = abs(qc);
+ int cost = 0;
+ const int16_t *scan = txb_info->scan_order->scan;
+ const int16_t *iscan = txb_info->scan_order->iscan;
+
+ // Nonzero-map bit, charged only for positions before seg_eob.
+ if (scan_idx < txb_info->seg_eob) {
+ int coeff_ctx =
+ get_nz_map_ctx2(txb_info->qcoeff, scan[scan_idx], txb_info->bwl, iscan);
+ cost += av1_cost_bit(txb_probs->nz_map[coeff_ctx], is_nz);
+ }
+
+ if (is_nz) {
+ // NOTE(review): scan_idx is passed where get_sign_bit_cost takes a
+ // coeff_idx; the two agree only if scan position 0 maps to raster
+ // position 0 -- confirm this holds for every scan order.
+ cost += get_sign_bit_cost(qc, scan_idx, txb_probs->dc_sign_prob,
+ txb_ctx->dc_sign_ctx);
+
+ int ctx_ls[NUM_BASE_LEVELS] = { 0 };
+ get_base_ctx_set(txb_info->qcoeff, scan[scan_idx], txb_info->bwl, ctx_ls);
+
+ // One base-level bit per level that the coefficient reaches.
+ int i;
+ for (i = 0; i < NUM_BASE_LEVELS; ++i) {
+ cost += get_base_cost(abs_qc, ctx_ls[i], txb_probs->coeff_base, i);
+ }
+
+ // Base-range bits plus, for large magnitudes, the Golomb residual.
+ if (abs_qc > NUM_BASE_LEVELS) {
+ int ctx = get_br_ctx(txb_info->qcoeff, scan[scan_idx], txb_info->bwl);
+ cost += get_br_cost(abs_qc, ctx, txb_probs->coeff_lps);
+ cost += get_golomb_cost(abs_qc);
+ }
+
+ // eob flag: 1 for the last coded coefficient, 0 otherwise.
+ if (scan_idx < txb_info->seg_eob) {
+ int eob_ctx =
+ get_eob_ctx(txb_info->qcoeff, scan[scan_idx], txb_info->bwl);
+ cost += av1_cost_bit(txb_probs->eob_flag[eob_ctx],
+ scan_idx == (txb_info->eob - 1));
+ }
+ }
+ return cost;
+}
+
+#if TEST_OPTIMIZE_TXB
+#define ALL_REF_OFFSET_NUM 17
+static int all_ref_offset[ALL_REF_OFFSET_NUM][2] = {
+ { 0, 0 }, { -2, -1 }, { -2, 0 }, { -2, 1 }, { -1, -2 }, { -1, -1 },
+ { -1, 0 }, { -1, 1 }, { 0, -2 }, { 0, -1 }, { 1, -2 }, { 1, -1 },
+ { 1, 0 }, { 2, 0 }, { 0, 1 }, { 0, 2 }, { 1, 1 },
+};
+
+// Sums get_coeff_cost() over the positions of the all_ref_offset
+// neighborhood around (row, col) that lie inside the block and before eob.
+// When cost_map is non-NULL, each position's cost multiplied by `sign` is
+// added to its cost_map cell. Positions are bounds-checked before they are
+// used to index iscan or qcoeff.
+static int sum_nb_coeff_cost_ref(int row, int col, const TxbProbs *txb_probs,
+                                 TxbInfo *txb_info,
+                                 int (*cost_map)[COST_MAP_SIZE], int sign) {
+  int total = 0;
+  for (int i = 0; i < ALL_REF_OFFSET_NUM; ++i) {
+    const int nb_row = row - all_ref_offset[i][0];
+    const int nb_col = col - all_ref_offset[i][1];
+    if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->stride &&
+          nb_col < txb_info->stride))
+      continue;
+    const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
+    const int nb_scan_idx = txb_info->scan_order->iscan[nb_coeff_idx];
+    if (nb_scan_idx >= txb_info->eob) continue;
+    const tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx];
+    const int cost = get_coeff_cost(nb_coeff, nb_scan_idx, txb_info, txb_probs);
+    if (cost_map)
+      cost_map[nb_row - row + COST_MAP_OFFSET]
+              [nb_col - col + COST_MAP_OFFSET] += sign * cost;
+    total += cost;
+  }
+  return total;
+}
+
+// Reference (brute-force) counterpart of try_level_down(): costs the whole
+// all_ref_offset neighborhood with get_coeff_cost() before and after
+// lowering the coefficient at coeff_idx, and returns the difference
+// (new - original). qcoeff is restored before returning. cost_map, when
+// non-NULL, is zeroed and receives the per-position differences.
+static int try_level_down_ref(int coeff_idx, const TxbProbs *txb_probs,
+                              TxbInfo *txb_info,
+                              int (*cost_map)[COST_MAP_SIZE]) {
+  if (cost_map) {
+    for (int i = 0; i < COST_MAP_SIZE; ++i) av1_zero(cost_map[i]);
+  }
+  const tran_low_t qc = txb_info->qcoeff[coeff_idx];
+  if (qc == 0) return 0;
+  const int row = coeff_idx >> txb_info->bwl;
+  const int col = coeff_idx - (row << txb_info->bwl);
+
+  const int org_cost =
+      sum_nb_coeff_cost_ref(row, col, txb_probs, txb_info, cost_map, -1);
+  // Lower the coefficient temporarily, re-cost, then restore it.
+  txb_info->qcoeff[coeff_idx] = get_lower_coeff(qc);
+  const int new_cost =
+      sum_nb_coeff_cost_ref(row, col, txb_probs, txb_info, cost_map, 1);
+  txb_info->qcoeff[coeff_idx] = qc;
+  return new_cost - org_cost;
+}
+// Debug check: compares the cost difference from try_level_down() with the
+// one from try_level_down_ref() for the coefficient at coeff_idx. On a
+// mismatch it prints both totals and then the two cost maps cell by cell
+// as "fast:ref" pairs.
+static void test_level_down(int coeff_idx, const TxbCache *txb_cache,
+ const TxbProbs *txb_probs, TxbInfo *txb_info) {
+ int cost_map[COST_MAP_SIZE][COST_MAP_SIZE];
+ int ref_cost_map[COST_MAP_SIZE][COST_MAP_SIZE];
+ const int cost_diff =
+ try_level_down(coeff_idx, txb_cache, txb_probs, txb_info, cost_map);
+ const int cost_diff_ref =
+ try_level_down_ref(coeff_idx, txb_probs, txb_info, ref_cost_map);
+ if (cost_diff != cost_diff_ref) {
+ printf("qc %d cost_diff %d cost_diff_ref %d\n", txb_info->qcoeff[coeff_idx],
+ cost_diff, cost_diff_ref);
+ for (int r = 0; r < COST_MAP_SIZE; ++r) {
+ for (int c = 0; c < COST_MAP_SIZE; ++c) {
+ printf("%d:%d ", cost_map[r][c], ref_cost_map[r][c]);
+ }
+ printf("\n");
+ }
+ }
+}
+#endif
+
+// TODO(angiebird): make this static once it's called
+// Returns the total cost of the transform block: the txb_skip flag alone
+// when eob is 0, otherwise the "not skipped" flag plus get_coeff_cost() of
+// every coefficient before eob in scan order.
+int get_txb_cost(TxbInfo *txb_info, const TxbProbs *txb_probs) {
+  const int txb_skip_ctx = txb_info->txb_ctx->txb_skip_ctx;
+  if (txb_info->eob == 0) {
+    return av1_cost_bit(txb_probs->txb_skip[txb_skip_ctx], 1);
+  }
+
+  const int16_t *const scan = txb_info->scan_order->scan;
+  int total = av1_cost_bit(txb_probs->txb_skip[txb_skip_ctx], 0);
+  for (int si = 0; si < txb_info->eob; ++si) {
+    const tran_low_t qc = txb_info->qcoeff[scan[si]];
+    total += get_coeff_cost(qc, si, txb_info, txb_probs);
+  }
+  return total;
+}
+
+#if TEST_OPTIMIZE_TXB
+// Debug check for try_change_eob(): when the last coded coefficient has
+// magnitude 1, compares the cost difference reported by try_change_eob()
+// with a reference obtained by costing the whole block with get_txb_cost()
+// before and after actually lowering that coefficient and moving eob. Prints
+// a line on mismatch. qcoeff and eob are restored before returning.
+void test_try_change_eob(TxbInfo *txb_info, TxbProbs *txb_probs,
+ TxbCache *txb_cache) {
+ int eob = txb_info->eob;
+ const int16_t *scan = txb_info->scan_order->scan;
+ if (eob > 0) {
+ int last_si = eob - 1;
+ int last_ci = scan[last_si];
+ int last_coeff = txb_info->qcoeff[last_ci];
+ if (abs(last_coeff) == 1) {
+ int new_eob;
+ int cost_diff =
+ try_change_eob(&new_eob, last_ci, txb_cache, txb_probs, txb_info);
+ int org_eob = txb_info->eob;
+ int cost = get_txb_cost(txb_info, txb_probs);
+
+ // Apply the change for real, cost the block, then undo it.
+ txb_info->qcoeff[last_ci] = get_lower_coeff(last_coeff);
+ set_eob(txb_info, new_eob);
+ int new_cost = get_txb_cost(txb_info, txb_probs);
+ set_eob(txb_info, org_eob);
+ txb_info->qcoeff[last_ci] = last_coeff;
+
+ int ref_cost_diff = -cost + new_cost;
+ if (cost_diff != ref_cost_diff)
+ printf("org_eob %d new_eob %d cost_diff %d ref_cost_diff %d\n", org_eob,
+ new_eob, cost_diff, ref_cost_diff);
+ }
+ }
+}
+#endif
+
+static INLINE int64_t get_coeff_dist(tran_low_t tcoeff, tran_low_t dqcoeff,
+ int shift) {
+ const int64_t diff = (tcoeff - dqcoeff) * (1 << shift);
+ const int64_t error = diff * diff;
+ return error;
+}
+
+// Result of evaluating a one-step lowering of a single coefficient; filled
+// in by try_level_down_facade().
+typedef struct LevelDownStats {
+ int update; // 1 when rd_diff < 0 (lowering is judged beneficial), else 0
+ tran_low_t low_qc; // the quantized coefficient moved one step towards zero
+ tran_low_t low_dqc; // dequantized value of low_qc
+ int64_t rd_diff; // RDCOST() of cost_diff and dist_diff
+ int cost_diff; // estimated change in rate
+ int64_t dist_diff; // distortion with low_dqc minus distortion with current
+ int new_eob; // eob after lowering; -1 when nothing was evaluated
+} LevelDownStats;
+
+// Evaluates lowering the coefficient at scan position scan_idx by one step
+// and records the outcome in *stats. For a zero coefficient only
+// stats->update (0) and stats->new_eob (-1) are set. Otherwise the rate and
+// distortion differences are computed and stats->update is set to 1 when
+// the combined RD difference is negative. Nothing is committed to qcoeff.
+void try_level_down_facade(LevelDownStats *stats, int scan_idx,
+ const TxbCache *txb_cache, const TxbProbs *txb_probs,
+ TxbInfo *txb_info) {
+ const int16_t *scan = txb_info->scan_order->scan;
+ const int coeff_idx = scan[scan_idx];
+ const tran_low_t qc = txb_info->qcoeff[coeff_idx];
+ stats->new_eob = -1;
+ stats->update = 0;
+ if (qc == 0) {
+ return;
+ }
+
+ const tran_low_t tqc = txb_info->tcoeff[coeff_idx];
+ // dequant[0] is used for raster position 0, dequant[1] for all others.
+ const int dqv = txb_info->dequant[coeff_idx != 0];
+
+ // Distortion with the current quantized value.
+ const tran_low_t dqc = qcoeff_to_dqcoeff(qc, dqv, txb_info->shift);
+ const int64_t dqc_dist = get_coeff_dist(tqc, dqc, txb_info->shift);
+
+ // Distortion with the lowered quantized value.
+ stats->low_qc = get_lower_coeff(qc);
+ stats->low_dqc = qcoeff_to_dqcoeff(stats->low_qc, dqv, txb_info->shift);
+ const int64_t low_dqc_dist =
+ get_coeff_dist(tqc, stats->low_dqc, txb_info->shift);
+
+ stats->dist_diff = -dqc_dist + low_dqc_dist;
+ stats->cost_diff = 0;
+ stats->new_eob = txb_info->eob;
+ // Zeroing the last coded coefficient also moves eob, which needs the
+ // dedicated estimator.
+ if (scan_idx == txb_info->eob - 1 && abs(qc) == 1) {
+ stats->cost_diff = try_change_eob(&stats->new_eob, coeff_idx, txb_cache,
+ txb_probs, txb_info);
+ } else {
+ stats->cost_diff =
+ try_level_down(coeff_idx, txb_cache, txb_probs, txb_info, NULL);
+#if TEST_OPTIMIZE_TXB
+ test_level_down(coeff_idx, txb_cache, txb_probs, txb_info);
+#endif
+ }
+ stats->rd_diff = RDCOST(txb_info->rdmult, txb_info->rddiv, stats->cost_diff,
+ stats->dist_diff);
+ if (stats->rd_diff < 0) stats->update = 1;
+ return;
+}
+
+// Greedy rate-distortion refinement of one transform block's quantized
+// coefficients. Two passes are made over the scan order:
+//   1. forward, over coefficients of magnitude 1;
+//   2. backward from the current eob, over every coefficient.
+// Each candidate is evaluated with try_level_down_facade() and applied via
+// update_level_down()/set_eob() when it lowers the RD cost.
+//
+// When |dry_run| is non-zero the passes operate on local copies of the
+// qcoeff/dqcoeff buffers, and the original pointers and eob are restored
+// before returning.
+// NOTE(review): |txb_cache| is not restored after a dry run - confirm callers
+// do not reuse it.
+//
+// Returns 1 if at least one coefficient was changed, 0 otherwise.
+static int optimize_txb(TxbInfo *txb_info, const TxbProbs *txb_probs,
+                        TxbCache *txb_cache, int dry_run) {
+  int update = 0;
+  if (txb_info->eob == 0) return update;
+  int cost_diff = 0;
+  int64_t dist_diff = 0;
+  int64_t rd_diff = 0;
+  const int max_eob = tx_size_2d[txb_info->tx_size];
+
+#if TEST_OPTIMIZE_TXB
+  int64_t sse;
+  int64_t org_dist =
+      av1_block_error_c(txb_info->tcoeff, txb_info->dqcoeff, max_eob, &sse) *
+      (1 << (2 * txb_info->shift));
+  int org_cost = get_txb_cost(txb_info, txb_probs);
+#endif
+
+  tran_low_t *org_qcoeff = txb_info->qcoeff;
+  tran_low_t *org_dqcoeff = txb_info->dqcoeff;
+
+  tran_low_t tmp_qcoeff[MAX_TX_SQUARE];
+  tran_low_t tmp_dqcoeff[MAX_TX_SQUARE];
+  const int org_eob = txb_info->eob;
+  if (dry_run) {
+    // Work on scratch copies so the caller's buffers stay untouched.
+    memcpy(tmp_qcoeff, org_qcoeff, sizeof(org_qcoeff[0]) * max_eob);
+    memcpy(tmp_dqcoeff, org_dqcoeff, sizeof(org_dqcoeff[0]) * max_eob);
+    txb_info->qcoeff = tmp_qcoeff;
+    txb_info->dqcoeff = tmp_dqcoeff;
+  }
+
+  const int16_t *scan = txb_info->scan_order->scan;
+
+  // forward optimize the nz_map
+  const int cur_eob = txb_info->eob;
+  for (int si = 0; si < cur_eob; ++si) {
+    const int coeff_idx = scan[si];
+    tran_low_t qc = txb_info->qcoeff[coeff_idx];
+    if (abs(qc) == 1) {
+      LevelDownStats stats;
+      try_level_down_facade(&stats, si, txb_cache, txb_probs, txb_info);
+      if (stats.update) {
+        update = 1;
+        cost_diff += stats.cost_diff;
+        dist_diff += stats.dist_diff;
+        rd_diff += stats.rd_diff;
+        update_level_down(coeff_idx, txb_cache, txb_info);
+        set_eob(txb_info, stats.new_eob);
+      }
+    }
+  }
+
+  // backward optimize the level-k map
+  for (int si = txb_info->eob - 1; si >= 0; --si) {
+    LevelDownStats stats;
+    try_level_down_facade(&stats, si, txb_cache, txb_probs, txb_info);
+    const int coeff_idx = scan[si];
+    if (stats.update) {
+      update = 1;
+      cost_diff += stats.cost_diff;
+      dist_diff += stats.dist_diff;
+      rd_diff += stats.rd_diff;
+      update_level_down(coeff_idx, txb_cache, txb_info);
+      set_eob(txb_info, stats.new_eob);
+    }
+    // If the eob shrank past the current position, resume from the new eob.
+    if (si > txb_info->eob) si = txb_info->eob;
+  }
+#if TEST_OPTIMIZE_TXB
+  int64_t new_dist =
+      av1_block_error_c(txb_info->tcoeff, txb_info->dqcoeff, max_eob, &sse) *
+      (1 << (2 * txb_info->shift));
+  int new_cost = get_txb_cost(txb_info, txb_probs);
+  int64_t ref_dist_diff = new_dist - org_dist;
+  int ref_cost_diff = new_cost - org_cost;
+  // int64_t is not 'long' on every target, so print through long long.
+  if (cost_diff != ref_cost_diff || dist_diff != ref_dist_diff)
+    printf(
+        "overall rd_diff %lld\ncost_diff %d ref_cost_diff%d\ndist_diff %lld "
+        "ref_dist_diff %lld\neob %d new_eob %d\n\n",
+        (long long)rd_diff, cost_diff, ref_cost_diff, (long long)dist_diff,
+        (long long)ref_dist_diff, org_eob, txb_info->eob);
+#endif
+  if (dry_run) {
+    txb_info->qcoeff = org_qcoeff;
+    txb_info->dqcoeff = org_dqcoeff;
+    set_eob(txb_info, org_eob);
+  }
+  return update;
+}
+
+// These numbers are empirically obtained.
+// Per-plane scaling of the RD multiplier, indexed as [is_inter][plane_type].
+// av1_optimize_txb() applies an entry as (x->rdmult * entry + 2) >> 2, i.e.
+// the entries are in units of 1/4.
+static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
+#if CONFIG_EC_ADAPT
+ { 17, 13 }, { 16, 10 },
+#else
+ { 20, 12 }, { 16, 12 },
+#endif
+};
+
+// Runs optimize_txb() on one transform block of |plane|. Gathers the block's
+// coefficient buffers, scan order, probability tables and RD parameters into
+// TxbInfo/TxbProbs, builds the context cache with gen_txb_cache(), and
+// optimizes in place (not a dry run). p->eobs[block] is written only when the
+// optimizer reports a change. Returns the block's eob after optimization.
+int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
+                     TX_SIZE tx_size, TXB_CTX *txb_ctx) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *p = &x->plane[plane];
+  struct macroblockd_plane *pd = &xd->plane[plane];
+  const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+
+  const PLANE_TYPE plane_type = get_plane_type(plane);
+  const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+  const int is_inter = is_inter_block(mbmi);
+  const int eob = p->eobs[block];
+  const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
+
+  const TxbProbs txb_probs = {
+    .dc_sign_prob = xd->fc->dc_sign[plane_type],
+    .nz_map = xd->fc->nz_map[tx_size][plane_type],
+    .coeff_base = xd->fc->coeff_base[tx_size][plane_type],
+    .coeff_lps = xd->fc->coeff_lps[tx_size][plane_type],
+    .eob_flag = xd->fc->eob_flag[tx_size][plane_type],
+    .txb_skip = xd->fc->txb_skip[tx_size],
+  };
+
+  TxbInfo txb_info = {
+    .qcoeff = BLOCK_OFFSET(p->qcoeff, block),
+    .dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block),
+    .tcoeff = BLOCK_OFFSET(p->coeff, block),
+    .dequant = pd->dequant,
+    .shift = av1_get_tx_scale(tx_size),
+    .tx_size = tx_size,
+    .bwl = bwl,
+    .stride = 1 << bwl,
+    .eob = eob,
+    .seg_eob = AOMMIN(eob, tx_size_2d[tx_size] - 1),
+    .scan_order = get_scan(cm, tx_size, tx_type, is_inter),
+    .txb_ctx = txb_ctx,
+    // Plane-dependent multiplier in units of 1/4, rounded to nearest.
+    .rdmult = (x->rdmult * plane_rd_mult[is_inter][plane_type] + 2) >> 2,
+    .rddiv = x->rddiv,
+  };
+
+  TxbCache txb_cache;
+  gen_txb_cache(&txb_cache, &txb_info);
+
+  const int update = optimize_txb(&txb_info, &txb_probs, &txb_cache, 0);
+  if (update) p->eobs[block] = txb_info.eob;
+  return txb_info.eob;
+}
int av1_get_txb_entropy_context(const tran_low_t *qcoeff,
const SCAN_ORDER *scan_order, int eob) {
const int16_t *scan = scan_order->scan;
@@ -394,10 +1495,10 @@ int av1_get_txb_entropy_context(const tran_low_t *qcoeff,
return cul_level;
}
-static void update_txb_context(int plane, int block, int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
- void *arg) {
- TxbParams *const args = arg;
+void av1_update_txb_context_b(int plane, int block, int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ void *arg) {
+ struct tokenize_b_args *const args = arg;
const AV1_COMP *cpi = args->cpi;
const AV1_COMMON *cm = &cpi->common;
ThreadData *const td = args->td;
@@ -418,10 +1519,10 @@ static void update_txb_context(int plane, int block, int blk_row, int blk_col,
av1_set_contexts(xd, pd, plane, tx_size, cul_level, blk_col, blk_row);
}
-static void update_and_record_txb_context(int plane, int block, int blk_row,
- int blk_col, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, void *arg) {
- TxbParams *const args = arg;
+void av1_update_and_record_txb_context(int plane, int block, int blk_row,
+ int blk_col, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, void *arg) {
+ struct tokenize_b_args *const args = arg;
const AV1_COMP *cpi = args->cpi;
const AV1_COMMON *cm = &cpi->common;
ThreadData *const td = args->td;
@@ -529,7 +1630,7 @@ static void update_and_record_txb_context(int plane, int block, int blk_row,
}
// level is above 1.
- ctx = get_level_ctx(tcoeff, scan[c], bwl);
+ ctx = get_br_ctx(tcoeff, scan[c], bwl);
for (idx = 0; idx < COEFF_BASE_RANGE; ++idx) {
if (level == (idx + 1 + NUM_BASE_LEVELS)) {
++td->counts->coeff_lps[tx_size][plane_type][ctx][1];
@@ -568,23 +1669,23 @@ void av1_update_txb_context(const AV1_COMP *cpi, ThreadData *td,
const int ctx = av1_get_skip_context(xd);
const int skip_inc =
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
- struct TxbParams arg = { cpi, td, 0 };
+ struct tokenize_b_args arg = { cpi, td, NULL, 0 };
(void)rate;
(void)mi_row;
(void)mi_col;
if (mbmi->skip) {
if (!dry_run) td->counts->skip[ctx][1] += skip_inc;
- reset_skip_context(xd, bsize);
+ av1_reset_skip_context(xd, mi_row, mi_col, bsize);
return;
}
if (!dry_run) {
td->counts->skip[ctx][0] += skip_inc;
av1_foreach_transformed_block(xd, bsize, mi_row, mi_col,
- update_and_record_txb_context, &arg);
+ av1_update_and_record_txb_context, &arg);
} else if (dry_run == DRY_RUN_NORMAL) {
- av1_foreach_transformed_block(xd, bsize, mi_row, mi_col, update_txb_context,
- &arg);
+ av1_foreach_transformed_block(xd, bsize, mi_row, mi_col,
+ av1_update_txb_context_b, &arg);
} else {
printf("DRY_RUN_COSTCOEFFS is not supported yet\n");
assert(0);
@@ -749,8 +1850,7 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
av1_invalid_rd_stats(&this_rd_stats);
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
coeff_ctx, AV1_XFORM_QUANT_FP);
- if (x->plane[plane].eobs[block] && !xd->lossless[mbmi->segment_id])
- av1_optimize_b(cm, x, plane, block, tx_size, coeff_ctx);
+ av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
av1_dist_block(cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size,
&this_rd_stats.dist, &this_rd_stats.sse,
OUTPUT_HAS_PREDICTED_PIXELS);
@@ -771,8 +1871,7 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
// copy the best result in the above tx_type search for loop
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
coeff_ctx, AV1_XFORM_QUANT_FP);
- if (x->plane[plane].eobs[block] && !xd->lossless[mbmi->segment_id])
- av1_optimize_b(cm, x, plane, block, tx_size, coeff_ctx);
+ av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
if (!is_inter_block(mbmi)) {
// intra mode needs decoded result such that the next transform block
// can use it for prediction.
diff --git a/third_party/aom/av1/encoder/encodetxb.h b/third_party/aom/av1/encoder/encodetxb.h
index 552d47b54..836033a54 100644
--- a/third_party/aom/av1/encoder/encodetxb.h
+++ b/third_party/aom/av1/encoder/encodetxb.h
@@ -22,6 +22,47 @@
#ifdef __cplusplus
extern "C" {
#endif
+
+// Working state for optimizing one transform block; populated by
+// av1_optimize_txb() and passed to the txb optimization helpers.
+typedef struct TxbInfo {
+ // Quantized coefficients of the block (modified by the optimizer).
+ tran_low_t *qcoeff;
+ // Dequantized coefficients of the block.
+ tran_low_t *dqcoeff;
+ // Transform coefficients before quantization (read only).
+ const tran_low_t *tcoeff;
+ // Dequantizer values: [0] is used for coefficient index 0, [1] otherwise.
+ const int16_t *dequant;
+ // Transform scale for tx_size, from av1_get_tx_scale().
+ int shift;
+ TX_SIZE tx_size;
+ // log2 of the block width used to address coefficients.
+ int bwl;
+ // 1 << bwl.
+ int stride;
+ // Current end-of-block position in scan order.
+ int eob;
+ // min(initial eob, number of coefficients in the block - 1).
+ int seg_eob;
+ const SCAN_ORDER *scan_order;
+ TXB_CTX *txb_ctx;
+ // Rate-distortion multiplier and divisor passed to RDCOST().
+ int64_t rdmult;
+ int64_t rddiv;
+} TxbInfo;
+
+// Per-coefficient cache of counts, magnitudes and contexts for one transform
+// block; filled from a TxbInfo by gen_txb_cache().
+// NOTE(review): the arrays appear to be indexed by coefficient position
+// (up to MAX_TX_SQUARE entries) - confirm against gen_txb_cache().
+typedef struct TxbCache {
+ int nz_count_arr[MAX_TX_SQUARE];
+ int nz_ctx_arr[MAX_TX_SQUARE][2];
+ int base_count_arr[NUM_BASE_LEVELS][MAX_TX_SQUARE];
+ int base_mag_arr[MAX_TX_SQUARE]
+ [2]; // [0]: max magnitude [1]: num of max magnitude
+ int base_ctx_arr[NUM_BASE_LEVELS][MAX_TX_SQUARE][2]; // [1]: not used
+
+ int br_count_arr[MAX_TX_SQUARE];
+ int br_mag_arr[MAX_TX_SQUARE]
+ [2]; // [0]: max magnitude [1]: num of max magnitude
+ int br_ctx_arr[MAX_TX_SQUARE][2]; // [1]: not used
+} TxbCache;
+
+// Probability tables used when costing the coefficients of one transform
+// block. av1_optimize_txb() points each member at the matching table in the
+// frame context (xd->fc) for the block's transform size and plane type.
+typedef struct TxbProbs {
+ // xd->fc->dc_sign[plane_type]
+ const aom_prob *dc_sign_prob;
+ // xd->fc->nz_map[tx_size][plane_type]
+ const aom_prob *nz_map;
+ // xd->fc->coeff_base[tx_size][plane_type]
+ aom_prob (*coeff_base)[COEFF_BASE_CONTEXTS];
+ // xd->fc->coeff_lps[tx_size][plane_type]
+ const aom_prob *coeff_lps;
+ // xd->fc->eob_flag[tx_size][plane_type]
+ const aom_prob *eob_flag;
+ // xd->fc->txb_skip[tx_size]
+ const aom_prob *txb_skip;
+} TxbProbs;
+
void av1_alloc_txb_buf(AV1_COMP *cpi);
void av1_free_txb_buf(AV1_COMP *cpi);
int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
@@ -39,6 +80,14 @@ void av1_update_txb_context(const AV1_COMP *cpi, ThreadData *td,
const int mi_row, const int mi_col);
void av1_write_txb_probs(AV1_COMP *cpi, aom_writer *w);
+void av1_update_txb_context_b(int plane, int block, int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ void *arg);
+
+void av1_update_and_record_txb_context(int plane, int block, int blk_row,
+ int blk_col, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, void *arg);
+
#if CONFIG_TXK_SEL
int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
int block, int blk_row, int blk_col,
@@ -46,6 +95,8 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
int use_fast_coef_costing, RD_STATS *rd_stats);
#endif
+int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
+ TX_SIZE tx_size, TXB_CTX *txb_ctx);
#ifdef __cplusplus
}
#endif
diff --git a/third_party/aom/av1/encoder/ethread.c b/third_party/aom/av1/encoder/ethread.c
index 34f0b9566..7af5f78b6 100644
--- a/third_party/aom/av1/encoder/ethread.c
+++ b/third_party/aom/av1/encoder/ethread.c
@@ -93,14 +93,42 @@ void av1_encode_tiles_mt(AV1_COMP *cpi) {
thread_data->td->pc_tree = NULL;
av1_setup_pc_tree(cm, thread_data->td);
- // Set up variance tree if needed.
- if (cpi->sf.partition_search_type == VAR_BASED_PARTITION)
- av1_setup_var_tree(cm, thread_data->td);
-
+#if CONFIG_MOTION_VAR
+#if CONFIG_HIGHBITDEPTH
+ int buf_scaler = 2;
+#else
+ int buf_scaler = 1;
+#endif
+ CHECK_MEM_ERROR(cm, thread_data->td->above_pred_buf,
+ (uint8_t *)aom_memalign(
+ 16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE *
+ sizeof(*thread_data->td->above_pred_buf)));
+ CHECK_MEM_ERROR(cm, thread_data->td->left_pred_buf,
+ (uint8_t *)aom_memalign(
+ 16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE *
+ sizeof(*thread_data->td->left_pred_buf)));
+ CHECK_MEM_ERROR(
+ cm, thread_data->td->wsrc_buf,
+ (int32_t *)aom_memalign(
+ 16, MAX_SB_SQUARE * sizeof(*thread_data->td->wsrc_buf)));
+ CHECK_MEM_ERROR(
+ cm, thread_data->td->mask_buf,
+ (int32_t *)aom_memalign(
+ 16, MAX_SB_SQUARE * sizeof(*thread_data->td->mask_buf)));
+#endif
// Allocate frame counters in thread data.
CHECK_MEM_ERROR(cm, thread_data->td->counts,
aom_calloc(1, sizeof(*thread_data->td->counts)));
+#if CONFIG_PALETTE
+ // Allocate buffers used by palette coding mode.
+ if (cpi->common.allow_screen_content_tools) {
+ CHECK_MEM_ERROR(
+ cm, thread_data->td->palette_buffer,
+ aom_memalign(16, sizeof(*thread_data->td->palette_buffer)));
+ }
+#endif // CONFIG_PALETTE
+
// Create threads
if (!winterface->reset(worker))
aom_internal_error(&cm->error, AOM_CODEC_ERROR,
@@ -127,6 +155,12 @@ void av1_encode_tiles_mt(AV1_COMP *cpi) {
if (thread_data->td != &cpi->td) {
thread_data->td->mb = cpi->td.mb;
thread_data->td->rd_counts = cpi->td.rd_counts;
+#if CONFIG_MOTION_VAR
+ thread_data->td->mb.above_pred_buf = thread_data->td->above_pred_buf;
+ thread_data->td->mb.left_pred_buf = thread_data->td->left_pred_buf;
+ thread_data->td->mb.wsrc_buf = thread_data->td->wsrc_buf;
+ thread_data->td->mb.mask_buf = thread_data->td->mask_buf;
+#endif
}
if (thread_data->td->counts != &cpi->common.counts) {
memcpy(thread_data->td->counts, &cpi->common.counts,
@@ -134,12 +168,8 @@ void av1_encode_tiles_mt(AV1_COMP *cpi) {
}
#if CONFIG_PALETTE
- // Allocate buffers used by palette coding mode.
- if (cpi->common.allow_screen_content_tools && i < num_workers - 1) {
- MACROBLOCK *x = &thread_data->td->mb;
- CHECK_MEM_ERROR(cm, x->palette_buffer,
- aom_memalign(16, sizeof(*x->palette_buffer)));
- }
+ if (cpi->common.allow_screen_content_tools && i < num_workers - 1)
+ thread_data->td->mb.palette_buffer = thread_data->td->palette_buffer;
#endif // CONFIG_PALETTE
}
@@ -171,6 +201,9 @@ void av1_encode_tiles_mt(AV1_COMP *cpi) {
if (i < cpi->num_workers - 1) {
av1_accumulate_frame_counts(&cm->counts, thread_data->td->counts);
accumulate_rd_opt(&cpi->td, thread_data->td);
+#if CONFIG_VAR_TX
+ cpi->td.mb.txb_split_count += thread_data->td->mb.txb_split_count;
+#endif
}
}
}
diff --git a/third_party/aom/av1/encoder/firstpass.c b/third_party/aom/av1/encoder/firstpass.c
index e35a54ef2..7a0abba2d 100644
--- a/third_party/aom/av1/encoder/firstpass.c
+++ b/third_party/aom/av1/encoder/firstpass.c
@@ -568,16 +568,11 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
od_init_qm(x->daala_enc.state.qm, x->daala_enc.state.qm_inv,
x->daala_enc.qm == OD_HVS_QM ? OD_QM8_Q4_HVS : OD_QM8_Q4_FLAT);
-#if CONFIG_DAALA_EC
+#if !CONFIG_ANS
od_ec_enc_init(&x->daala_enc.w.ec, 65025);
-#else
-#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
-#endif
-
-#if CONFIG_DAALA_EC
od_ec_enc_reset(&x->daala_enc.w.ec);
#else
-#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
+#error "CONFIG_PVQ currently requires !CONFIG_ANS."
#endif
}
#endif
@@ -598,6 +593,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
av1_init_mv_probs(cm);
#if CONFIG_ADAPT_SCAN
av1_init_scan_order(cm);
+ av1_deliver_eob_threshold(cm, xd);
#endif
av1_convolve_init(cm);
#if CONFIG_PVQ
@@ -884,7 +880,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
xd->mi[0]->mbmi.tx_size = TX_4X4;
xd->mi[0]->mbmi.ref_frame[0] = LAST_FRAME;
xd->mi[0]->mbmi.ref_frame[1] = NONE_FRAME;
- av1_build_inter_predictors_sby(xd, mb_row * mb_scale,
+ av1_build_inter_predictors_sby(cm, xd, mb_row * mb_scale,
mb_col * mb_scale, NULL, bsize);
av1_encode_sby_pass1(cm, x, bsize);
sum_mvr += mv.row;
@@ -997,10 +993,10 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
}
#if CONFIG_PVQ
-#if CONFIG_DAALA_EC
+#if !CONFIG_ANS
od_ec_enc_clear(&x->daala_enc.w.ec);
#else
-#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
+#error "CONFIG_PVQ currently requires !CONFIG_ANS."
#endif
x->pvq_q->last_pos = x->pvq_q->curr_pos;
@@ -1235,28 +1231,26 @@ static void setup_rf_level_maxq(AV1_COMP *cpi) {
}
}
-void av1_init_subsampling(AV1_COMP *cpi) {
- const AV1_COMMON *const cm = &cpi->common;
- RATE_CONTROL *const rc = &cpi->rc;
- const int w = cm->width;
- const int h = cm->height;
- int i;
-
- for (i = 0; i < FRAME_SCALE_STEPS; ++i) {
- // Note: Frames with odd-sized dimensions may result from this scaling.
- rc->frame_width[i] = (w * 16) / frame_scale_factor[i];
- rc->frame_height[i] = (h * 16) / frame_scale_factor[i];
- }
-
- setup_rf_level_maxq(cpi);
+// Computes the dimensions of the next scaled frame: the configured width and
+// height (cpi->oxcf) multiplied by cpi->resize_next_scale_num and divided by
+// cpi->resize_next_scale_den. The multiplication is done before the division.
+// Results are written through |scaled_frame_width| and |scaled_frame_height|.
+void av1_calculate_next_scaled_size(const AV1_COMP *cpi,
+ int *scaled_frame_width,
+ int *scaled_frame_height) {
+ *scaled_frame_width =
+ cpi->oxcf.width * cpi->resize_next_scale_num / cpi->resize_next_scale_den;
+ *scaled_frame_height = cpi->oxcf.height * cpi->resize_next_scale_num /
+ cpi->resize_next_scale_den;
 }
-void av1_calculate_coded_size(AV1_COMP *cpi, int *scaled_frame_width,
- int *scaled_frame_height) {
- RATE_CONTROL *const rc = &cpi->rc;
- *scaled_frame_width = rc->frame_width[rc->frame_size_selector];
- *scaled_frame_height = rc->frame_height[rc->frame_size_selector];
+#if CONFIG_FRAME_SUPERRES
+// Computes the encoded frame dimensions under superres: the scaled frame size
+// from cpi->oxcf multiplied by cpi->common.superres_scale_numerator and
+// divided by SUPERRES_SCALE_DENOMINATOR. Results are written through
+// |encoded_width| and |encoded_height|.
+void av1_calculate_superres_size(const AV1_COMP *cpi, int *encoded_width,
+ int *encoded_height) {
+ *encoded_width = cpi->oxcf.scaled_frame_width *
+ cpi->common.superres_scale_numerator /
+ SUPERRES_SCALE_DENOMINATOR;
+ *encoded_height = cpi->oxcf.scaled_frame_height *
+ cpi->common.superres_scale_numerator /
+ SUPERRES_SCALE_DENOMINATOR;
 }
+#endif // CONFIG_FRAME_SUPERRES
void av1_init_second_pass(AV1_COMP *cpi) {
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
@@ -1316,7 +1310,7 @@ void av1_init_second_pass(AV1_COMP *cpi) {
twopass->last_kfgroup_zeromotion_pct = 100;
if (oxcf->resize_mode != RESIZE_NONE) {
- av1_init_subsampling(cpi);
+ setup_rf_level_maxq(cpi);
}
}
@@ -2300,7 +2294,8 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
if (oxcf->resize_mode == RESIZE_DYNAMIC) {
// Default to starting GF groups at normal frame size.
- cpi->rc.next_frame_size_selector = UNSCALED;
+ // TODO(afergs): Make a function for this
+ cpi->resize_next_scale_num = cpi->resize_next_scale_den;
}
}
@@ -2646,7 +2641,8 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
if (oxcf->resize_mode == RESIZE_DYNAMIC) {
// Default to normal-sized frame on keyframes.
- cpi->rc.next_frame_size_selector = UNSCALED;
+ // TODO(afergs): Make a function for this
+ cpi->resize_next_scale_num = cpi->resize_next_scale_den;
}
}
diff --git a/third_party/aom/av1/encoder/firstpass.h b/third_party/aom/av1/encoder/firstpass.h
index db459cc22..43104454c 100644
--- a/third_party/aom/av1/encoder/firstpass.h
+++ b/third_party/aom/av1/encoder/firstpass.h
@@ -177,10 +177,17 @@ void av1_twopass_postencode_update(struct AV1_COMP *cpi);
// Post encode update of the rate control parameters for 2-pass
void av1_twopass_postencode_update(struct AV1_COMP *cpi);
-void av1_init_subsampling(struct AV1_COMP *cpi);
-
-void av1_calculate_coded_size(struct AV1_COMP *cpi, int *scaled_frame_width,
- int *scaled_frame_height);
+void av1_calculate_next_scaled_size(const struct AV1_COMP *cpi,
+ int *scaled_frame_width,
+ int *scaled_frame_height);
+
+#if CONFIG_FRAME_SUPERRES
+// This is the size after superres scaling, which could be 1:1.
+// Superres scaling happens after regular downscaling.
+// TODO(afergs): Limit overall reduction to 1/2 of the original size
+void av1_calculate_superres_size(const struct AV1_COMP *cpi, int *encoded_width,
+ int *encoded_height);
+#endif // CONFIG_FRAME_SUPERRES
#if CONFIG_EXT_REFS
static INLINE int get_number_of_extra_arfs(int interval, int arf_pending) {
diff --git a/third_party/aom/av1/encoder/global_motion.c b/third_party/aom/av1/encoder/global_motion.c
index 2a6204939..74cbc8ae7 100644
--- a/third_party/aom/av1/encoder/global_motion.c
+++ b/third_party/aom/av1/encoder/global_motion.c
@@ -124,14 +124,15 @@ static void force_wmtype(WarpedMotionParams *wm, TransformationType wmtype) {
wm->wmtype = wmtype;
}
-double refine_integerized_param(WarpedMotionParams *wm,
- TransformationType wmtype,
+int64_t refine_integerized_param(WarpedMotionParams *wm,
+ TransformationType wmtype,
#if CONFIG_HIGHBITDEPTH
- int use_hbd, int bd,
+ int use_hbd, int bd,
#endif // CONFIG_HIGHBITDEPTH
- uint8_t *ref, int r_width, int r_height,
- int r_stride, uint8_t *dst, int d_width,
- int d_height, int d_stride, int n_refinements) {
+ uint8_t *ref, int r_width, int r_height,
+ int r_stride, uint8_t *dst, int d_width,
+ int d_height, int d_stride,
+ int n_refinements) {
static const int max_trans_model_params[TRANS_TYPES] = {
0, 2, 4, 6, 8, 8, 8
};
@@ -139,22 +140,21 @@ double refine_integerized_param(WarpedMotionParams *wm,
int i = 0, p;
int n_params = max_trans_model_params[wmtype];
int32_t *param_mat = wm->wmmat;
- double step_error;
+ int64_t step_error, best_error;
int32_t step;
int32_t *param;
int32_t curr_param;
int32_t best_param;
- double best_error;
force_wmtype(wm, wmtype);
- best_error = av1_warp_erroradv(wm,
+ best_error = av1_warp_error(wm,
#if CONFIG_HIGHBITDEPTH
- use_hbd, bd,
+ use_hbd, bd,
#endif // CONFIG_HIGHBITDEPTH
- ref, r_width, r_height, r_stride,
- dst + border * d_stride + border, border,
- border, d_width - 2 * border,
- d_height - 2 * border, d_stride, 0, 0, 16, 16);
+ ref, r_width, r_height, r_stride,
+ dst + border * d_stride + border, border, border,
+ d_width - 2 * border, d_height - 2 * border,
+ d_stride, 0, 0, 16, 16);
step = 1 << (n_refinements + 1);
for (i = 0; i < n_refinements; i++, step >>= 1) {
for (p = 0; p < n_params; ++p) {
@@ -167,7 +167,7 @@ double refine_integerized_param(WarpedMotionParams *wm,
best_param = curr_param;
// look to the left
*param = add_param_offset(p, curr_param, -step);
- step_error = av1_warp_erroradv(
+ step_error = av1_warp_error(
wm,
#if CONFIG_HIGHBITDEPTH
use_hbd, bd,
@@ -183,7 +183,7 @@ double refine_integerized_param(WarpedMotionParams *wm,
// look to the right
*param = add_param_offset(p, curr_param, step);
- step_error = av1_warp_erroradv(
+ step_error = av1_warp_error(
wm,
#if CONFIG_HIGHBITDEPTH
use_hbd, bd,
@@ -202,7 +202,7 @@ double refine_integerized_param(WarpedMotionParams *wm,
// for the biggest step size
while (step_dir) {
*param = add_param_offset(p, best_param, step * step_dir);
- step_error = av1_warp_erroradv(
+ step_error = av1_warp_error(
wm,
#if CONFIG_HIGHBITDEPTH
use_hbd, bd,
diff --git a/third_party/aom/av1/encoder/global_motion.h b/third_party/aom/av1/encoder/global_motion.h
index 8fc757f38..38509df6a 100644
--- a/third_party/aom/av1/encoder/global_motion.h
+++ b/third_party/aom/av1/encoder/global_motion.h
@@ -26,14 +26,17 @@ void convert_model_to_params(const double *params, WarpedMotionParams *model);
int is_enough_erroradvantage(double erroradv, int params_cost);
-double refine_integerized_param(WarpedMotionParams *wm,
- TransformationType wmtype,
+// Returns the av1_warp_error between "dst" and the result of applying the
+// motion params that result from fine-tuning "wm" to "ref". Note that "wm" is
+// modified in place.
+int64_t refine_integerized_param(WarpedMotionParams *wm,
+ TransformationType wmtype,
#if CONFIG_HIGHBITDEPTH
- int use_hbd, int bd,
+ int use_hbd, int bd,
#endif // CONFIG_HIGHBITDEPTH
- uint8_t *ref, int r_width, int r_height,
- int r_stride, uint8_t *dst, int d_width,
- int d_height, int d_stride, int n_refinements);
+ uint8_t *ref, int r_width, int r_height,
+ int r_stride, uint8_t *dst, int d_width,
+ int d_height, int d_stride, int n_refinements);
/*
Computes "num_motions" candidate global motion parameters between two frames.
diff --git a/third_party/aom/av1/encoder/hybrid_fwd_txfm.c b/third_party/aom/av1/encoder/hybrid_fwd_txfm.c
index 4fd563163..c57deed84 100644
--- a/third_party/aom/av1/encoder/hybrid_fwd_txfm.c
+++ b/third_party/aom/av1/encoder/hybrid_fwd_txfm.c
@@ -16,7 +16,7 @@
#include "av1/common/idct.h"
#include "av1/encoder/hybrid_fwd_txfm.h"
-#if CONFIG_CB4X4
+#if CONFIG_CHROMA_2X2
static void fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int lossless) {
tran_high_t a1 = src_diff[0];
@@ -132,8 +132,38 @@ static void fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
}
#endif // CONFIG_TX64X64
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+// 16x4 forward transform: forwards to av1_fht16x4(). |fwd_txfm_opt| is unused.
+static void fwd_txfm_16x4(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ (void)fwd_txfm_opt;
+ av1_fht16x4(src_diff, coeff, diff_stride, tx_type);
+}
+
+// 4x16 forward transform: forwards to av1_fht4x16(). |fwd_txfm_opt| is unused.
+static void fwd_txfm_4x16(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ (void)fwd_txfm_opt;
+ av1_fht4x16(src_diff, coeff, diff_stride, tx_type);
+}
+
+// 32x8 forward transform: forwards to av1_fht32x8(). |fwd_txfm_opt| is unused.
+static void fwd_txfm_32x8(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ (void)fwd_txfm_opt;
+ av1_fht32x8(src_diff, coeff, diff_stride, tx_type);
+}
+
+// 8x32 forward transform: forwards to av1_fht8x32(). |fwd_txfm_opt| is unused.
+static void fwd_txfm_8x32(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ (void)fwd_txfm_opt;
+ av1_fht8x32(src_diff, coeff, diff_stride, tx_type);
+}
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+
#if CONFIG_HIGHBITDEPTH
-#if CONFIG_CB4X4
+#if CONFIG_CHROMA_2X2
static void highbd_fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int lossless,
const int bd) {
@@ -425,11 +455,25 @@ void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
case TX_4X4:
fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless);
break;
-#if CONFIG_CB4X4
+#if CONFIG_CHROMA_2X2
case TX_2X2:
fwd_txfm_2x2(src_diff, coeff, diff_stride, tx_type, lossless);
break;
#endif
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+ case TX_4X16:
+ fwd_txfm_4x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ break;
+ case TX_16X4:
+ fwd_txfm_16x4(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ break;
+ case TX_8X32:
+ fwd_txfm_8x32(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ break;
+ case TX_32X8:
+ fwd_txfm_32x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ break;
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
default: assert(0); break;
}
}
@@ -488,7 +532,7 @@ void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
case TX_4X4:
highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless, bd);
break;
-#if CONFIG_CB4X4
+#if CONFIG_CHROMA_2X2
case TX_2X2:
highbd_fwd_txfm_2x2(src_diff, coeff, diff_stride, tx_type, lossless, bd);
break;
diff --git a/third_party/aom/av1/encoder/mathutils.h b/third_party/aom/av1/encoder/mathutils.h
new file mode 100644
index 000000000..23243dd9e
--- /dev/null
+++ b/third_party/aom/av1/encoder/mathutils.h
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <memory.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+// Magnitude threshold below which linsolve() treats a pivot as zero and
+// reports failure.
+static const double TINY_NEAR_ZERO = 1.0E-16;
+
+// Solves Ax = b, where x and b are column vectors of size nx1 and A is nxn.
+// A is addressed as A[row * stride + col]. Both A and b are overwritten while
+// eliminating. Returns 1 on success; returns 0 as soon as a pivot with
+// magnitude below TINY_NEAR_ZERO is met, in which case x may be untouched or
+// only partially written.
+static INLINE int linsolve(int n, double *A, int stride, double *b, double *x) {
+  int row, col, piv;
+  double t;
+  // Forward elimination
+  for (piv = 0; piv < n - 1; piv++) {
+    // Bring the largest magnitude to the diagonal position by swapping
+    // adjacent rows, working from the bottom row upwards.
+    for (row = n - 1; row > piv; row--) {
+      double *const upper = A + (row - 1) * stride;
+      double *const lower = A + row * stride;
+      if (fabs(upper[piv]) < fabs(lower[piv])) {
+        for (col = 0; col < n; col++) {
+          t = lower[col];
+          lower[col] = upper[col];
+          upper[col] = t;
+        }
+        t = b[row];
+        b[row] = b[row - 1];
+        b[row - 1] = t;
+      }
+    }
+    // Subtract a multiple of the pivot row from every row below it.
+    for (row = piv; row < n - 1; row++) {
+      const double *const pivot_row = A + piv * stride;
+      double *const target = A + (row + 1) * stride;
+      if (fabs(pivot_row[piv]) < TINY_NEAR_ZERO) return 0;
+      t = target[piv] / pivot_row[piv];
+      for (col = 0; col < n; col++) target[col] -= t * pivot_row[col];
+      b[row + 1] -= t * b[piv];
+    }
+  }
+  // Backward substitution
+  for (row = n - 1; row >= 0; row--) {
+    const double *const cur = A + row * stride;
+    if (fabs(cur[row]) < TINY_NEAR_ZERO) return 0;
+    t = 0;
+    for (col = row + 1; col <= n - 1; col++) t += cur[col] * x[col];
+    x[row] = (b[row] - t) / cur[row];
+  }
+
+  return 1;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Least-squares
+// Solves for n-dim x in a least squares sense to minimize |Ax - b|^2
+// The solution is simply x = (A'A)^-1 A'b or simply the solution for
+// the system: A'A x = A'b
+//
+// A is read as `rows` rows of n values addressed as A[k * stride + i]; b is
+// read as `rows` values. `scratch`, when non-NULL, is used as workspace for
+// n * (n + 1) doubles; when NULL, a buffer of that size is allocated and
+// freed inside this function.
+// Returns the result of linsolve() on the normal equations (1 on success,
+// 0 on failure), or 0 if the workspace cannot be allocated.
+static INLINE int least_squares(int n, double *A, int rows, int stride,
+                                double *b, double *scratch, double *x) {
+  int i, j, k;
+  double *scratch_ = NULL;
+  double *AtA, *Atb;
+  if (!scratch) {
+    scratch_ = (double *)aom_malloc(sizeof(*scratch) * n * (n + 1));
+    // Report allocation failure the same way as a failed solve instead of
+    // writing through a NULL pointer below.
+    if (!scratch_) return 0;
+    scratch = scratch_;
+  }
+  AtA = scratch;
+  Atb = scratch + n * n;
+
+  // A'A is symmetric: compute each upper-triangle entry once and mirror it.
+  for (i = 0; i < n; ++i) {
+    for (j = i; j < n; ++j) {
+      AtA[i * n + j] = 0.0;
+      for (k = 0; k < rows; ++k)
+        AtA[i * n + j] += A[k * stride + i] * A[k * stride + j];
+      AtA[j * n + i] = AtA[i * n + j];
+    }
+    Atb[i] = 0;
+    for (k = 0; k < rows; ++k) Atb[i] += A[k * stride + i] * b[k];
+  }
+  int ret = linsolve(n, AtA, n, Atb, x);
+  if (scratch_) aom_free(scratch_);
+  return ret;
+}
+
+// Matrix multiply: res = m1 * m2.
+// m1 is read as m1_rows x inner_dim and m2 as inner_dim x m2_cols, both
+// densely packed row by row. res receives m1_rows * m2_cols values, written
+// in row-major order.
+static INLINE void multiply_mat(const double *m1, const double *m2, double *res,
+                                const int m1_rows, const int inner_dim,
+                                const int m2_cols) {
+  int r, c, t;
+  for (r = 0; r < m1_rows; ++r) {
+    const double *const lhs_row = m1 + r * inner_dim;
+    for (c = 0; c < m2_cols; ++c) {
+      double acc = 0;
+      for (t = 0; t < inner_dim; ++t) acc += lhs_row[t] * m2[t * m2_cols + c];
+      res[r * m2_cols + c] = acc;
+    }
+  }
+}
+
+//
+// The functions below are needed only for homography computation
+// Remove if the homography models are not used.
+//
+///////////////////////////////////////////////////////////////////////////////
+// svdcmp
+// Adapted from Numerical Recipes in C
+
+// Returns the magnitude of a, negated when b is not >= 0.
+static INLINE double sign(double a, double b) {
+  const double mag = fabs(a);
+  if (b >= 0) return mag;
+  return -mag;
+}
+
+// Computes sqrt(a^2 + b^2) by factoring out the larger of |a| and |b|, so the
+// inputs themselves are never squared. Returns 0 when the larger magnitude
+// is zero.
+static INLINE double pythag(double a, double b) {
+  const double mag_a = fabs(a);
+  const double mag_b = fabs(b);
+  const int a_dominates = mag_a > mag_b;
+  const double larger = a_dominates ? mag_a : mag_b;
+  const double smaller = a_dominates ? mag_b : mag_a;
+  // When larger is zero this quotient is not used; the check below returns
+  // first.
+  const double ratio = smaller / larger;
+
+  if (larger == 0) return 0;
+  return larger * sqrt(1.0 + ratio * ratio);
+}
+
+// Singular value decomposition routine (see the Numerical Recipes note above).
+//
+// u: array of m row pointers, each row holding n values; modified in place.
+// w: output array of n values.
+// v: array of n row pointers, each row holding n values; written here.
+// NOTE(review): per the Numerical Recipes routine this is derived from, on
+// return u should hold U, w the singular values and v the (untransposed) V
+// of the input matrix -- confirm against the reference before relying on it.
+//
+// Returns 0 on success. Returns 1 if an iteration for some singular value
+// has not converged after max_its passes, or if the temporary work array
+// cannot be allocated.
+static INLINE int svdcmp(double **u, int m, int n, double w[], double **v) {
+  const int max_its = 30;
+  int flag, i, its, j, jj, k, l, nm;
+  double anorm, c, f, g, h, s, scale, x, y, z;
+  double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1));
+  // Without this check a failed allocation would be written through below.
+  // 1 is the failure value callers already test for.
+  if (!rv1) return 1;
+  g = scale = anorm = 0.0;
+  // Phase 1: alternate column and row reductions; rv1 collects the
+  // off-diagonal terms and w the diagonal terms.
+  for (i = 0; i < n; i++) {
+    l = i + 1;
+    rv1[i] = scale * g;
+    g = s = scale = 0.0;
+    if (i < m) {
+      for (k = i; k < m; k++) scale += fabs(u[k][i]);
+      if (scale != 0.) {
+        for (k = i; k < m; k++) {
+          u[k][i] /= scale;
+          s += u[k][i] * u[k][i];
+        }
+        f = u[i][i];
+        g = -sign(sqrt(s), f);
+        h = f * g - s;
+        u[i][i] = f - g;
+        for (j = l; j < n; j++) {
+          for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j];
+          f = s / h;
+          for (k = i; k < m; k++) u[k][j] += f * u[k][i];
+        }
+        for (k = i; k < m; k++) u[k][i] *= scale;
+      }
+    }
+    w[i] = scale * g;
+    g = s = scale = 0.0;
+    if (i < m && i != n - 1) {
+      for (k = l; k < n; k++) scale += fabs(u[i][k]);
+      if (scale != 0.) {
+        for (k = l; k < n; k++) {
+          u[i][k] /= scale;
+          s += u[i][k] * u[i][k];
+        }
+        f = u[i][l];
+        g = -sign(sqrt(s), f);
+        h = f * g - s;
+        u[i][l] = f - g;
+        for (k = l; k < n; k++) rv1[k] = u[i][k] / h;
+        for (j = l; j < m; j++) {
+          for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k];
+          for (k = l; k < n; k++) u[j][k] += s * rv1[k];
+        }
+        for (k = l; k < n; k++) u[i][k] *= scale;
+      }
+    }
+    anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i])));
+  }
+
+  // Phase 2: fill v, working from the last column back to the first.
+  for (i = n - 1; i >= 0; i--) {
+    if (i < n - 1) {
+      if (g != 0.) {
+        for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g;
+        for (j = l; j < n; j++) {
+          for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j];
+          for (k = l; k < n; k++) v[k][j] += s * v[k][i];
+        }
+      }
+      for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
+    }
+    v[i][i] = 1.0;
+    g = rv1[i];
+    l = i;
+  }
+  // Phase 3: rebuild u in place, again from the last index backwards.
+  for (i = AOMMIN(m, n) - 1; i >= 0; i--) {
+    l = i + 1;
+    g = w[i];
+    for (j = l; j < n; j++) u[i][j] = 0.0;
+    if (g != 0.) {
+      g = 1.0 / g;
+      for (j = l; j < n; j++) {
+        for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j];
+        f = (s / u[i][i]) * g;
+        for (k = i; k < m; k++) u[k][j] += f * u[k][i];
+      }
+      for (j = i; j < m; j++) u[j][i] *= g;
+    } else {
+      for (j = i; j < m; j++) u[j][i] = 0.0;
+    }
+    ++u[i][i];
+  }
+  // Phase 4: iterate on each w[k], up to max_its passes per value.
+  for (k = n - 1; k >= 0; k--) {
+    for (its = 0; its < max_its; its++) {
+      flag = 1;
+      // Search downwards for a negligible rv1[l] (or reach l == 0); the
+      // "+ anorm == anorm" comparisons test for negligibility relative to
+      // anorm.
+      for (l = k; l >= 0; l--) {
+        nm = l - 1;
+        if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) {
+          flag = 0;
+          break;
+        }
+        if ((double)(fabs(w[nm]) + anorm) == anorm) break;
+      }
+      if (flag) {
+        c = 0.0;
+        s = 1.0;
+        for (i = l; i <= k; i++) {
+          f = s * rv1[i];
+          rv1[i] = c * rv1[i];
+          if ((double)(fabs(f) + anorm) == anorm) break;
+          g = w[i];
+          h = pythag(f, g);
+          w[i] = h;
+          h = 1.0 / h;
+          c = g * h;
+          s = -f * h;
+          for (j = 0; j < m; j++) {
+            y = u[j][nm];
+            z = u[j][i];
+            u[j][nm] = y * c + z * s;
+            u[j][i] = z * c - y * s;
+          }
+        }
+      }
+      z = w[k];
+      if (l == k) {
+        // Done with this k: make w[k] non-negative, flipping the matching
+        // column of v when the sign changes.
+        if (z < 0.0) {
+          w[k] = -z;
+          for (j = 0; j < n; j++) v[j][k] = -v[j][k];
+        }
+        break;
+      }
+      if (its == max_its - 1) {
+        // Last permitted pass and still not converged.
+        aom_free(rv1);
+        return 1;
+      }
+      assert(k > 0);
+      x = w[l];
+      nm = k - 1;
+      y = w[nm];
+      g = rv1[nm];
+      h = rv1[k];
+      f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
+      g = pythag(f, 1.0);
+      f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x;
+      c = s = 1.0;
+      for (j = l; j <= nm; j++) {
+        i = j + 1;
+        g = rv1[i];
+        y = w[i];
+        h = s * g;
+        g = c * g;
+        z = pythag(f, h);
+        rv1[j] = z;
+        c = f / z;
+        s = h / z;
+        f = x * c + g * s;
+        g = g * c - x * s;
+        h = y * s;
+        y *= c;
+        for (jj = 0; jj < n; jj++) {
+          x = v[jj][j];
+          z = v[jj][i];
+          v[jj][j] = x * c + z * s;
+          v[jj][i] = z * c - x * s;
+        }
+        z = pythag(f, h);
+        w[j] = z;
+        if (z != 0.) {
+          z = 1.0 / z;
+          c = f * z;
+          s = h * z;
+        }
+        f = c * g + s * y;
+        x = c * y - s * g;
+        for (jj = 0; jj < m; jj++) {
+          y = u[jj][j];
+          z = u[jj][i];
+          u[jj][j] = y * c + z * s;
+          u[jj][i] = z * c - y * s;
+        }
+      }
+      rv1[l] = 0.0;
+      rv1[k] = f;
+      w[k] = x;
+    }
+  }
+  aom_free(rv1);
+  return 0;
+}
+
+// Runs svdcmp() on a copy of the M x N matrix `matx`.
+// U (M x N) and V (N x N) are flat, densely packed buffers that this function
+// exposes to svdcmp() as arrays of row pointers; W is handed to svdcmp()
+// unchanged. matx is first copied row by row into U.
+// Returns 0 on success, 1 if a row-pointer array cannot be allocated or if
+// svdcmp() reports failure.
+static INLINE int SVD(double *U, double *W, double *V, double *matx, int M,
+                      int N) {
+  // Assumes allocation for U is MxN
+  double **nrU = (double **)aom_malloc((M) * sizeof(*nrU));
+  double **nrV = (double **)aom_malloc((N) * sizeof(*nrV));
+  int status, r;
+
+  if (!nrU || !nrV) {
+    if (nrU) aom_free(nrU);
+    if (nrV) aom_free(nrV);
+    return 1;
+  }
+
+  /* point each U row at its slice and copy the given matx row into it */
+  for (r = 0; r < M; r++) {
+    nrU[r] = U + r * N;
+    memcpy(nrU[r], matx + N * r, N * sizeof(*matx));
+  }
+  for (r = 0; r < N; r++) nrV[r] = V + r * N;
+
+  /* HERE IT IS: do SVD */
+  status = svdcmp(nrU, M, N, W, nrV) ? 1 : 0;
+
+  /* aom_free Numerical Recipes arrays */
+  aom_free(nrU);
+  aom_free(nrV);
+
+  return status;
+}
diff --git a/third_party/aom/av1/encoder/mbgraph.c b/third_party/aom/av1/encoder/mbgraph.c
index 1296027dc..3f5daebcc 100644
--- a/third_party/aom/av1/encoder/mbgraph.c
+++ b/third_party/aom/av1/encoder/mbgraph.c
@@ -52,11 +52,14 @@ static unsigned int do_16x16_motion_iteration(AV1_COMP *cpi, const MV *ref_mv,
{
int distortion;
unsigned int sse;
- cpi->find_fractional_mv_step(x, ref_mv, cpi->common.allow_high_precision_mv,
- x->errorperbit, &v_fn_ptr, 0,
- mv_sf->subpel_iters_per_step,
- cond_cost_list(cpi, cost_list), NULL, NULL,
- &distortion, &sse, NULL, 0, 0, 0);
+ cpi->find_fractional_mv_step(
+ x, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
+ &v_fn_ptr, 0, mv_sf->subpel_iters_per_step,
+ cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL,
+#if CONFIG_EXT_INTER
+ NULL, 0, 0,
+#endif
+ 0, 0, 0);
}
#if CONFIG_EXT_INTER
@@ -71,7 +74,8 @@ static unsigned int do_16x16_motion_iteration(AV1_COMP *cpi, const MV *ref_mv,
xd->mi[0]->mbmi.ref_frame[1] = NONE_FRAME;
#endif // CONFIG_EXT_INTER
- av1_build_inter_predictors_sby(xd, mb_row, mb_col, NULL, BLOCK_16X16);
+ av1_build_inter_predictors_sby(&cpi->common, xd, mb_row, mb_col, NULL,
+ BLOCK_16X16);
/* restore UMV window */
x->mv_limits = tmp_mv_limits;
diff --git a/third_party/aom/av1/encoder/mcomp.c b/third_party/aom/av1/encoder/mcomp.c
index d069eefb0..52080ca0d 100644
--- a/third_party/aom/av1/encoder/mcomp.c
+++ b/third_party/aom/av1/encoder/mcomp.c
@@ -110,7 +110,7 @@ static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
int sad_per_bit) {
const MV diff = { (mv->row - ref->row) * 8, (mv->col - ref->col) * 8 };
return ROUND_POWER_OF_TWO(
- (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->mvsadcost) * sad_per_bit,
+ (unsigned)mv_cost(&diff, x->nmvjointcost, x->mvcost) * sad_per_bit,
AV1_PROB_COST_SHIFT);
}
@@ -176,6 +176,7 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
}
/* checks if (r, c) has better score than previous best */
+#if CONFIG_EXT_INTER
#define CHECK_BETTER(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
MV this_mv = { r, c }; \
@@ -183,6 +184,10 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
if (second_pred == NULL) \
thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
src_address, src_stride, &sse); \
+ else if (mask) \
+ thismse = vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
+ src_address, src_stride, second_pred, mask, \
+ mask_stride, invert_mask, &sse); \
else \
thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
src_address, src_stride, &sse, second_pred); \
@@ -197,6 +202,29 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
} else { \
v = INT_MAX; \
}
+#else
+/* Variant used when CONFIG_EXT_INTER is off.
+ * If (r, c) lies within [minr, maxr] x [minc, maxc], sets v to the MV cost of
+ * { r, c } relative to ref_mv plus the sub-pixel variance from vfp->svf (when
+ * second_pred is NULL) or vfp->svaf (otherwise). When v beats besterr, it
+ * updates besterr, br, bc, *distortion and *sse1. Outside the range, v is set
+ * to INT_MAX.
+ * Relies on names from the expansion site: minc, maxc, minr, maxr, ref_mv,
+ * mvjcost, mvcost, error_per_bit, second_pred, vfp, y, y_stride, src_address,
+ * src_stride, sse, thismse, besterr, br, bc, distortion and sse1. The
+ * arguments r and c are evaluated more than once. */
+#define CHECK_BETTER(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ MV this_mv = { r, c }; \
+ v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
+ if (second_pred == NULL) \
+ thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
+ src_address, src_stride, &sse); \
+ else \
+ thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
+ src_address, src_stride, &sse, second_pred); \
+ v += thismse; \
+ if (v < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+#endif // CONFIG_EXT_INTER
#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
@@ -206,6 +234,26 @@ static INLINE const uint8_t *upre(const uint8_t *buf, int stride, int r,
}
/* checks if (r, c) has better score than previous best */
+#if CONFIG_EXT_INTER
+/* Variant used when CONFIG_EXT_INTER is on.
+ * If (r, c) lies within [minr, maxr] x [minc, maxc], obtains thismse from
+ * upsampled_pref_error() (passing second_pred, mask, mask_stride and
+ * invert_mask through), sets v to that error plus the MV cost of { r, c }
+ * relative to ref_mv, and when v beats besterr updates besterr, br, bc,
+ * *distortion and *sse1. Outside the range, v is set to INT_MAX.
+ * Relies on names from the expansion site: minc, maxc, minr, maxr, xd, vfp,
+ * src_address, src_stride, y, y_stride, second_pred, mask, mask_stride,
+ * invert_mask, w, h, sse, thismse, ref_mv, mvjcost, mvcost, error_per_bit,
+ * besterr, br, bc, distortion and sse1. The arguments r and c are evaluated
+ * more than once. */
+#define CHECK_BETTER1(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ MV this_mv = { r, c }; \
+ thismse = upsampled_pref_error( \
+ xd, vfp, src_address, src_stride, upre(y, y_stride, r, c), y_stride, \
+ second_pred, mask, mask_stride, invert_mask, w, h, &sse); \
+ v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
+ v += thismse; \
+ if (v < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+#else
#define CHECK_BETTER1(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
MV this_mv = { r, c }; \
@@ -224,6 +272,7 @@ static INLINE const uint8_t *upre(const uint8_t *buf, int stride, int r,
} else { \
v = INT_MAX; \
}
+#endif // CONFIG_EXT_INTER
#define FIRST_LEVEL_CHECKS \
{ \
@@ -327,20 +376,36 @@ static unsigned int setup_center_error(
const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
int error_per_bit, const aom_variance_fn_ptr_t *vfp,
const uint8_t *const src, const int src_stride, const uint8_t *const y,
- int y_stride, const uint8_t *second_pred, int w, int h, int offset,
- int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion) {
+ int y_stride, const uint8_t *second_pred,
+#if CONFIG_EXT_INTER
+ const uint8_t *mask, int mask_stride, int invert_mask,
+#endif
+ int w, int h, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
+ int *distortion) {
unsigned int besterr;
#if CONFIG_HIGHBITDEPTH
if (second_pred != NULL) {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
- aom_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
- y_stride);
+#if CONFIG_EXT_INTER
+ if (mask)
+ aom_highbd_comp_mask_pred(comp_pred16, second_pred, w, h, y + offset,
+ y_stride, mask, mask_stride, invert_mask);
+ else
+#endif
+ aom_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
+ y_stride);
besterr =
vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
} else {
DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
- aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
+#if CONFIG_EXT_INTER
+ if (mask)
+ aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
+ mask, mask_stride, invert_mask);
+ else
+#endif
+ aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
}
} else {
@@ -352,7 +417,13 @@ static unsigned int setup_center_error(
(void)xd;
if (second_pred != NULL) {
DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
- aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
+#if CONFIG_EXT_INTER
+ if (mask)
+ aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
+ mask, mask_stride, invert_mask);
+ else
+#endif
+ aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
} else {
besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
@@ -391,12 +462,19 @@ int av1_find_best_sub_pixel_tree_pruned_evenmore(
MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
- unsigned int *sse1, const uint8_t *second_pred, int w, int h,
- int use_upsampled_ref) {
+ unsigned int *sse1, const uint8_t *second_pred,
+#if CONFIG_EXT_INTER
+ const uint8_t *mask, int mask_stride, int invert_mask,
+#endif
+ int w, int h, int use_upsampled_ref) {
SETUP_SUBPEL_SEARCH;
- besterr = setup_center_error(
- xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
- y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion);
+ besterr =
+ setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address,
+ src_stride, y, y_stride, second_pred,
+#if CONFIG_EXT_INTER
+ mask, mask_stride, invert_mask,
+#endif
+ w, h, offset, mvjcost, mvcost, sse1, distortion);
(void)halfiters;
(void)quarteriters;
(void)eighthiters;
@@ -457,14 +535,21 @@ int av1_find_best_sub_pixel_tree_pruned_more(
MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
- unsigned int *sse1, const uint8_t *second_pred, int w, int h,
- int use_upsampled_ref) {
+ unsigned int *sse1, const uint8_t *second_pred,
+#if CONFIG_EXT_INTER
+ const uint8_t *mask, int mask_stride, int invert_mask,
+#endif
+ int w, int h, int use_upsampled_ref) {
SETUP_SUBPEL_SEARCH;
(void)use_upsampled_ref;
- besterr = setup_center_error(
- xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
- y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion);
+ besterr =
+ setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address,
+ src_stride, y, y_stride, second_pred,
+#if CONFIG_EXT_INTER
+ mask, mask_stride, invert_mask,
+#endif
+ w, h, offset, mvjcost, mvcost, sse1, distortion);
if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
@@ -519,14 +604,21 @@ int av1_find_best_sub_pixel_tree_pruned(
MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
- unsigned int *sse1, const uint8_t *second_pred, int w, int h,
- int use_upsampled_ref) {
+ unsigned int *sse1, const uint8_t *second_pred,
+#if CONFIG_EXT_INTER
+ const uint8_t *mask, int mask_stride, int invert_mask,
+#endif
+ int w, int h, int use_upsampled_ref) {
SETUP_SUBPEL_SEARCH;
(void)use_upsampled_ref;
- besterr = setup_center_error(
- xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
- y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion);
+ besterr =
+ setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address,
+ src_stride, y, y_stride, second_pred,
+#if CONFIG_EXT_INTER
+ mask, mask_stride, invert_mask,
+#endif
+ w, h, offset, mvjcost, mvcost, sse1, distortion);
if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
cost_list[4] != INT_MAX) {
@@ -612,17 +704,29 @@ static int upsampled_pref_error(const MACROBLOCKD *xd,
const aom_variance_fn_ptr_t *vfp,
const uint8_t *const src, const int src_stride,
const uint8_t *const y, int y_stride,
- const uint8_t *second_pred, int w, int h,
- unsigned int *sse) {
+ const uint8_t *second_pred,
+#if CONFIG_EXT_INTER
+ const uint8_t *mask, int mask_stride,
+ int invert_mask,
+#endif
+ int w, int h, unsigned int *sse) {
unsigned int besterr;
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
- if (second_pred != NULL)
- aom_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y,
- y_stride);
- else
+ if (second_pred != NULL) {
+#if CONFIG_EXT_INTER
+ if (mask)
+ aom_highbd_comp_mask_upsampled_pred(pred16, second_pred, w, h, y,
+ y_stride, mask, mask_stride,
+ invert_mask);
+ else
+#endif
+ aom_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y,
+ y_stride);
+ } else {
aom_highbd_upsampled_pred(pred16, w, h, y, y_stride);
+ }
besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, sse);
} else {
@@ -631,10 +735,17 @@ static int upsampled_pref_error(const MACROBLOCKD *xd,
DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
(void)xd;
#endif // CONFIG_HIGHBITDEPTH
- if (second_pred != NULL)
- aom_comp_avg_upsampled_pred(pred, second_pred, w, h, y, y_stride);
- else
+ if (second_pred != NULL) {
+#if CONFIG_EXT_INTER
+ if (mask)
+ aom_comp_mask_upsampled_pred(pred, second_pred, w, h, y, y_stride, mask,
+ mask_stride, invert_mask);
+ else
+#endif
+ aom_comp_avg_upsampled_pred(pred, second_pred, w, h, y, y_stride);
+ } else {
aom_upsampled_pred(pred, w, h, y, y_stride);
+ }
besterr = vfp->vf(pred, w, src, src_stride, sse);
#if CONFIG_HIGHBITDEPTH
@@ -647,23 +758,32 @@ static unsigned int upsampled_setup_center_error(
const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
int error_per_bit, const aom_variance_fn_ptr_t *vfp,
const uint8_t *const src, const int src_stride, const uint8_t *const y,
- int y_stride, const uint8_t *second_pred, int w, int h, int offset,
- int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion) {
- unsigned int besterr = upsampled_pref_error(
- xd, vfp, src, src_stride, y + offset, y_stride, second_pred, w, h, sse1);
+ int y_stride, const uint8_t *second_pred,
+#if CONFIG_EXT_INTER
+ const uint8_t *mask, int mask_stride, int invert_mask,
+#endif
+ int w, int h, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
+ int *distortion) {
+ unsigned int besterr = upsampled_pref_error(xd, vfp, src, src_stride,
+ y + offset, y_stride, second_pred,
+#if CONFIG_EXT_INTER
+ mask, mask_stride, invert_mask,
+#endif
+ w, h, sse1);
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
return besterr;
}
-int av1_find_best_sub_pixel_tree(MACROBLOCK *x, const MV *ref_mv, int allow_hp,
- int error_per_bit,
- const aom_variance_fn_ptr_t *vfp,
- int forced_stop, int iters_per_step,
- int *cost_list, int *mvjcost, int *mvcost[2],
- int *distortion, unsigned int *sse1,
- const uint8_t *second_pred, int w, int h,
- int use_upsampled_ref) {
+int av1_find_best_sub_pixel_tree(
+ MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
+ const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
+ int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
+ unsigned int *sse1, const uint8_t *second_pred,
+#if CONFIG_EXT_INTER
+ const uint8_t *mask, int mask_stride, int invert_mask,
+#endif
+ int w, int h, int use_upsampled_ref) {
const uint8_t *const src_address = x->plane[0].src.buf;
const int src_stride = x->plane[0].src.stride;
const MACROBLOCKD *xd = &x->e_mbd;
@@ -700,12 +820,19 @@ int av1_find_best_sub_pixel_tree(MACROBLOCK *x, const MV *ref_mv, int allow_hp,
if (use_upsampled_ref)
besterr = upsampled_setup_center_error(
xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
- y_stride, second_pred, w, h, (offset * 8), mvjcost, mvcost, sse1,
- distortion);
+ y_stride, second_pred,
+#if CONFIG_EXT_INTER
+ mask, mask_stride, invert_mask,
+#endif
+ w, h, (offset * 8), mvjcost, mvcost, sse1, distortion);
else
- besterr = setup_center_error(
- xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
- y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion);
+ besterr =
+ setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address,
+ src_stride, y, y_stride, second_pred,
+#if CONFIG_EXT_INTER
+ mask, mask_stride, invert_mask,
+#endif
+ w, h, offset, mvjcost, mvcost, sse1, distortion);
(void)cost_list; // to silence compiler warning
@@ -721,14 +848,23 @@ int av1_find_best_sub_pixel_tree(MACROBLOCK *x, const MV *ref_mv, int allow_hp,
const uint8_t *const pre_address = y + tr * y_stride + tc;
thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
- pre_address, y_stride, second_pred, w,
- h, &sse);
+ pre_address, y_stride, second_pred,
+#if CONFIG_EXT_INTER
+ mask, mask_stride, invert_mask,
+#endif
+ w, h, &sse);
} else {
const uint8_t *const pre_address =
y + (tr >> 3) * y_stride + (tc >> 3);
if (second_pred == NULL)
thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse);
+#if CONFIG_EXT_INTER
+ else if (mask)
+ thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, second_pred, mask,
+ mask_stride, invert_mask, &sse);
+#endif
else
thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred);
@@ -760,15 +896,24 @@ int av1_find_best_sub_pixel_tree(MACROBLOCK *x, const MV *ref_mv, int allow_hp,
if (use_upsampled_ref) {
const uint8_t *const pre_address = y + tr * y_stride + tc;
- thismse =
- upsampled_pref_error(xd, vfp, src_address, src_stride, pre_address,
- y_stride, second_pred, w, h, &sse);
+ thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
+ pre_address, y_stride, second_pred,
+#if CONFIG_EXT_INTER
+ mask, mask_stride, invert_mask,
+#endif
+ w, h, &sse);
} else {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
if (second_pred == NULL)
thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
src_stride, &sse);
+#if CONFIG_EXT_INTER
+ else if (mask)
+ thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, second_pred, mask,
+ mask_stride, invert_mask, &sse);
+#endif
else
thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred);
@@ -822,6 +967,102 @@ int av1_find_best_sub_pixel_tree(MACROBLOCK *x, const MV *ref_mv, int allow_hp,
#undef PRE
#undef CHECK_BETTER
+#if CONFIG_WARPED_MOTION
+// Builds the inter predictor for the block at (mi_row, mi_col) with
+// av1_build_inter_predictors_sby(), measures it against the plane-0 source
+// with the variance function for bsize, and adds the cost of coding this_mv
+// relative to the first reference MV candidate of the block's first
+// reference frame. Returns that sum.
+unsigned int av1_compute_motion_cost(const AV1_COMP *cpi, MACROBLOCK *const x,
+                                     BLOCK_SIZE bsize, int mi_row, int mi_col,
+                                     const MV *this_mv) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const MV ref_mv = x->mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv;
+  // Buffer pointers and strides are captured before the predictor is built.
+  const uint8_t *const src_buf = x->plane[0].src.buf;
+  const int src_pitch = x->plane[0].src.stride;
+  uint8_t *const pred_buf = xd->plane[0].dst.buf;
+  const int pred_pitch = xd->plane[0].dst.stride;
+  unsigned int sse;
+  unsigned int cost;
+
+  av1_build_inter_predictors_sby(&cpi->common, xd, mi_row, mi_col, NULL, bsize);
+  cost = cpi->fn_ptr[bsize].vf(pred_buf, pred_pitch, src_buf, src_pitch, &sse);
+  cost +=
+      mv_err_cost(this_mv, &ref_mv, x->nmvjointcost, x->mvcost, x->errorperbit);
+  return cost;
+}
+
+// Refine MV in a small range
+// Runs at most two rounds of a four-neighbour search around mbmi->mv[0].
+// When cm->allow_high_precision_mv is set, the candidate offsets are the
+// first four entries of the table (step 1); otherwise the last four (step 2).
+// Each in-range candidate is written into mbmi->mv[0], its warp model is
+// re-derived with find_projection(), and it is scored with
+// av1_compute_motion_cost(). On return, mbmi->mv[0] and mbmi->wm_params[0]
+// hold the best candidate found and the function returns its cost.
+unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
+                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
+                                  int *pts, int *pts_inref) {
+  const AV1_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  const MV offsets[8] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 },
+                          { 0, -2 }, { 2, 0 }, { 0, 2 }, { -2, 0 } };
+  const MV ref_mv = x->mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv;
+  // Candidates are tried in place, through the block's own MV fields.
+  int16_t *const cand_row = &mbmi->mv[0].as_mv.row;
+  int16_t *const cand_col = &mbmi->mv[0].as_mv.col;
+  int16_t best_row = *cand_row;
+  int16_t best_col = *cand_col;
+  WarpedMotionParams best_wm_params = mbmi->wm_params[0];
+  const int first = cm->allow_high_precision_mv ? 0 : 4;
+  unsigned int bestmse;
+  int minc, maxc, minr, maxr;
+  int pass;
+
+  av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr,
+                                 &ref_mv);
+
+  // Calculate the center position's error
+  assert(best_col >= minc && best_col <= maxc && best_row >= minr &&
+         best_row <= maxr);
+  bestmse = av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col,
+                                    &mbmi->mv[0].as_mv);
+
+  // MV search
+  for (pass = 0; pass < 2; ++pass) {
+    int winner = -1;
+    int idx;
+
+    for (idx = first; idx < first + 4; ++idx) {
+      unsigned int thismse;
+
+      *cand_row = best_row + offsets[idx].row;
+      *cand_col = best_col + offsets[idx].col;
+
+      // Skip candidates that fall outside the allowed search window.
+      if (*cand_col < minc || *cand_col > maxc || *cand_row < minr ||
+          *cand_row > maxr)
+        continue;
+
+      MV this_mv = { *cand_row, *cand_col };
+      // A non-zero return skips the candidate (presumably the projection
+      // could not be derived -- confirm against find_projection()).
+      if (find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize,
+                          *cand_row, *cand_col, &mbmi->wm_params[0], mi_row,
+                          mi_col))
+        continue;
+
+      thismse =
+          av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col, &this_mv);
+      if (thismse < bestmse) {
+        winner = idx;
+        best_wm_params = mbmi->wm_params[0];
+        bestmse = thismse;
+      }
+    }
+
+    // No neighbour improved on the current centre: stop early.
+    if (winner < 0) break;
+
+    best_row += offsets[winner].row;
+    best_col += offsets[winner].col;
+  }
+
+  // Leave the block holding the best MV and its matching warp parameters.
+  *cand_row = best_row;
+  *cand_col = best_col;
+  mbmi->wm_params[0] = best_wm_params;
+
+  return bestmse;
+}
+#endif // CONFIG_WARPED_MOTION
+
static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
int range) {
return ((row - range) >= mv_limits->row_min) &
@@ -1232,6 +1473,27 @@ int av1_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
: 0);
}
+#if CONFIG_EXT_INTER
+// Returns vfp->msvf() evaluated with zero sub-pixel offsets between the
+// plane-0 source block and the reference block addressed by best_mv, using
+// second_pred with the given mask. When use_mvcost is non-zero, the cost of
+// best_mv (scaled by 8) relative to center_mv is added.
+// NOTE(review): the source buffer is passed here as msvf's first operand,
+// whereas the other msvf call sites in this file pass the prediction first
+// and the source second -- confirm the operand order is intended.
+int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv,
+                            const MV *center_mv, const uint8_t *second_pred,
+                            const uint8_t *mask, int mask_stride,
+                            int invert_mask, const aom_variance_fn_ptr_t *vfp,
+                            int use_mvcost) {
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const struct buf_2d *const src = &x->plane[0].src;
+  const struct buf_2d *const ref = &xd->plane[0].pre[0];
+  const MV scaled_mv = { best_mv->row * 8, best_mv->col * 8 };
+  const int mv_penalty =
+      use_mvcost ? mv_err_cost(&scaled_mv, center_mv, x->nmvjointcost,
+                               x->mvcost, x->errorperbit)
+                 : 0;
+  unsigned int sse_unused;
+
+  return vfp->msvf(src->buf, src->stride, 0, 0, get_buf_from_mv(ref, best_mv),
+                   ref->stride, second_pred, mask, mask_stride, invert_mask,
+                   &sse_unused) +
+         mv_penalty;
+}
+#endif
+
int av1_hex_search(MACROBLOCK *x, MV *start_mv, int search_param,
int sad_per_bit, int do_init_search, int *cost_list,
const aom_variance_fn_ptr_t *vfp, int use_mvcost,
@@ -1685,17 +1947,12 @@ unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
DECLARE_ALIGNED(16, int16_t, src_hbuf[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, int16_t, src_vbuf[MAX_SB_SQUARE]);
int idx;
- const int bw = 4 << b_width_log2_lookup[bsize];
- const int bh = 4 << b_height_log2_lookup[bsize];
- const int search_width = bw << 1;
- const int search_height = bh << 1;
const int src_stride = x->plane[0].src.stride;
const int ref_stride = xd->plane[0].pre[0].stride;
uint8_t const *ref_buf, *src_buf;
MV *tmp_mv = &xd->mi[0]->mbmi.mv[0].as_mv;
unsigned int best_sad, tmp_sad, sad_arr[4];
MV this_mv;
- const int norm_factor = 3 + (bw >> 5);
const YV12_BUFFER_CONFIG *scaled_ref_frame =
av1_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);
@@ -1724,6 +1981,12 @@ unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
}
#endif
+ const int bw = 4 << b_width_log2_lookup[bsize];
+ const int bh = 4 << b_height_log2_lookup[bsize];
+ const int search_width = bw << 1;
+ const int search_height = bh << 1;
+ const int norm_factor = 3 + (bw >> 5);
+
// Set up prediction 1-D reference set
ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
for (idx = 0; idx < search_width; idx += 16) {
@@ -2195,9 +2458,13 @@ int av1_refining_search_sad(MACROBLOCK *x, MV *ref_mv, int error_per_bit,
}
// This function is called when we do joint motion search in comp_inter_inter
-// mode.
+// mode, or when searching for one component of an ext-inter compound mode.
int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range,
const aom_variance_fn_ptr_t *fn_ptr,
+#if CONFIG_EXT_INTER
+ const uint8_t *mask, int mask_stride,
+ int invert_mask,
+#endif
const MV *center_mv, const uint8_t *second_pred) {
const MV neighbors[8] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
{ -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
@@ -2211,10 +2478,18 @@ int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range,
clamp_mv(best_mv, x->mv_limits.col_min, x->mv_limits.col_max,
x->mv_limits.row_min, x->mv_limits.row_max);
- best_sad =
- fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
- in_what->stride, second_pred) +
- mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
+#if CONFIG_EXT_INTER
+ if (mask)
+ best_sad = fn_ptr->msdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, best_mv), in_what->stride,
+ second_pred, mask, mask_stride, invert_mask) +
+ mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
+ else
+#endif
+ best_sad =
+ fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
+ in_what->stride, second_pred) +
+ mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
for (i = 0; i < search_range; ++i) {
int best_site = -1;
@@ -2224,9 +2499,17 @@ int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range,
best_mv->col + neighbors[j].col };
if (is_mv_in(&x->mv_limits, &mv)) {
- unsigned int sad =
- fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
- in_what->stride, second_pred);
+ unsigned int sad;
+#if CONFIG_EXT_INTER
+ if (mask)
+ sad = fn_ptr->msdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &mv), in_what->stride,
+ second_pred, mask, mask_stride, invert_mask);
+ else
+#endif
+ sad = fn_ptr->sdaf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &mv), in_what->stride,
+ second_pred);
if (sad < best_sad) {
sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
if (sad < best_sad) {
@@ -2337,612 +2620,20 @@ int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
return var;
}
-#if CONFIG_EXT_INTER
-/* returns subpixel variance error function */
-#define DIST(r, c) \
- vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, src_stride, \
- mask, mask_stride, &sse)
-
-/* checks if (r, c) has better score than previous best */
-
-#define MVC(r, c) \
- (mvcost \
- ? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + mvcost[0][((r)-rr)] + \
- mvcost[1][((c)-rc)]) * \
- error_per_bit + \
- 4096) >> \
- 13 \
- : 0)
-
-#define CHECK_BETTER(v, r, c) \
- if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
- thismse = (DIST(r, c)); \
- if ((v = MVC(r, c) + thismse) < besterr) { \
- besterr = v; \
- br = r; \
- bc = c; \
- *distortion = thismse; \
- *sse1 = sse; \
- } \
- } else { \
- v = INT_MAX; \
- }
-
-#undef CHECK_BETTER0
-#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
-
-#undef CHECK_BETTER1
-#define CHECK_BETTER1(v, r, c) \
- if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
- thismse = upsampled_masked_pref_error(xd, mask, mask_stride, vfp, z, \
- src_stride, upre(y, y_stride, r, c), \
- y_stride, w, h, &sse); \
- if ((v = MVC(r, c) + thismse) < besterr) { \
- besterr = v; \
- br = r; \
- bc = c; \
- *distortion = thismse; \
- *sse1 = sse; \
- } \
- } else { \
- v = INT_MAX; \
- }
-
-int av1_find_best_masked_sub_pixel_tree(
- const MACROBLOCK *x, const uint8_t *mask, int mask_stride, MV *bestmv,
- const MV *ref_mv, int allow_hp, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
- int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
- int is_second) {
- const uint8_t *const z = x->plane[0].src.buf;
- const int src_stride = x->plane[0].src.stride;
- const MACROBLOCKD *xd = &x->e_mbd;
- unsigned int besterr = INT_MAX;
- unsigned int sse;
- int thismse;
- unsigned int whichdir;
- unsigned int halfiters = iters_per_step;
- unsigned int quarteriters = iters_per_step;
- unsigned int eighthiters = iters_per_step;
-
- const int y_stride = xd->plane[0].pre[is_second].stride;
- const int offset = bestmv->row * y_stride + bestmv->col;
- const uint8_t *const y = xd->plane[0].pre[is_second].buf;
-
- int rr = ref_mv->row;
- int rc = ref_mv->col;
- int br = bestmv->row * 8;
- int bc = bestmv->col * 8;
- int hstep = 4;
- int tr = br;
- int tc = bc;
- int minc, maxc, minr, maxr;
-
- av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr,
- ref_mv);
-
- // central mv
- bestmv->row *= 8;
- bestmv->col *= 8;
-
- // calculate central point error
- besterr =
- vfp->mvf(y + offset, y_stride, z, src_stride, mask, mask_stride, sse1);
- *distortion = besterr;
- besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
-
- // 1/2 pel
- FIRST_LEVEL_CHECKS;
- if (halfiters > 1) {
- SECOND_LEVEL_CHECKS;
- }
- tr = br;
- tc = bc;
-
- // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
- if (forced_stop != 2) {
- hstep >>= 1;
- FIRST_LEVEL_CHECKS;
- if (quarteriters > 1) {
- SECOND_LEVEL_CHECKS;
- }
- tr = br;
- tc = bc;
- }
-
- if (allow_hp && forced_stop == 0) {
- hstep >>= 1;
- FIRST_LEVEL_CHECKS;
- if (eighthiters > 1) {
- SECOND_LEVEL_CHECKS;
- }
- tr = br;
- tc = bc;
- }
- // These lines insure static analysis doesn't warn that
- // tr and tc aren't used after the above point.
- (void)tr;
- (void)tc;
-
- bestmv->row = br;
- bestmv->col = bc;
-
- return besterr;
-}
-
-static unsigned int setup_masked_center_error(
- const uint8_t *mask, int mask_stride, const MV *bestmv, const MV *ref_mv,
- int error_per_bit, const aom_variance_fn_ptr_t *vfp,
- const uint8_t *const src, const int src_stride, const uint8_t *const y,
- int y_stride, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
- int *distortion) {
- unsigned int besterr;
- besterr =
- vfp->mvf(y + offset, y_stride, src, src_stride, mask, mask_stride, sse1);
- *distortion = besterr;
- besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
- return besterr;
-}
-
-static int upsampled_masked_pref_error(const MACROBLOCKD *xd,
- const uint8_t *mask, int mask_stride,
- const aom_variance_fn_ptr_t *vfp,
- const uint8_t *const src,
- const int src_stride,
- const uint8_t *const y, int y_stride,
- int w, int h, unsigned int *sse) {
- unsigned int besterr;
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
- aom_highbd_upsampled_pred(pred16, w, h, y, y_stride);
-
- besterr = vfp->mvf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, mask,
- mask_stride, sse);
- } else {
- DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
-#else
- DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
- (void)xd;
-#endif // CONFIG_HIGHBITDEPTH
- aom_upsampled_pred(pred, w, h, y, y_stride);
-
- besterr = vfp->mvf(pred, w, src, src_stride, mask, mask_stride, sse);
-#if CONFIG_HIGHBITDEPTH
- }
-#endif
- return besterr;
-}
-
-static unsigned int upsampled_setup_masked_center_error(
- const MACROBLOCKD *xd, const uint8_t *mask, int mask_stride,
- const MV *bestmv, const MV *ref_mv, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp, const uint8_t *const src,
- const int src_stride, const uint8_t *const y, int y_stride, int w, int h,
- int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
- int *distortion) {
- unsigned int besterr =
- upsampled_masked_pref_error(xd, mask, mask_stride, vfp, src, src_stride,
- y + offset, y_stride, w, h, sse1);
- *distortion = besterr;
- besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
- return besterr;
-}
-
-int av1_find_best_masked_sub_pixel_tree_up(
- const AV1_COMP *cpi, MACROBLOCK *x, const uint8_t *mask, int mask_stride,
- int mi_row, int mi_col, MV *bestmv, const MV *ref_mv, int allow_hp,
- int error_per_bit, const aom_variance_fn_ptr_t *vfp, int forced_stop,
- int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion,
- unsigned int *sse1, int is_second, int use_upsampled_ref) {
- const uint8_t *const z = x->plane[0].src.buf;
- const uint8_t *const src_address = z;
- const int src_stride = x->plane[0].src.stride;
- MACROBLOCKD *xd = &x->e_mbd;
- struct macroblockd_plane *const pd = &xd->plane[0];
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- unsigned int besterr = INT_MAX;
- unsigned int sse;
- unsigned int thismse;
-
- int rr = ref_mv->row;
- int rc = ref_mv->col;
- int br = bestmv->row * 8;
- int bc = bestmv->col * 8;
- int hstep = 4;
- int iter;
- int round = 3 - forced_stop;
- int tr = br;
- int tc = bc;
- const MV *search_step = search_step_table;
- int idx, best_idx = -1;
- unsigned int cost_array[5];
- int kr, kc;
- const int w = block_size_wide[mbmi->sb_type];
- const int h = block_size_high[mbmi->sb_type];
- int offset;
- int y_stride;
- const uint8_t *y;
-
- const struct buf_2d backup_pred = pd->pre[is_second];
- int minc, maxc, minr, maxr;
-
- av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr,
- ref_mv);
-
- if (use_upsampled_ref) {
- int ref = xd->mi[0]->mbmi.ref_frame[is_second];
- const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
- setup_pred_plane(&pd->pre[is_second], mbmi->sb_type,
- upsampled_ref->y_buffer, upsampled_ref->y_crop_width,
- upsampled_ref->y_crop_height, upsampled_ref->y_stride,
- (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
- pd->subsampling_y);
- }
- y = pd->pre[is_second].buf;
- y_stride = pd->pre[is_second].stride;
- offset = bestmv->row * y_stride + bestmv->col;
-
- if (!allow_hp)
- if (round == 3) round = 2;
-
- bestmv->row *= 8;
- bestmv->col *= 8;
-
- // use_upsampled_ref can be 0 or 1
- if (use_upsampled_ref)
- besterr = upsampled_setup_masked_center_error(
- xd, mask, mask_stride, bestmv, ref_mv, error_per_bit, vfp, z,
- src_stride, y, y_stride, w, h, (offset * 8), mvjcost, mvcost, sse1,
- distortion);
- else
- besterr = setup_masked_center_error(
- mask, mask_stride, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y,
- y_stride, offset, mvjcost, mvcost, sse1, distortion);
-
- for (iter = 0; iter < round; ++iter) {
- // Check vertical and horizontal sub-pixel positions.
- for (idx = 0; idx < 4; ++idx) {
- tr = br + search_step[idx].row;
- tc = bc + search_step[idx].col;
- if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
- MV this_mv = { tr, tc };
-
- if (use_upsampled_ref) {
- const uint8_t *const pre_address = y + tr * y_stride + tc;
-
- thismse = upsampled_masked_pref_error(
- xd, mask, mask_stride, vfp, src_address, src_stride, pre_address,
- y_stride, w, h, &sse);
- } else {
- const uint8_t *const pre_address =
- y + (tr >> 3) * y_stride + (tc >> 3);
- thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
- src_address, src_stride, mask, mask_stride, &sse);
- }
-
- cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
- mvcost, error_per_bit);
-
- if (cost_array[idx] < besterr) {
- best_idx = idx;
- besterr = cost_array[idx];
- *distortion = thismse;
- *sse1 = sse;
- }
- } else {
- cost_array[idx] = INT_MAX;
- }
- }
-
- // Check diagonal sub-pixel position
- kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
- kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
-
- tc = bc + kc;
- tr = br + kr;
- if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
- MV this_mv = { tr, tc };
-
- if (use_upsampled_ref) {
- const uint8_t *const pre_address = y + tr * y_stride + tc;
-
- thismse = upsampled_masked_pref_error(
- xd, mask, mask_stride, vfp, src_address, src_stride, pre_address,
- y_stride, w, h, &sse);
- } else {
- const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
-
- thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr), src_address,
- src_stride, mask, mask_stride, &sse);
- }
-
- cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
- error_per_bit);
-
- if (cost_array[4] < besterr) {
- best_idx = 4;
- besterr = cost_array[4];
- *distortion = thismse;
- *sse1 = sse;
- }
- } else {
- cost_array[idx] = INT_MAX;
- }
-
- if (best_idx < 4 && best_idx >= 0) {
- br += search_step[best_idx].row;
- bc += search_step[best_idx].col;
- } else if (best_idx == 4) {
- br = tr;
- bc = tc;
- }
-
- if (iters_per_step > 1 && best_idx != -1) {
- if (use_upsampled_ref) {
- SECOND_LEVEL_CHECKS_BEST(1);
- } else {
- SECOND_LEVEL_CHECKS_BEST(0);
- }
- }
-
- tr = br;
- tc = bc;
-
- search_step += 4;
- hstep >>= 1;
- best_idx = -1;
- }
-
- // These lines insure static analysis doesn't warn that
- // tr and tc aren't used after the above point.
- (void)tr;
- (void)tc;
-
- bestmv->row = br;
- bestmv->col = bc;
-
- if (use_upsampled_ref) {
- pd->pre[is_second] = backup_pred;
- }
-
- return besterr;
-}
-
-#undef DIST
-#undef MVC
-#undef CHECK_BETTER
-
-static int get_masked_mvpred_var(const MACROBLOCK *x, const uint8_t *mask,
- int mask_stride, const MV *best_mv,
- const MV *center_mv,
- const aom_variance_fn_ptr_t *vfp,
- int use_mvcost, int is_second) {
- const MACROBLOCKD *const xd = &x->e_mbd;
- const struct buf_2d *const what = &x->plane[0].src;
- const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
- const MV mv = { best_mv->row * 8, best_mv->col * 8 };
- unsigned int unused;
-
- return vfp->mvf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
- in_what->stride, mask, mask_stride, &unused) +
- (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
- x->errorperbit)
- : 0);
-}
-
-int masked_refining_search_sad(const MACROBLOCK *x, const uint8_t *mask,
- int mask_stride, MV *ref_mv, int error_per_bit,
- int search_range,
- const aom_variance_fn_ptr_t *fn_ptr,
- const MV *center_mv, int is_second) {
- const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
- const MACROBLOCKD *const xd = &x->e_mbd;
- const struct buf_2d *const what = &x->plane[0].src;
- const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
- const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
- unsigned int best_sad =
- fn_ptr->msdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
- in_what->stride, mask, mask_stride) +
- mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
- int i, j;
-
- for (i = 0; i < search_range; i++) {
- int best_site = -1;
-
- for (j = 0; j < 4; j++) {
- const MV mv = { ref_mv->row + neighbors[j].row,
- ref_mv->col + neighbors[j].col };
- if (is_mv_in(&x->mv_limits, &mv)) {
- unsigned int sad =
- fn_ptr->msdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
- in_what->stride, mask, mask_stride);
- if (sad < best_sad) {
- sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
- if (sad < best_sad) {
- best_sad = sad;
- best_site = j;
- }
- }
- }
- }
-
- if (best_site == -1) {
- break;
- } else {
- ref_mv->row += neighbors[best_site].row;
- ref_mv->col += neighbors[best_site].col;
- }
- }
- return best_sad;
-}
-
-int masked_diamond_search_sad(const MACROBLOCK *x,
- const search_site_config *cfg,
- const uint8_t *mask, int mask_stride, MV *ref_mv,
- MV *best_mv, int search_param, int sad_per_bit,
- int *num00, const aom_variance_fn_ptr_t *fn_ptr,
- const MV *center_mv, int is_second) {
- const MACROBLOCKD *const xd = &x->e_mbd;
- const struct buf_2d *const what = &x->plane[0].src;
- const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
- // search_param determines the length of the initial step and hence the number
- // of iterations
- // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
- // (MAX_FIRST_STEP/4) pel... etc.
- const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step];
- const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
- const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
- const uint8_t *best_address, *in_what_ref;
- int best_sad = INT_MAX;
- int best_site = 0;
- int last_site = 0;
- int i, j, step;
-
- clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
- x->mv_limits.row_min, x->mv_limits.row_max);
- in_what_ref = get_buf_from_mv(in_what, ref_mv);
- best_address = in_what_ref;
- *num00 = 0;
- *best_mv = *ref_mv;
-
- // Check the starting position
- best_sad = fn_ptr->msdf(what->buf, what->stride, best_address,
- in_what->stride, mask, mask_stride) +
- mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
-
- i = 1;
-
- for (step = 0; step < tot_steps; step++) {
- for (j = 0; j < cfg->searches_per_step; j++) {
- const MV mv = { best_mv->row + ss[i].mv.row,
- best_mv->col + ss[i].mv.col };
- if (is_mv_in(&x->mv_limits, &mv)) {
- int sad =
- fn_ptr->msdf(what->buf, what->stride, best_address + ss[i].offset,
- in_what->stride, mask, mask_stride);
- if (sad < best_sad) {
- sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
- if (sad < best_sad) {
- best_sad = sad;
- best_site = i;
- }
- }
- }
-
- i++;
- }
-
- if (best_site != last_site) {
- best_mv->row += ss[best_site].mv.row;
- best_mv->col += ss[best_site].mv.col;
- best_address += ss[best_site].offset;
- last_site = best_site;
-#if defined(NEW_DIAMOND_SEARCH)
- while (1) {
- const MV this_mv = { best_mv->row + ss[best_site].mv.row,
- best_mv->col + ss[best_site].mv.col };
- if (is_mv_in(&x->mv_limits, &this_mv)) {
- int sad = fn_ptr->msdf(what->buf, what->stride,
- best_address + ss[best_site].offset,
- in_what->stride, mask, mask_stride);
- if (sad < best_sad) {
- sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
- if (sad < best_sad) {
- best_sad = sad;
- best_mv->row += ss[best_site].mv.row;
- best_mv->col += ss[best_site].mv.col;
- best_address += ss[best_site].offset;
- continue;
- }
- }
- }
- break;
- }
-#endif
- } else if (best_address == in_what_ref) {
- (*num00)++;
- }
- }
- return best_sad;
-}
-
-int av1_masked_full_pixel_diamond(const AV1_COMP *cpi, MACROBLOCK *x,
- const uint8_t *mask, int mask_stride,
- MV *mvp_full, int step_param, int sadpb,
- int further_steps, int do_refine,
- const aom_variance_fn_ptr_t *fn_ptr,
- const MV *ref_mv, MV *dst_mv, int is_second) {
- MV temp_mv;
- int thissme, n, num00 = 0;
- int bestsme = masked_diamond_search_sad(x, &cpi->ss_cfg, mask, mask_stride,
- mvp_full, &temp_mv, step_param, sadpb,
- &n, fn_ptr, ref_mv, is_second);
- if (bestsme < INT_MAX)
- bestsme = get_masked_mvpred_var(x, mask, mask_stride, &temp_mv, ref_mv,
- fn_ptr, 1, is_second);
- *dst_mv = temp_mv;
-
- // If there won't be more n-step search, check to see if refining search is
- // needed.
- if (n > further_steps) do_refine = 0;
-
- while (n < further_steps) {
- ++n;
-
- if (num00) {
- num00--;
- } else {
- thissme = masked_diamond_search_sad(
- x, &cpi->ss_cfg, mask, mask_stride, mvp_full, &temp_mv,
- step_param + n, sadpb, &num00, fn_ptr, ref_mv, is_second);
- if (thissme < INT_MAX)
- thissme = get_masked_mvpred_var(x, mask, mask_stride, &temp_mv, ref_mv,
- fn_ptr, 1, is_second);
-
- // check to see if refining search is needed.
- if (num00 > further_steps - n) do_refine = 0;
-
- if (thissme < bestsme) {
- bestsme = thissme;
- *dst_mv = temp_mv;
- }
- }
- }
-
- // final 1-away diamond refining search
- if (do_refine) {
- const int search_range = 8;
- MV best_mv = *dst_mv;
- thissme =
- masked_refining_search_sad(x, mask, mask_stride, &best_mv, sadpb,
- search_range, fn_ptr, ref_mv, is_second);
- if (thissme < INT_MAX)
- thissme = get_masked_mvpred_var(x, mask, mask_stride, &best_mv, ref_mv,
- fn_ptr, 1, is_second);
- if (thissme < bestsme) {
- bestsme = thissme;
- *dst_mv = best_mv;
- }
- }
- return bestsme;
-}
-#endif // CONFIG_EXT_INTER
-
#if CONFIG_MOTION_VAR
/* returns subpixel variance error function */
#define DIST(r, c) \
vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, mask, &sse)
/* checks if (r, c) has better score than previous best */
-#define MVC(r, c) \
- (mvcost \
- ? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + mvcost[0][((r)-rr)] + \
- mvcost[1][((c)-rc)]) * \
- error_per_bit + \
- 4096) >> \
- 13 \
- : 0)
+#define MVC(r, c) \
+ (unsigned int)(mvcost \
+ ? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
+ mvcost[0][((r)-rr)] + (int64_t)mvcost[1][((c)-rc)]) * \
+ error_per_bit + \
+ 4096) >> \
+ 13 \
+ : 0)
#define CHECK_BETTER(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
@@ -3452,15 +3143,21 @@ int av1_obmc_full_pixel_diamond(const AV1_COMP *cpi, MACROBLOCK *x,
(void)thismse; \
(void)cost_list;
// Return the maximum MV.
-int av1_return_max_sub_pixel_mv(MACROBLOCK *x, const MV *ref_mv, int allow_hp,
- int error_per_bit,
- const aom_variance_fn_ptr_t *vfp,
- int forced_stop, int iters_per_step,
- int *cost_list, int *mvjcost, int *mvcost[2],
- int *distortion, unsigned int *sse1,
- const uint8_t *second_pred, int w, int h,
- int use_upsampled_ref) {
+int av1_return_max_sub_pixel_mv(
+ MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
+ const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
+ int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
+ unsigned int *sse1, const uint8_t *second_pred,
+#if CONFIG_EXT_INTER
+ const uint8_t *mask, int mask_stride, int invert_mask,
+#endif
+ int w, int h, int use_upsampled_ref) {
COMMON_MV_TEST;
+#if CONFIG_EXT_INTER
+ (void)mask;
+ (void)mask_stride;
+ (void)invert_mask;
+#endif
(void)minr;
(void)minc;
bestmv->row = maxr;
@@ -3472,17 +3169,23 @@ int av1_return_max_sub_pixel_mv(MACROBLOCK *x, const MV *ref_mv, int allow_hp,
return besterr;
}
// Return the minimum MV.
-int av1_return_min_sub_pixel_mv(MACROBLOCK *x, const MV *ref_mv, int allow_hp,
- int error_per_bit,
- const aom_variance_fn_ptr_t *vfp,
- int forced_stop, int iters_per_step,
- int *cost_list, int *mvjcost, int *mvcost[2],
- int *distortion, unsigned int *sse1,
- const uint8_t *second_pred, int w, int h,
- int use_upsampled_ref) {
+int av1_return_min_sub_pixel_mv(
+ MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
+ const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
+ int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
+ unsigned int *sse1, const uint8_t *second_pred,
+#if CONFIG_EXT_INTER
+ const uint8_t *mask, int mask_stride, int invert_mask,
+#endif
+ int w, int h, int use_upsampled_ref) {
COMMON_MV_TEST;
(void)maxr;
(void)maxc;
+#if CONFIG_EXT_INTER
+ (void)mask;
+ (void)mask_stride;
+ (void)invert_mask;
+#endif
bestmv->row = minr;
bestmv->col = minc;
besterr = 0;
diff --git a/third_party/aom/av1/encoder/mcomp.h b/third_party/aom/av1/encoder/mcomp.h
index 8465860ad..7e8b4b29d 100644
--- a/third_party/aom/av1/encoder/mcomp.h
+++ b/third_party/aom/av1/encoder/mcomp.h
@@ -58,6 +58,13 @@ int av1_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
int av1_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
const MV *center_mv, const uint8_t *second_pred,
const aom_variance_fn_ptr_t *vfp, int use_mvcost);
+#if CONFIG_EXT_INTER
+int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv,
+ const MV *center_mv, const uint8_t *second_pred,
+ const uint8_t *mask, int mask_stride,
+ int invert_mask, const aom_variance_fn_ptr_t *vfp,
+ int use_mvcost);
+#endif
struct AV1_COMP;
struct SPEED_FEATURES;
@@ -91,8 +98,11 @@ typedef int(fractional_mv_step_fp)(
const aom_variance_fn_ptr_t *vfp,
int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
- int *distortion, unsigned int *sse1, const uint8_t *second_pred, int w,
- int h, int use_upsampled_ref);
+ int *distortion, unsigned int *sse1, const uint8_t *second_pred,
+#if CONFIG_EXT_INTER
+ const uint8_t *mask, int mask_stride, int invert_mask,
+#endif
+ int w, int h, int use_upsampled_ref);
extern fractional_mv_step_fp av1_find_best_sub_pixel_tree;
extern fractional_mv_step_fp av1_find_best_sub_pixel_tree_pruned;
@@ -113,6 +123,10 @@ typedef int (*av1_diamond_search_fn_t)(
int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range,
const aom_variance_fn_ptr_t *fn_ptr,
+#if CONFIG_EXT_INTER
+ const uint8_t *mask, int mask_stride,
+ int invert_mask,
+#endif
const MV *center_mv, const uint8_t *second_pred);
struct AV1_COMP;
@@ -122,27 +136,6 @@ int av1_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x,
int error_per_bit, int *cost_list, const MV *ref_mv,
int var_max, int rd);
-#if CONFIG_EXT_INTER
-int av1_find_best_masked_sub_pixel_tree(
- const MACROBLOCK *x, const uint8_t *mask, int mask_stride, MV *bestmv,
- const MV *ref_mv, int allow_hp, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
- int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
- int is_second);
-int av1_find_best_masked_sub_pixel_tree_up(
- const struct AV1_COMP *cpi, MACROBLOCK *x, const uint8_t *mask,
- int mask_stride, int mi_row, int mi_col, MV *bestmv, const MV *ref_mv,
- int allow_hp, int error_per_bit, const aom_variance_fn_ptr_t *vfp,
- int forced_stop, int iters_per_step, int *mvjcost, int *mvcost[2],
- int *distortion, unsigned int *sse1, int is_second, int use_upsampled_ref);
-int av1_masked_full_pixel_diamond(const struct AV1_COMP *cpi, MACROBLOCK *x,
- const uint8_t *mask, int mask_stride,
- MV *mvp_full, int step_param, int sadpb,
- int further_steps, int do_refine,
- const aom_variance_fn_ptr_t *fn_ptr,
- const MV *ref_mv, MV *dst_mv, int is_second);
-#endif // CONFIG_EXT_INTER
-
#if CONFIG_MOTION_VAR
int av1_obmc_full_pixel_diamond(const struct AV1_COMP *cpi, MACROBLOCK *x,
MV *mvp_full, int step_param, int sadpb,
@@ -160,4 +153,14 @@ int av1_find_best_obmc_sub_pixel_tree_up(
} // extern "C"
#endif
+#if CONFIG_WARPED_MOTION
+unsigned int av1_compute_motion_cost(const struct AV1_COMP *cpi,
+ MACROBLOCK *const x, BLOCK_SIZE bsize,
+ int mi_row, int mi_col, const MV *this_mv);
+unsigned int av1_refine_warped_mv(const struct AV1_COMP *cpi,
+ MACROBLOCK *const x, BLOCK_SIZE bsize,
+ int mi_row, int mi_col, int *pts,
+ int *pts_inref);
+#endif // CONFIG_WARPED_MOTION
+
#endif // AV1_ENCODER_MCOMP_H_
diff --git a/third_party/aom/av1/encoder/palette.c b/third_party/aom/av1/encoder/palette.c
index 355141de5..235964dde 100644
--- a/third_party/aom/av1/encoder/palette.c
+++ b/third_party/aom/av1/encoder/palette.c
@@ -167,31 +167,58 @@ int av1_count_colors(const uint8_t *src, int stride, int rows, int cols) {
}
#if CONFIG_PALETTE_DELTA_ENCODING
-int av1_get_palette_delta_bits_y(const PALETTE_MODE_INFO *const pmi,
- int bit_depth, int *min_bits) {
- const int n = pmi->palette_size[0];
- int max_d = 0, i;
- *min_bits = bit_depth - 3;
- for (i = 1; i < n; ++i) {
- const int delta = pmi->palette_colors[i] - pmi->palette_colors[i - 1];
- assert(delta > 0);
- if (delta > max_d) max_d = delta;
+static int delta_encode_cost(const int *colors, int num, int bit_depth,
+ int min_val) {
+ if (num <= 0) return 0;
+ int bits_cost = bit_depth;
+ if (num == 1) return bits_cost;
+ bits_cost += 2;
+ int max_delta = 0;
+ int deltas[PALETTE_MAX_SIZE];
+ const int min_bits = bit_depth - 3;
+ for (int i = 1; i < num; ++i) {
+ const int delta = colors[i] - colors[i - 1];
+ deltas[i - 1] = delta;
+ assert(delta >= min_val);
+ if (delta > max_delta) max_delta = delta;
+ }
+ int bits_per_delta = AOMMAX(av1_ceil_log2(max_delta + 1 - min_val), min_bits);
+ assert(bits_per_delta <= bit_depth);
+ int range = (1 << bit_depth) - colors[0] - min_val;
+ for (int i = 0; i < num - 1; ++i) {
+ bits_cost += bits_per_delta;
+ range -= deltas[i];
+ bits_per_delta = AOMMIN(bits_per_delta, av1_ceil_log2(range));
}
- return AOMMAX(av1_ceil_log2(max_d), *min_bits);
+ return bits_cost;
}
-int av1_get_palette_delta_bits_u(const PALETTE_MODE_INFO *const pmi,
- int bit_depth, int *min_bits) {
- const int n = pmi->palette_size[1];
- int max_d = 0, i;
- *min_bits = bit_depth - 3;
- for (i = 1; i < n; ++i) {
- const int delta = pmi->palette_colors[PALETTE_MAX_SIZE + i] -
- pmi->palette_colors[PALETTE_MAX_SIZE + i - 1];
- assert(delta >= 0);
- if (delta > max_d) max_d = delta;
+int av1_index_color_cache(const uint16_t *color_cache, int n_cache,
+ const uint16_t *colors, int n_colors,
+ uint8_t *cache_color_found, int *out_cache_colors) {
+ if (n_cache <= 0) {
+ for (int i = 0; i < n_colors; ++i) out_cache_colors[i] = colors[i];
+ return n_colors;
}
- return AOMMAX(av1_ceil_log2(max_d + 1), *min_bits);
+ memset(cache_color_found, 0, n_cache * sizeof(*cache_color_found));
+ int n_in_cache = 0;
+ int in_cache_flags[PALETTE_MAX_SIZE];
+ memset(in_cache_flags, 0, sizeof(in_cache_flags));
+ for (int i = 0; i < n_cache && n_in_cache < n_colors; ++i) {
+ for (int j = 0; j < n_colors; ++j) {
+ if (colors[j] == color_cache[i]) {
+ in_cache_flags[j] = 1;
+ cache_color_found[i] = 1;
+ ++n_in_cache;
+ break;
+ }
+ }
+ }
+ int j = 0;
+ for (int i = 0; i < n_colors; ++i)
+ if (!in_cache_flags[i]) out_cache_colors[j++] = colors[i];
+ assert(j == n_colors - n_in_cache);
+ return j;
}
int av1_get_palette_delta_bits_v(const PALETTE_MODE_INFO *const pmi,
@@ -199,10 +226,10 @@ int av1_get_palette_delta_bits_v(const PALETTE_MODE_INFO *const pmi,
int *min_bits) {
const int n = pmi->palette_size[1];
const int max_val = 1 << bit_depth;
- int max_d = 0, i;
+ int max_d = 0;
*min_bits = bit_depth - 4;
*zero_count = 0;
- for (i = 1; i < n; ++i) {
+ for (int i = 1; i < n; ++i) {
const int delta = pmi->palette_colors[2 * PALETTE_MAX_SIZE + i] -
pmi->palette_colors[2 * PALETTE_MAX_SIZE + i - 1];
const int v = abs(delta);
@@ -215,26 +242,42 @@ int av1_get_palette_delta_bits_v(const PALETTE_MODE_INFO *const pmi,
#endif // CONFIG_PALETTE_DELTA_ENCODING
int av1_palette_color_cost_y(const PALETTE_MODE_INFO *const pmi,
+#if CONFIG_PALETTE_DELTA_ENCODING
+ uint16_t *color_cache, int n_cache,
+#endif // CONFIG_PALETTE_DELTA_ENCODING
int bit_depth) {
const int n = pmi->palette_size[0];
#if CONFIG_PALETTE_DELTA_ENCODING
- int min_bits = 0;
- const int bits = av1_get_palette_delta_bits_y(pmi, bit_depth, &min_bits);
- return av1_cost_bit(128, 0) * (2 + bit_depth + bits * (n - 1));
+ int out_cache_colors[PALETTE_MAX_SIZE];
+ uint8_t cache_color_found[2 * PALETTE_MAX_SIZE];
+ const int n_out_cache =
+ av1_index_color_cache(color_cache, n_cache, pmi->palette_colors, n,
+ cache_color_found, out_cache_colors);
+ const int total_bits =
+ n_cache + delta_encode_cost(out_cache_colors, n_out_cache, bit_depth, 1);
+ return total_bits * av1_cost_bit(128, 0);
#else
return bit_depth * n * av1_cost_bit(128, 0);
#endif // CONFIG_PALETTE_DELTA_ENCODING
}
int av1_palette_color_cost_uv(const PALETTE_MODE_INFO *const pmi,
+#if CONFIG_PALETTE_DELTA_ENCODING
+ uint16_t *color_cache, int n_cache,
+#endif // CONFIG_PALETTE_DELTA_ENCODING
int bit_depth) {
const int n = pmi->palette_size[1];
#if CONFIG_PALETTE_DELTA_ENCODING
- int cost = 0;
+ int total_bits = 0;
// U channel palette color cost.
- int min_bits_u = 0;
- const int bits_u = av1_get_palette_delta_bits_u(pmi, bit_depth, &min_bits_u);
- cost += av1_cost_bit(128, 0) * (2 + bit_depth + bits_u * (n - 1));
+ int out_cache_colors[PALETTE_MAX_SIZE];
+ uint8_t cache_color_found[2 * PALETTE_MAX_SIZE];
+ const int n_out_cache = av1_index_color_cache(
+ color_cache, n_cache, pmi->palette_colors + PALETTE_MAX_SIZE, n,
+ cache_color_found, out_cache_colors);
+ total_bits +=
+ n_cache + delta_encode_cost(out_cache_colors, n_out_cache, bit_depth, 0);
+
// V channel palette color cost.
int zero_count = 0, min_bits_v = 0;
const int bits_v =
@@ -242,8 +285,8 @@ int av1_palette_color_cost_uv(const PALETTE_MODE_INFO *const pmi,
const int bits_using_delta =
2 + bit_depth + (bits_v + 1) * (n - 1) - zero_count;
const int bits_using_raw = bit_depth * n;
- cost += av1_cost_bit(128, 0) * (1 + AOMMIN(bits_using_delta, bits_using_raw));
- return cost;
+ total_bits += 1 + AOMMIN(bits_using_delta, bits_using_raw);
+ return total_bits * av1_cost_bit(128, 0);
#else
return 2 * bit_depth * n * av1_cost_bit(128, 0);
#endif // CONFIG_PALETTE_DELTA_ENCODING
diff --git a/third_party/aom/av1/encoder/palette.h b/third_party/aom/av1/encoder/palette.h
index 5403ac5e6..f5a3c1bdd 100644
--- a/third_party/aom/av1/encoder/palette.h
+++ b/third_party/aom/av1/encoder/palette.h
@@ -45,13 +45,12 @@ int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
#endif // CONFIG_HIGHBITDEPTH
#if CONFIG_PALETTE_DELTA_ENCODING
-// Return the number of bits used to transmit each luma palette color delta.
-int av1_get_palette_delta_bits_y(const PALETTE_MODE_INFO *const pmi,
- int bit_depth, int *min_bits);
-
-// Return the number of bits used to transmit each U palette color delta.
-int av1_get_palette_delta_bits_u(const PALETTE_MODE_INFO *const pmi,
- int bit_depth, int *min_bits);
+// Given a color cache and a set of base colors, find if each cache color is
+// present in the base colors, record the binary results in "cache_color_found".
+// Record the colors that are not in the color cache in "out_cache_colors".
+int av1_index_color_cache(const uint16_t *color_cache, int n_cache,
+ const uint16_t *colors, int n_colors,
+ uint8_t *cache_color_found, int *out_cache_colors);
// Return the number of bits used to transmit each v palette color delta;
// assign zero_count with the number of deltas being 0.
@@ -60,10 +59,17 @@ int av1_get_palette_delta_bits_v(const PALETTE_MODE_INFO *const pmi,
#endif // CONFIG_PALETTE_DELTA_ENCODING
// Return the rate cost for transmitting luma palette color values.
-int av1_palette_color_cost_y(const PALETTE_MODE_INFO *const pmi, int bit_depth);
+int av1_palette_color_cost_y(const PALETTE_MODE_INFO *const pmi,
+#if CONFIG_PALETTE_DELTA_ENCODING
+ uint16_t *color_cache, int n_cache,
+#endif // CONFIG_PALETTE_DELTA_ENCODING
+ int bit_depth);
// Return the rate cost for transmitting chroma palette color values.
int av1_palette_color_cost_uv(const PALETTE_MODE_INFO *const pmi,
+#if CONFIG_PALETTE_DELTA_ENCODING
+ uint16_t *color_cache, int n_cache,
+#endif // CONFIG_PALETTE_DELTA_ENCODING
int bit_depth);
#ifdef __cplusplus
diff --git a/third_party/aom/av1/encoder/pickrst.c b/third_party/aom/av1/encoder/pickrst.c
index 21410e0af..4a446d24e 100644
--- a/third_party/aom/av1/encoder/pickrst.c
+++ b/third_party/aom/av1/encoder/pickrst.c
@@ -31,17 +31,18 @@
#include "av1/encoder/encoder.h"
#include "av1/encoder/picklpf.h"
#include "av1/encoder/pickrst.h"
+#include "av1/encoder/mathutils.h"
// When set to RESTORE_WIENER or RESTORE_SGRPROJ only those are allowed.
// When set to RESTORE_NONE (0) we allow switchable.
const RestorationType force_restore_type = RESTORE_NONE;
// Number of Wiener iterations
-#define NUM_WIENER_ITERS 10
+#define NUM_WIENER_ITERS 5
typedef double (*search_restore_type)(const YV12_BUFFER_CONFIG *src,
AV1_COMP *cpi, int partial_frame,
- RestorationInfo *info,
+ int plane, RestorationInfo *info,
RestorationType *rest_level,
double *best_tile_cost,
YV12_BUFFER_CONFIG *dst_frame);
@@ -216,6 +217,62 @@ static int64_t get_pixel_proj_error(uint8_t *src8, int width, int height,
return err;
}
+#define USE_SGRPROJ_REFINEMENT_SEARCH 1
+static int64_t finer_search_pixel_proj_error(
+ uint8_t *src8, int width, int height, int src_stride, uint8_t *dat8,
+ int dat_stride, int bit_depth, int32_t *flt1, int flt1_stride,
+ int32_t *flt2, int flt2_stride, int start_step, int *xqd) {
+ int64_t err = get_pixel_proj_error(src8, width, height, src_stride, dat8,
+ dat_stride, bit_depth, flt1, flt1_stride,
+ flt2, flt2_stride, xqd);
+ (void)start_step;
+#if USE_SGRPROJ_REFINEMENT_SEARCH
+ int64_t err2;
+ int tap_min[] = { SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MIN1 };
+ int tap_max[] = { SGRPROJ_PRJ_MAX0, SGRPROJ_PRJ_MAX1 };
+ for (int s = start_step; s >= 1; s >>= 1) {
+ for (int p = 0; p < 2; ++p) {
+ int skip = 0;
+ do {
+ if (xqd[p] - s >= tap_min[p]) {
+ xqd[p] -= s;
+ err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8,
+ dat_stride, bit_depth, flt1, flt1_stride,
+ flt2, flt2_stride, xqd);
+ if (err2 > err) {
+ xqd[p] += s;
+ } else {
+ err = err2;
+ skip = 1;
+ // At the highest step size continue moving in the same direction
+ if (s == start_step) continue;
+ }
+ }
+ break;
+ } while (1);
+ if (skip) break;
+ do {
+ if (xqd[p] + s <= tap_max[p]) {
+ xqd[p] += s;
+ err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8,
+ dat_stride, bit_depth, flt1, flt1_stride,
+ flt2, flt2_stride, xqd);
+ if (err2 > err) {
+ xqd[p] -= s;
+ } else {
+ err = err2;
+ // At the highest step size continue moving in the same direction
+ if (s == start_step) continue;
+ }
+ }
+ break;
+ } while (1);
+ }
+ }
+#endif // USE_SGRPROJ_REFINEMENT_SEARCH
+ return err;
+}
+
static void get_proj_subspace(uint8_t *src8, int width, int height,
int src_stride, uint8_t *dat8, int dat_stride,
int bit_depth, int32_t *flt1, int flt1_stride,
@@ -329,12 +386,14 @@ static void search_selfguided_restoration(uint8_t *dat8, int width, int height,
#if CONFIG_HIGHBITDEPTH
}
#endif
+ aom_clear_system_state();
get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride,
bit_depth, flt1, width, flt2, width, exq);
+ aom_clear_system_state();
encode_xq(exq, exqd);
- err =
- get_pixel_proj_error(src8, width, height, src_stride, dat8, dat_stride,
- bit_depth, flt1, width, flt2, width, exqd);
+ err = finer_search_pixel_proj_error(src8, width, height, src_stride, dat8,
+ dat_stride, bit_depth, flt1, width,
+ flt2, width, 2, exqd);
if (besterr == -1 || err < besterr) {
bestep = ep;
besterr = err;
@@ -362,8 +421,9 @@ static int count_sgrproj_bits(SgrprojInfo *sgrproj_info,
}
static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
- int partial_frame, RestorationInfo *info,
- RestorationType *type, double *best_tile_cost,
+ int partial_frame, int plane,
+ RestorationInfo *info, RestorationType *type,
+ double *best_tile_cost,
YV12_BUFFER_CONFIG *dst_frame) {
SgrprojInfo *sgrproj_info = info->sgrproj_info;
double err, cost_norestore, cost_sgrproj;
@@ -374,44 +434,68 @@ static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
RestorationInfo *rsi = &cpi->rst_search[0];
int tile_idx, tile_width, tile_height, nhtiles, nvtiles;
int h_start, h_end, v_start, v_end;
- // Allocate for the src buffer at high precision
- const int ntiles = av1_get_rest_ntiles(
- cm->width, cm->height, cm->rst_info[0].restoration_tilesize, &tile_width,
- &tile_height, &nhtiles, &nvtiles);
+ int width, height, src_stride, dgd_stride;
+ uint8_t *dgd_buffer, *src_buffer;
+ if (plane == AOM_PLANE_Y) {
+ width = cm->width;
+ height = cm->height;
+ src_buffer = src->y_buffer;
+ src_stride = src->y_stride;
+ dgd_buffer = dgd->y_buffer;
+ dgd_stride = dgd->y_stride;
+ assert(width == dgd->y_crop_width);
+ assert(height == dgd->y_crop_height);
+ assert(width == src->y_crop_width);
+ assert(height == src->y_crop_height);
+ } else {
+ width = src->uv_crop_width;
+ height = src->uv_crop_height;
+ src_stride = src->uv_stride;
+ dgd_stride = dgd->uv_stride;
+ src_buffer = plane == AOM_PLANE_U ? src->u_buffer : src->v_buffer;
+ dgd_buffer = plane == AOM_PLANE_U ? dgd->u_buffer : dgd->v_buffer;
+ assert(width == dgd->uv_crop_width);
+ assert(height == dgd->uv_crop_height);
+ }
+ const int ntiles =
+ av1_get_rest_ntiles(width, height, cm->rst_info[0].restoration_tilesize,
+ &tile_width, &tile_height, &nhtiles, &nvtiles);
SgrprojInfo ref_sgrproj_info;
set_default_sgrproj(&ref_sgrproj_info);
- rsi->frame_restoration_type = RESTORE_SGRPROJ;
+ rsi[plane].frame_restoration_type = RESTORE_SGRPROJ;
for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
- rsi->restoration_type[tile_idx] = RESTORE_NONE;
+ rsi[plane].restoration_type[tile_idx] = RESTORE_NONE;
}
// Compute best Sgrproj filters for each tile
for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width,
- tile_height, cm->width, cm->height, 0, 0, &h_start,
- &h_end, &v_start, &v_end);
+ tile_height, width, height, 0, 0, &h_start, &h_end,
+ &v_start, &v_end);
err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start,
- h_end - h_start, v_start, v_end - v_start, 1);
+ h_end - h_start, v_start, v_end - v_start,
+ (1 << plane));
// #bits when a tile is not restored
bits = av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 0);
cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
best_tile_cost[tile_idx] = DBL_MAX;
search_selfguided_restoration(
- dgd->y_buffer + v_start * dgd->y_stride + h_start, h_end - h_start,
- v_end - v_start, dgd->y_stride,
- src->y_buffer + v_start * src->y_stride + h_start, src->y_stride,
+ dgd_buffer + v_start * dgd_stride + h_start, h_end - h_start,
+ v_end - v_start, dgd_stride,
+ src_buffer + v_start * src_stride + h_start, src_stride,
#if CONFIG_HIGHBITDEPTH
cm->bit_depth,
#else
8,
#endif // CONFIG_HIGHBITDEPTH
- &rsi->sgrproj_info[tile_idx].ep, rsi->sgrproj_info[tile_idx].xqd,
- cm->rst_internal.tmpbuf);
- rsi->restoration_type[tile_idx] = RESTORE_SGRPROJ;
- err = try_restoration_tile(src, cpi, rsi, 1, partial_frame, tile_idx, 0, 0,
- dst_frame);
- bits = count_sgrproj_bits(&rsi->sgrproj_info[tile_idx], &ref_sgrproj_info)
+ &rsi[plane].sgrproj_info[tile_idx].ep,
+ rsi[plane].sgrproj_info[tile_idx].xqd, cm->rst_internal.tmpbuf);
+ rsi[plane].restoration_type[tile_idx] = RESTORE_SGRPROJ;
+ err = try_restoration_tile(src, cpi, rsi, (1 << plane), partial_frame,
+ tile_idx, 0, 0, dst_frame);
+ bits = count_sgrproj_bits(&rsi[plane].sgrproj_info[tile_idx],
+ &ref_sgrproj_info)
<< AV1_PROB_COST_SHIFT;
bits += av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 1);
cost_sgrproj = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
@@ -419,35 +503,34 @@ static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
type[tile_idx] = RESTORE_NONE;
} else {
type[tile_idx] = RESTORE_SGRPROJ;
- memcpy(&sgrproj_info[tile_idx], &rsi->sgrproj_info[tile_idx],
+ memcpy(&sgrproj_info[tile_idx], &rsi[plane].sgrproj_info[tile_idx],
sizeof(sgrproj_info[tile_idx]));
- bits = count_sgrproj_bits(&rsi->sgrproj_info[tile_idx], &ref_sgrproj_info)
- << AV1_PROB_COST_SHIFT;
memcpy(&ref_sgrproj_info, &sgrproj_info[tile_idx],
sizeof(ref_sgrproj_info));
best_tile_cost[tile_idx] = err;
}
- rsi->restoration_type[tile_idx] = RESTORE_NONE;
+ rsi[plane].restoration_type[tile_idx] = RESTORE_NONE;
}
// Cost for Sgrproj filtering
set_default_sgrproj(&ref_sgrproj_info);
- bits = frame_level_restore_bits[rsi->frame_restoration_type]
+ bits = frame_level_restore_bits[rsi[plane].frame_restoration_type]
<< AV1_PROB_COST_SHIFT;
for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
bits +=
av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, type[tile_idx] != RESTORE_NONE);
- memcpy(&rsi->sgrproj_info[tile_idx], &sgrproj_info[tile_idx],
+ memcpy(&rsi[plane].sgrproj_info[tile_idx], &sgrproj_info[tile_idx],
sizeof(sgrproj_info[tile_idx]));
if (type[tile_idx] == RESTORE_SGRPROJ) {
- bits +=
- count_sgrproj_bits(&rsi->sgrproj_info[tile_idx], &ref_sgrproj_info)
- << AV1_PROB_COST_SHIFT;
- memcpy(&ref_sgrproj_info, &rsi->sgrproj_info[tile_idx],
+ bits += count_sgrproj_bits(&rsi[plane].sgrproj_info[tile_idx],
+ &ref_sgrproj_info)
+ << AV1_PROB_COST_SHIFT;
+ memcpy(&ref_sgrproj_info, &rsi[plane].sgrproj_info[tile_idx],
sizeof(ref_sgrproj_info));
}
- rsi->restoration_type[tile_idx] = type[tile_idx];
+ rsi[plane].restoration_type[tile_idx] = type[tile_idx];
}
- err = try_restoration_frame(src, cpi, rsi, 1, partial_frame, dst_frame);
+ err = try_restoration_frame(src, cpi, rsi, (1 << plane), partial_frame,
+ dst_frame);
cost_sgrproj = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
return cost_sgrproj;
@@ -560,46 +643,6 @@ static void compute_stats_highbd(uint8_t *dgd8, uint8_t *src8, int h_start,
}
#endif // CONFIG_HIGHBITDEPTH
-// Solves Ax = b, where x and b are column vectors
-static int linsolve(int n, double *A, int stride, double *b, double *x) {
- int i, j, k;
- double c;
-
- aom_clear_system_state();
-
- // Forward elimination
- for (k = 0; k < n - 1; k++) {
- // Bring the largest magitude to the diagonal position
- for (i = n - 1; i > k; i--) {
- if (fabs(A[(i - 1) * stride + k]) < fabs(A[i * stride + k])) {
- for (j = 0; j < n; j++) {
- c = A[i * stride + j];
- A[i * stride + j] = A[(i - 1) * stride + j];
- A[(i - 1) * stride + j] = c;
- }
- c = b[i];
- b[i] = b[i - 1];
- b[i - 1] = c;
- }
- }
- for (i = k; i < n - 1; i++) {
- if (fabs(A[k * stride + k]) < 1e-10) return 0;
- c = A[(i + 1) * stride + k] / A[k * stride + k];
- for (j = 0; j < n; j++) A[(i + 1) * stride + j] -= c * A[k * stride + j];
- b[i + 1] -= c * b[k];
- }
- }
- // Backward substitution
- for (i = n - 1; i >= 0; i--) {
- if (fabs(A[i * stride + i]) < 1e-10) return 0;
- c = 0;
- for (j = i + 1; j <= n - 1; j++) c += A[i * stride + j] * x[j];
- x[i] = (b[i] - c) / A[i * stride + i];
- }
-
- return 1;
-}
-
static INLINE int wrap_index(int i) {
return (i >= WIENER_HALFWIN1 ? WIENER_WIN - 1 - i : i);
}
@@ -696,8 +739,10 @@ static void update_b_sep_sym(double **Mc, double **Hc, double *a, double *b) {
static int wiener_decompose_sep_sym(double *M, double *H, double *a,
double *b) {
- static const double init_filt[WIENER_WIN] = {
- 0.035623, -0.127154, 0.211436, 0.760190, 0.211436, -0.127154, 0.035623,
+ static const int init_filt[WIENER_WIN] = {
+ WIENER_FILT_TAP0_MIDV, WIENER_FILT_TAP1_MIDV, WIENER_FILT_TAP2_MIDV,
+ WIENER_FILT_TAP3_MIDV, WIENER_FILT_TAP2_MIDV, WIENER_FILT_TAP1_MIDV,
+ WIENER_FILT_TAP0_MIDV,
};
int i, j, iter;
double *Hc[WIENER_WIN2];
@@ -709,8 +754,9 @@ static int wiener_decompose_sep_sym(double *M, double *H, double *a,
H + i * WIENER_WIN * WIENER_WIN2 + j * WIENER_WIN;
}
}
- memcpy(a, init_filt, sizeof(*a) * WIENER_WIN);
- memcpy(b, init_filt, sizeof(*b) * WIENER_WIN);
+ for (i = 0; i < WIENER_WIN; i++) {
+ a[i] = b[i] = (double)init_filt[i] / WIENER_FILT_STEP;
+ }
iter = 1;
while (iter < NUM_WIENER_ITERS) {
@@ -812,158 +858,117 @@ static int count_wiener_bits(WienerInfo *wiener_info,
return bits;
}
-static double search_wiener_uv(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
- int partial_frame, int plane,
- RestorationInfo *info, RestorationType *type,
- YV12_BUFFER_CONFIG *dst_frame) {
- WienerInfo *wiener_info = info->wiener_info;
- AV1_COMMON *const cm = &cpi->common;
- RestorationInfo *rsi = cpi->rst_search;
- int64_t err;
- int bits;
- double cost_wiener, cost_norestore, cost_wiener_frame, cost_norestore_frame;
- MACROBLOCK *x = &cpi->td.mb;
- double M[WIENER_WIN2];
- double H[WIENER_WIN2 * WIENER_WIN2];
- double vfilterd[WIENER_WIN], hfilterd[WIENER_WIN];
- const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show;
- const int width = src->uv_crop_width;
- const int height = src->uv_crop_height;
- const int src_stride = src->uv_stride;
- const int dgd_stride = dgd->uv_stride;
- double score;
- int tile_idx, tile_width, tile_height, nhtiles, nvtiles;
- int h_start, h_end, v_start, v_end;
- const int ntiles =
- av1_get_rest_ntiles(width, height, cm->rst_info[1].restoration_tilesize,
- &tile_width, &tile_height, &nhtiles, &nvtiles);
- WienerInfo ref_wiener_info;
- set_default_wiener(&ref_wiener_info);
- assert(width == dgd->uv_crop_width);
- assert(height == dgd->uv_crop_height);
-
- rsi[plane].frame_restoration_type = RESTORE_NONE;
- err = sse_restoration_frame(cm, src, cm->frame_to_show, (1 << plane));
- bits = 0;
- cost_norestore_frame = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
-
- rsi[plane].frame_restoration_type = RESTORE_WIENER;
-
- for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
- rsi[plane].restoration_type[tile_idx] = RESTORE_NONE;
- }
-
- // Compute best Wiener filters for each tile
- for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
- av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width,
- tile_height, width, height, 0, 0, &h_start, &h_end,
- &v_start, &v_end);
- err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start,
- h_end - h_start, v_start, v_end - v_start,
- 1 << plane);
- // #bits when a tile is not restored
- bits = av1_cost_bit(RESTORE_NONE_WIENER_PROB, 0);
- cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
- // best_tile_cost[tile_idx] = DBL_MAX;
-
- av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width,
- tile_height, width, height, WIENER_HALFWIN,
- WIENER_HALFWIN, &h_start, &h_end, &v_start,
- &v_end);
- if (plane == AOM_PLANE_U) {
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- compute_stats_highbd(dgd->u_buffer, src->u_buffer, h_start, h_end,
- v_start, v_end, dgd_stride, src_stride, M, H);
- else
-#endif // CONFIG_HIGHBITDEPTH
- compute_stats(dgd->u_buffer, src->u_buffer, h_start, h_end, v_start,
- v_end, dgd_stride, src_stride, M, H);
- } else if (plane == AOM_PLANE_V) {
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- compute_stats_highbd(dgd->v_buffer, src->v_buffer, h_start, h_end,
- v_start, v_end, dgd_stride, src_stride, M, H);
- else
-#endif // CONFIG_HIGHBITDEPTH
- compute_stats(dgd->v_buffer, src->v_buffer, h_start, h_end, v_start,
- v_end, dgd_stride, src_stride, M, H);
- } else {
- assert(0);
- }
-
- type[tile_idx] = RESTORE_WIENER;
-
- if (!wiener_decompose_sep_sym(M, H, vfilterd, hfilterd)) {
- type[tile_idx] = RESTORE_NONE;
- continue;
- }
- quantize_sym_filter(vfilterd, rsi[plane].wiener_info[tile_idx].vfilter);
- quantize_sym_filter(hfilterd, rsi[plane].wiener_info[tile_idx].hfilter);
-
- // Filter score computes the value of the function x'*A*x - x'*b for the
- // learned filter and compares it against identity filer. If there is no
- // reduction in the function, the filter is reverted back to identity
- score = compute_score(M, H, rsi[plane].wiener_info[tile_idx].vfilter,
- rsi[plane].wiener_info[tile_idx].hfilter);
- if (score > 0.0) {
- type[tile_idx] = RESTORE_NONE;
- continue;
- }
-
- rsi[plane].restoration_type[tile_idx] = RESTORE_WIENER;
- err = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame,
- tile_idx, 0, 0, dst_frame);
- bits =
- count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info)
- << AV1_PROB_COST_SHIFT;
- // bits = WIENER_FILT_BITS << AV1_PROB_COST_SHIFT;
- bits += av1_cost_bit(RESTORE_NONE_WIENER_PROB, 1);
- cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
- if (cost_wiener >= cost_norestore) {
- type[tile_idx] = RESTORE_NONE;
- } else {
- type[tile_idx] = RESTORE_WIENER;
- memcpy(&wiener_info[tile_idx], &rsi[plane].wiener_info[tile_idx],
- sizeof(wiener_info[tile_idx]));
- memcpy(&ref_wiener_info, &rsi[plane].wiener_info[tile_idx],
- sizeof(ref_wiener_info));
+#define USE_WIENER_REFINEMENT_SEARCH 1
+static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src,
+ AV1_COMP *cpi, RestorationInfo *rsi,
+ int start_step, int plane, int tile_idx,
+ int partial_frame,
+ YV12_BUFFER_CONFIG *dst_frame) {
+ int64_t err = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame,
+ tile_idx, 0, 0, dst_frame);
+ (void)start_step;
+#if USE_WIENER_REFINEMENT_SEARCH
+ int64_t err2;
+ int tap_min[] = { WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP1_MINV,
+ WIENER_FILT_TAP2_MINV };
+ int tap_max[] = { WIENER_FILT_TAP0_MAXV, WIENER_FILT_TAP1_MAXV,
+ WIENER_FILT_TAP2_MAXV };
+ // printf("err pre = %"PRId64"\n", err);
+ for (int s = start_step; s >= 1; s >>= 1) {
+ for (int p = 0; p < WIENER_HALFWIN; ++p) {
+ int skip = 0;
+ do {
+ if (rsi[plane].wiener_info[tile_idx].hfilter[p] - s >= tap_min[p]) {
+ rsi[plane].wiener_info[tile_idx].hfilter[p] -= s;
+ rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] -= s;
+ rsi[plane].wiener_info[tile_idx].hfilter[WIENER_HALFWIN] += 2 * s;
+ err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame,
+ tile_idx, 0, 0, dst_frame);
+ if (err2 > err) {
+ rsi[plane].wiener_info[tile_idx].hfilter[p] += s;
+ rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] += s;
+ rsi[plane].wiener_info[tile_idx].hfilter[WIENER_HALFWIN] -= 2 * s;
+ } else {
+ err = err2;
+ skip = 1;
+ // At the highest step size continue moving in the same direction
+ if (s == start_step) continue;
+ }
+ }
+ break;
+ } while (1);
+ if (skip) break;
+ do {
+ if (rsi[plane].wiener_info[tile_idx].hfilter[p] + s <= tap_max[p]) {
+ rsi[plane].wiener_info[tile_idx].hfilter[p] += s;
+ rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] += s;
+ rsi[plane].wiener_info[tile_idx].hfilter[WIENER_HALFWIN] -= 2 * s;
+ err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame,
+ tile_idx, 0, 0, dst_frame);
+ if (err2 > err) {
+ rsi[plane].wiener_info[tile_idx].hfilter[p] -= s;
+ rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] -= s;
+ rsi[plane].wiener_info[tile_idx].hfilter[WIENER_HALFWIN] += 2 * s;
+ } else {
+ err = err2;
+ // At the highest step size continue moving in the same direction
+ if (s == start_step) continue;
+ }
+ }
+ break;
+ } while (1);
}
- rsi[plane].restoration_type[tile_idx] = RESTORE_NONE;
- }
- // Cost for Wiener filtering
- set_default_wiener(&ref_wiener_info);
- bits = 0;
- for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
- bits +=
- av1_cost_bit(RESTORE_NONE_WIENER_PROB, type[tile_idx] != RESTORE_NONE);
- memcpy(&rsi[plane].wiener_info[tile_idx], &wiener_info[tile_idx],
- sizeof(wiener_info[tile_idx]));
- if (type[tile_idx] == RESTORE_WIENER) {
- bits +=
- count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info)
- << AV1_PROB_COST_SHIFT;
- memcpy(&ref_wiener_info, &rsi[plane].wiener_info[tile_idx],
- sizeof(ref_wiener_info));
+ for (int p = 0; p < WIENER_HALFWIN; ++p) {
+ int skip = 0;
+ do {
+ if (rsi[plane].wiener_info[tile_idx].vfilter[p] - s >= tap_min[p]) {
+ rsi[plane].wiener_info[tile_idx].vfilter[p] -= s;
+ rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] -= s;
+ rsi[plane].wiener_info[tile_idx].vfilter[WIENER_HALFWIN] += 2 * s;
+ err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame,
+ tile_idx, 0, 0, dst_frame);
+ if (err2 > err) {
+ rsi[plane].wiener_info[tile_idx].vfilter[p] += s;
+ rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] += s;
+ rsi[plane].wiener_info[tile_idx].vfilter[WIENER_HALFWIN] -= 2 * s;
+ } else {
+ err = err2;
+ skip = 1;
+ // At the highest step size continue moving in the same direction
+ if (s == start_step) continue;
+ }
+ }
+ break;
+ } while (1);
+ if (skip) break;
+ do {
+ if (rsi[plane].wiener_info[tile_idx].vfilter[p] + s <= tap_max[p]) {
+ rsi[plane].wiener_info[tile_idx].vfilter[p] += s;
+ rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] += s;
+ rsi[plane].wiener_info[tile_idx].vfilter[WIENER_HALFWIN] -= 2 * s;
+ err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame,
+ tile_idx, 0, 0, dst_frame);
+ if (err2 > err) {
+ rsi[plane].wiener_info[tile_idx].vfilter[p] -= s;
+ rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] -= s;
+ rsi[plane].wiener_info[tile_idx].vfilter[WIENER_HALFWIN] += 2 * s;
+ } else {
+ err = err2;
+ // At the highest step size continue moving in the same direction
+ if (s == start_step) continue;
+ }
+ }
+ break;
+ } while (1);
}
- rsi[plane].restoration_type[tile_idx] = type[tile_idx];
- }
- err = try_restoration_frame(src, cpi, rsi, 1 << plane, partial_frame,
- dst_frame);
- cost_wiener_frame = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
-
- if (cost_wiener_frame < cost_norestore_frame) {
- info->frame_restoration_type = RESTORE_WIENER;
- } else {
- info->frame_restoration_type = RESTORE_NONE;
}
-
- return info->frame_restoration_type == RESTORE_WIENER ? cost_wiener_frame
- : cost_norestore_frame;
+// printf("err post = %"PRId64"\n", err);
+#endif // USE_WIENER_REFINEMENT_SEARCH
+ return err;
}
static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
- int partial_frame, RestorationInfo *info,
+ int partial_frame, int plane, RestorationInfo *info,
RestorationType *type, double *best_tile_cost,
YV12_BUFFER_CONFIG *dst_frame) {
WienerInfo *wiener_info = info->wiener_info;
@@ -977,38 +982,52 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
double H[WIENER_WIN2 * WIENER_WIN2];
double vfilterd[WIENER_WIN], hfilterd[WIENER_WIN];
const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show;
- const int width = cm->width;
- const int height = cm->height;
- const int src_stride = src->y_stride;
- const int dgd_stride = dgd->y_stride;
+ int width, height, src_stride, dgd_stride;
+ uint8_t *dgd_buffer, *src_buffer;
+ if (plane == AOM_PLANE_Y) {
+ width = cm->width;
+ height = cm->height;
+ src_buffer = src->y_buffer;
+ src_stride = src->y_stride;
+ dgd_buffer = dgd->y_buffer;
+ dgd_stride = dgd->y_stride;
+ assert(width == dgd->y_crop_width);
+ assert(height == dgd->y_crop_height);
+ assert(width == src->y_crop_width);
+ assert(height == src->y_crop_height);
+ } else {
+ width = src->uv_crop_width;
+ height = src->uv_crop_height;
+ src_stride = src->uv_stride;
+ dgd_stride = dgd->uv_stride;
+ src_buffer = plane == AOM_PLANE_U ? src->u_buffer : src->v_buffer;
+ dgd_buffer = plane == AOM_PLANE_U ? dgd->u_buffer : dgd->v_buffer;
+ assert(width == dgd->uv_crop_width);
+ assert(height == dgd->uv_crop_height);
+ }
double score;
int tile_idx, tile_width, tile_height, nhtiles, nvtiles;
int h_start, h_end, v_start, v_end;
- const int ntiles =
- av1_get_rest_ntiles(width, height, cm->rst_info[0].restoration_tilesize,
- &tile_width, &tile_height, &nhtiles, &nvtiles);
+ const int ntiles = av1_get_rest_ntiles(
+ width, height, cm->rst_info[plane].restoration_tilesize, &tile_width,
+ &tile_height, &nhtiles, &nvtiles);
WienerInfo ref_wiener_info;
set_default_wiener(&ref_wiener_info);
- assert(width == dgd->y_crop_width);
- assert(height == dgd->y_crop_height);
- assert(width == src->y_crop_width);
- assert(height == src->y_crop_height);
-
- rsi->frame_restoration_type = RESTORE_WIENER;
+ rsi[plane].frame_restoration_type = RESTORE_WIENER;
for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
- rsi->restoration_type[tile_idx] = RESTORE_NONE;
+ rsi[plane].restoration_type[tile_idx] = RESTORE_NONE;
}
// Construct a (WIENER_HALFWIN)-pixel border around the frame
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth)
- extend_frame_highbd(CONVERT_TO_SHORTPTR(dgd->y_buffer), width, height,
+ extend_frame_highbd(CONVERT_TO_SHORTPTR(dgd_buffer), width, height,
dgd_stride);
else
#endif
- extend_frame(dgd->y_buffer, width, height, dgd_stride);
+ extend_frame(dgd_buffer, width, height, dgd_stride);
// Compute best Wiener filters for each tile
for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
@@ -1016,7 +1035,8 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
tile_height, width, height, 0, 0, &h_start, &h_end,
&v_start, &v_end);
err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start,
- h_end - h_start, v_start, v_end - v_start, 1);
+ h_end - h_start, v_start, v_end - v_start,
+ (1 << plane));
// #bits when a tile is not restored
bits = av1_cost_bit(RESTORE_NONE_WIENER_PROB, 0);
cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
@@ -1027,12 +1047,12 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
&v_start, &v_end);
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth)
- compute_stats_highbd(dgd->y_buffer, src->y_buffer, h_start, h_end,
- v_start, v_end, dgd_stride, src_stride, M, H);
+ compute_stats_highbd(dgd_buffer, src_buffer, h_start, h_end, v_start,
+ v_end, dgd_stride, src_stride, M, H);
else
#endif // CONFIG_HIGHBITDEPTH
- compute_stats(dgd->y_buffer, src->y_buffer, h_start, h_end, v_start,
- v_end, dgd_stride, src_stride, M, H);
+ compute_stats(dgd_buffer, src_buffer, h_start, h_end, v_start, v_end,
+ dgd_stride, src_stride, M, H);
type[tile_idx] = RESTORE_WIENER;
@@ -1040,108 +1060,129 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
type[tile_idx] = RESTORE_NONE;
continue;
}
- quantize_sym_filter(vfilterd, rsi->wiener_info[tile_idx].vfilter);
- quantize_sym_filter(hfilterd, rsi->wiener_info[tile_idx].hfilter);
+ quantize_sym_filter(vfilterd, rsi[plane].wiener_info[tile_idx].vfilter);
+ quantize_sym_filter(hfilterd, rsi[plane].wiener_info[tile_idx].hfilter);
// Filter score computes the value of the function x'*A*x - x'*b for the
// learned filter and compares it against identity filer. If there is no
// reduction in the function, the filter is reverted back to identity
- score = compute_score(M, H, rsi->wiener_info[tile_idx].vfilter,
- rsi->wiener_info[tile_idx].hfilter);
+ score = compute_score(M, H, rsi[plane].wiener_info[tile_idx].vfilter,
+ rsi[plane].wiener_info[tile_idx].hfilter);
if (score > 0.0) {
type[tile_idx] = RESTORE_NONE;
continue;
}
+ aom_clear_system_state();
- rsi->restoration_type[tile_idx] = RESTORE_WIENER;
- err = try_restoration_tile(src, cpi, rsi, 1, partial_frame, tile_idx, 0, 0,
- dst_frame);
- bits = count_wiener_bits(&rsi->wiener_info[tile_idx], &ref_wiener_info)
- << AV1_PROB_COST_SHIFT;
+ rsi[plane].restoration_type[tile_idx] = RESTORE_WIENER;
+ err = finer_tile_search_wiener(src, cpi, rsi, 4, plane, tile_idx,
+ partial_frame, dst_frame);
+ bits =
+ count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info)
+ << AV1_PROB_COST_SHIFT;
bits += av1_cost_bit(RESTORE_NONE_WIENER_PROB, 1);
cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
if (cost_wiener >= cost_norestore) {
type[tile_idx] = RESTORE_NONE;
} else {
type[tile_idx] = RESTORE_WIENER;
- memcpy(&wiener_info[tile_idx], &rsi->wiener_info[tile_idx],
+ memcpy(&wiener_info[tile_idx], &rsi[plane].wiener_info[tile_idx],
sizeof(wiener_info[tile_idx]));
- memcpy(&ref_wiener_info, &rsi->wiener_info[tile_idx],
+ memcpy(&ref_wiener_info, &rsi[plane].wiener_info[tile_idx],
sizeof(ref_wiener_info));
- bits = count_wiener_bits(&wiener_info[tile_idx], &ref_wiener_info)
- << AV1_PROB_COST_SHIFT;
best_tile_cost[tile_idx] = err;
}
- rsi->restoration_type[tile_idx] = RESTORE_NONE;
+ rsi[plane].restoration_type[tile_idx] = RESTORE_NONE;
}
// Cost for Wiener filtering
set_default_wiener(&ref_wiener_info);
- bits = frame_level_restore_bits[rsi->frame_restoration_type]
+ bits = frame_level_restore_bits[rsi[plane].frame_restoration_type]
<< AV1_PROB_COST_SHIFT;
for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
bits +=
av1_cost_bit(RESTORE_NONE_WIENER_PROB, type[tile_idx] != RESTORE_NONE);
- memcpy(&rsi->wiener_info[tile_idx], &wiener_info[tile_idx],
+ memcpy(&rsi[plane].wiener_info[tile_idx], &wiener_info[tile_idx],
sizeof(wiener_info[tile_idx]));
if (type[tile_idx] == RESTORE_WIENER) {
- bits += count_wiener_bits(&rsi->wiener_info[tile_idx], &ref_wiener_info)
- << AV1_PROB_COST_SHIFT;
- memcpy(&ref_wiener_info, &rsi->wiener_info[tile_idx],
+ bits +=
+ count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info)
+ << AV1_PROB_COST_SHIFT;
+ memcpy(&ref_wiener_info, &rsi[plane].wiener_info[tile_idx],
sizeof(ref_wiener_info));
}
- rsi->restoration_type[tile_idx] = type[tile_idx];
+ rsi[plane].restoration_type[tile_idx] = type[tile_idx];
}
- err = try_restoration_frame(src, cpi, rsi, 1, partial_frame, dst_frame);
+ err = try_restoration_frame(src, cpi, rsi, 1 << plane, partial_frame,
+ dst_frame);
cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
return cost_wiener;
}
static double search_norestore(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
- int partial_frame, RestorationInfo *info,
- RestorationType *type, double *best_tile_cost,
+ int partial_frame, int plane,
+ RestorationInfo *info, RestorationType *type,
+ double *best_tile_cost,
YV12_BUFFER_CONFIG *dst_frame) {
- double err, cost_norestore;
+ int64_t err;
+ double cost_norestore;
int bits;
MACROBLOCK *x = &cpi->td.mb;
AV1_COMMON *const cm = &cpi->common;
int tile_idx, tile_width, tile_height, nhtiles, nvtiles;
int h_start, h_end, v_start, v_end;
+ int width, height;
+ if (plane == AOM_PLANE_Y) {
+ width = cm->width;
+ height = cm->height;
+ } else {
+ width = src->uv_crop_width;
+ height = src->uv_crop_height;
+ }
const int ntiles = av1_get_rest_ntiles(
- cm->width, cm->height, cm->rst_info[0].restoration_tilesize, &tile_width,
+ width, height, cm->rst_info[plane].restoration_tilesize, &tile_width,
&tile_height, &nhtiles, &nvtiles);
(void)info;
(void)dst_frame;
(void)partial_frame;
+ info->frame_restoration_type = RESTORE_NONE;
for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width,
- tile_height, cm->width, cm->height, 0, 0, &h_start,
- &h_end, &v_start, &v_end);
+ tile_height, width, height, 0, 0, &h_start, &h_end,
+ &v_start, &v_end);
err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start,
- h_end - h_start, v_start, v_end - v_start, 1);
+ h_end - h_start, v_start, v_end - v_start,
+ 1 << plane);
type[tile_idx] = RESTORE_NONE;
best_tile_cost[tile_idx] = err;
}
// RD cost associated with no restoration
- err = sse_restoration_tile(src, cm->frame_to_show, cm, 0, cm->width, 0,
- cm->height, 1);
+ err = sse_restoration_frame(cm, src, cm->frame_to_show, (1 << plane));
bits = frame_level_restore_bits[RESTORE_NONE] << AV1_PROB_COST_SHIFT;
cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err);
return cost_norestore;
}
static double search_switchable_restoration(
- AV1_COMP *cpi, int partial_frame, RestorationInfo *rsi,
+ AV1_COMP *cpi, int partial_frame, int plane, RestorationInfo *rsi,
double *tile_cost[RESTORE_SWITCHABLE_TYPES]) {
AV1_COMMON *const cm = &cpi->common;
MACROBLOCK *x = &cpi->td.mb;
double cost_switchable = 0;
int bits, tile_idx;
RestorationType r;
- const int ntiles = av1_get_rest_ntiles(cm->width, cm->height,
- cm->rst_info[0].restoration_tilesize,
- NULL, NULL, NULL, NULL);
+ int width, height;
+ if (plane == AOM_PLANE_Y) {
+ width = cm->width;
+ height = cm->height;
+ } else {
+ width = ROUND_POWER_OF_TWO(cm->width, cm->subsampling_x);
+ height = ROUND_POWER_OF_TWO(cm->height, cm->subsampling_y);
+ }
+ const int ntiles = av1_get_rest_ntiles(
+ width, height, cm->rst_info[plane].restoration_tilesize, NULL, NULL, NULL,
+ NULL);
SgrprojInfo ref_sgrproj_info;
set_default_sgrproj(&ref_sgrproj_info);
WienerInfo ref_wiener_info;
@@ -1203,57 +1244,60 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
double best_cost_restore;
RestorationType r, best_restore;
- const int ntiles = av1_get_rest_ntiles(cm->width, cm->height,
- cm->rst_info[0].restoration_tilesize,
- NULL, NULL, NULL, NULL);
+ const int ntiles_y = av1_get_rest_ntiles(cm->width, cm->height,
+ cm->rst_info[0].restoration_tilesize,
+ NULL, NULL, NULL, NULL);
+ const int ntiles_uv = av1_get_rest_ntiles(
+ ROUND_POWER_OF_TWO(cm->width, cm->subsampling_x),
+ ROUND_POWER_OF_TWO(cm->height, cm->subsampling_y),
+ cm->rst_info[1].restoration_tilesize, NULL, NULL, NULL, NULL);
+ // Assume ntiles_uv is never larger than ntiles_y and so the same arrays work.
for (r = 0; r < RESTORE_SWITCHABLE_TYPES; r++) {
- tile_cost[r] = (double *)aom_malloc(sizeof(*tile_cost[0]) * ntiles);
+ tile_cost[r] = (double *)aom_malloc(sizeof(*tile_cost[0]) * ntiles_y);
restore_types[r] =
- (RestorationType *)aom_malloc(sizeof(*restore_types[0]) * ntiles);
- }
-
- for (r = 0; r < RESTORE_SWITCHABLE_TYPES; ++r) {
- if (force_restore_type != 0)
- if (r != RESTORE_NONE && r != force_restore_type) continue;
- cost_restore[r] = search_restore_fun[r](
- src, cpi, method == LPF_PICK_FROM_SUBIMAGE, &cm->rst_info[0],
- restore_types[r], tile_cost[r], &cpi->trial_frame_rst);
+ (RestorationType *)aom_malloc(sizeof(*restore_types[0]) * ntiles_y);
}
- cost_restore[RESTORE_SWITCHABLE] = search_switchable_restoration(
- cpi, method == LPF_PICK_FROM_SUBIMAGE, &cm->rst_info[0], tile_cost);
- best_cost_restore = DBL_MAX;
- best_restore = 0;
- for (r = 0; r < RESTORE_TYPES; ++r) {
+ for (int plane = AOM_PLANE_Y; plane <= AOM_PLANE_V; ++plane) {
+ for (r = 0; r < RESTORE_SWITCHABLE_TYPES; ++r) {
+ cost_restore[r] = DBL_MAX;
+ if (force_restore_type != 0)
+ if (r != RESTORE_NONE && r != force_restore_type) continue;
+ cost_restore[r] =
+ search_restore_fun[r](src, cpi, method == LPF_PICK_FROM_SUBIMAGE,
+ plane, &cm->rst_info[plane], restore_types[r],
+ tile_cost[r], &cpi->trial_frame_rst);
+ }
+ if (plane == AOM_PLANE_Y)
+ cost_restore[RESTORE_SWITCHABLE] =
+ search_switchable_restoration(cpi, method == LPF_PICK_FROM_SUBIMAGE,
+ plane, &cm->rst_info[plane], tile_cost);
+ else
+ cost_restore[RESTORE_SWITCHABLE] = DBL_MAX;
+ best_cost_restore = DBL_MAX;
+ best_restore = 0;
+ for (r = 0; r < RESTORE_TYPES; ++r) {
+ if (force_restore_type != 0)
+ if (r != RESTORE_NONE && r != force_restore_type) continue;
+ if (cost_restore[r] < best_cost_restore) {
+ best_restore = r;
+ best_cost_restore = cost_restore[r];
+ }
+ }
+ cm->rst_info[plane].frame_restoration_type = best_restore;
if (force_restore_type != 0)
- if (r != RESTORE_NONE && r != force_restore_type) continue;
- if (cost_restore[r] < best_cost_restore) {
- best_restore = r;
- best_cost_restore = cost_restore[r];
+ assert(best_restore == force_restore_type ||
+ best_restore == RESTORE_NONE);
+ if (best_restore != RESTORE_SWITCHABLE) {
+ const int nt = (plane == AOM_PLANE_Y ? ntiles_y : ntiles_uv);
+ memcpy(cm->rst_info[plane].restoration_type, restore_types[best_restore],
+ nt * sizeof(restore_types[best_restore][0]));
}
}
- cm->rst_info[0].frame_restoration_type = best_restore;
- if (force_restore_type != 0)
- assert(best_restore == force_restore_type || best_restore == RESTORE_NONE);
- if (best_restore != RESTORE_SWITCHABLE) {
- memcpy(cm->rst_info[0].restoration_type, restore_types[best_restore],
- ntiles * sizeof(restore_types[best_restore][0]));
- }
-
- // Color components
- search_wiener_uv(src, cpi, method == LPF_PICK_FROM_SUBIMAGE, AOM_PLANE_U,
- &cm->rst_info[AOM_PLANE_U],
- cm->rst_info[AOM_PLANE_U].restoration_type,
- &cpi->trial_frame_rst);
- search_wiener_uv(src, cpi, method == LPF_PICK_FROM_SUBIMAGE, AOM_PLANE_V,
- &cm->rst_info[AOM_PLANE_V],
- cm->rst_info[AOM_PLANE_V].restoration_type,
- &cpi->trial_frame_rst);
/*
- printf("Frame %d/%d restore types: %d %d %d\n",
- cm->current_video_frame, cm->show_frame,
- cm->rst_info[0].frame_restoration_type,
+ printf("Frame %d/%d restore types: %d %d %d\n", cm->current_video_frame,
+ cm->show_frame, cm->rst_info[0].frame_restoration_type,
cm->rst_info[1].frame_restoration_type,
cm->rst_info[2].frame_restoration_type);
printf("Frame %d/%d frame_restore_type %d : %f %f %f %f\n",
diff --git a/third_party/aom/av1/encoder/pvq_encoder.c b/third_party/aom/av1/encoder/pvq_encoder.c
index ab63f1b7d..9d5133012 100644
--- a/third_party/aom/av1/encoder/pvq_encoder.c
+++ b/third_party/aom/av1/encoder/pvq_encoder.c
@@ -247,23 +247,23 @@ static double od_pvq_rate(int qg, int icgr, int theta, int ts,
aom_writer w;
od_pvq_codeword_ctx cd;
int tell;
-#if CONFIG_DAALA_EC
+#if !CONFIG_ANS
od_ec_enc_init(&w.ec, 1000);
#else
-# error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
+# error "CONFIG_PVQ currently requires !CONFIG_ANS."
#endif
OD_COPY(&cd, &adapt->pvq.pvq_codeword_ctx, 1);
-#if CONFIG_DAALA_EC
+#if !CONFIG_ANS
tell = od_ec_enc_tell_frac(&w.ec);
#else
-# error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
+# error "CONFIG_PVQ currently requires !CONFIG_ANS."
#endif
aom_encode_pvq_codeword(&w, &cd, y0, n - (theta != -1), k);
-#if CONFIG_DAALA_EC
+#if !CONFIG_ANS
rate = (od_ec_enc_tell_frac(&w.ec)-tell)/8.;
od_ec_enc_clear(&w.ec);
#else
-# error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
+# error "CONFIG_PVQ currently requires !CONFIG_ANS."
#endif
}
if (qg > 0 && theta >= 0) {
@@ -847,22 +847,22 @@ PVQ_SKIP_TYPE od_pvq_encode(daala_enc_ctx *enc,
int tell2;
od_rollback_buffer dc_buf;
- dc_rate = -OD_LOG2((double)(skip_cdf[3] - skip_cdf[2])/
- (double)(skip_cdf[2] - skip_cdf[1]));
+ dc_rate = -OD_LOG2((double)(OD_ICDF(skip_cdf[3]) - OD_ICDF(skip_cdf[2]))/
+ (double)(OD_ICDF(skip_cdf[2]) - OD_ICDF(skip_cdf[1])));
dc_rate += 1;
-#if CONFIG_DAALA_EC
+#if !CONFIG_ANS
tell2 = od_ec_enc_tell_frac(&enc->w.ec);
#else
-#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
+#error "CONFIG_PVQ currently requires !CONFIG_ANS."
#endif
od_encode_checkpoint(enc, &dc_buf);
generic_encode(&enc->w, &enc->state.adapt->model_dc[pli],
n - 1, &enc->state.adapt->ex_dc[pli][bs][0], 2);
-#if CONFIG_DAALA_EC
+#if !CONFIG_ANS
tell2 = od_ec_enc_tell_frac(&enc->w.ec) - tell2;
#else
-#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
+#error "CONFIG_PVQ currently requires !CONFIG_ANS."
#endif
dc_rate += tell2/8.0;
od_encode_rollback(enc, &dc_buf);
@@ -871,10 +871,10 @@ PVQ_SKIP_TYPE od_pvq_encode(daala_enc_ctx *enc,
enc->pvq_norm_lambda);
}
}
-#if CONFIG_DAALA_EC
+#if !CONFIG_ANS
tell = od_ec_enc_tell_frac(&enc->w.ec);
#else
-#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
+#error "CONFIG_PVQ currently requires !CONFIG_ANS."
#endif
/* Code as if we're not skipping. */
aom_write_symbol(&enc->w, 2 + (out[0] != 0), skip_cdf, 4);
@@ -921,22 +921,22 @@ PVQ_SKIP_TYPE od_pvq_encode(daala_enc_ctx *enc,
}
if (encode_flip) cfl_encoded = 1;
}
-#if CONFIG_DAALA_EC
+#if !CONFIG_ANS
tell = od_ec_enc_tell_frac(&enc->w.ec) - tell;
#else
-#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
+#error "CONFIG_PVQ currently requires !CONFIG_ANS."
#endif
/* Account for the rate of skipping the AC, based on the same DC decision
we made when trying to not skip AC. */
{
double skip_rate;
if (out[0] != 0) {
- skip_rate = -OD_LOG2((skip_cdf[1] - skip_cdf[0])/
- (double)skip_cdf[3]);
+ skip_rate = -OD_LOG2((OD_ICDF(skip_cdf[1]) - OD_ICDF(skip_cdf[0]))/
+ (double)OD_ICDF(skip_cdf[3]));
}
else {
- skip_rate = -OD_LOG2(skip_cdf[0]/
- (double)skip_cdf[3]);
+ skip_rate = -OD_LOG2(OD_ICDF(skip_cdf[0])/
+ (double)OD_ICDF(skip_cdf[3]));
}
tell -= (int)floor(.5+8*skip_rate);
}
@@ -951,22 +951,22 @@ PVQ_SKIP_TYPE od_pvq_encode(daala_enc_ctx *enc,
int tell2;
od_rollback_buffer dc_buf;
- dc_rate = -OD_LOG2((double)(skip_cdf[1] - skip_cdf[0])/
- (double)skip_cdf[0]);
+ dc_rate = -OD_LOG2((double)(OD_ICDF(skip_cdf[1]) - OD_ICDF(skip_cdf[0]))/
+ (double)OD_ICDF(skip_cdf[0]));
dc_rate += 1;
-#if CONFIG_DAALA_EC
+#if !CONFIG_ANS
tell2 = od_ec_enc_tell_frac(&enc->w.ec);
#else
-#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
+#error "CONFIG_PVQ currently requires !CONFIG_ANS."
#endif
od_encode_checkpoint(enc, &dc_buf);
generic_encode(&enc->w, &enc->state.adapt->model_dc[pli],
n - 1, &enc->state.adapt->ex_dc[pli][bs][0], 2);
-#if CONFIG_DAALA_EC
+#if !CONFIG_ANS
tell2 = od_ec_enc_tell_frac(&enc->w.ec) - tell2;
#else
-#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
+#error "CONFIG_PVQ currently requires !CONFIG_ANS."
#endif
dc_rate += tell2/8.0;
od_encode_rollback(enc, &dc_buf);
diff --git a/third_party/aom/av1/encoder/ransac.c b/third_party/aom/av1/encoder/ransac.c
index 5d5dd7572..bbd2d179c 100644
--- a/third_party/aom/av1/encoder/ransac.c
+++ b/third_party/aom/av1/encoder/ransac.c
@@ -8,7 +8,6 @@
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#define _POSIX_C_SOURCE 200112L // rand_r()
#include <memory.h>
#include <math.h>
#include <time.h>
@@ -17,6 +16,7 @@
#include <assert.h>
#include "av1/encoder/ransac.h"
+#include "av1/encoder/mathutils.h"
#define MAX_MINPTS 4
#define MAX_DEGENERATE_ITER 10
@@ -133,309 +133,6 @@ static void project_points_double_homography(double *mat, double *points,
}
}
-///////////////////////////////////////////////////////////////////////////////
-// svdcmp
-// Adopted from Numerical Recipes in C
-
-static const double TINY_NEAR_ZERO = 1.0E-12;
-
-static INLINE double sign(double a, double b) {
- return ((b) >= 0 ? fabs(a) : -fabs(a));
-}
-
-static INLINE double pythag(double a, double b) {
- double ct;
- const double absa = fabs(a);
- const double absb = fabs(b);
-
- if (absa > absb) {
- ct = absb / absa;
- return absa * sqrt(1.0 + ct * ct);
- } else {
- ct = absa / absb;
- return (absb == 0) ? 0 : absb * sqrt(1.0 + ct * ct);
- }
-}
-
-static void multiply_mat(const double *m1, const double *m2, double *res,
- const int m1_rows, const int inner_dim,
- const int m2_cols) {
- double sum;
-
- int row, col, inner;
- for (row = 0; row < m1_rows; ++row) {
- for (col = 0; col < m2_cols; ++col) {
- sum = 0;
- for (inner = 0; inner < inner_dim; ++inner)
- sum += m1[row * inner_dim + inner] * m2[inner * m2_cols + col];
- *(res++) = sum;
- }
- }
-}
-
-static int svdcmp(double **u, int m, int n, double w[], double **v) {
- const int max_its = 30;
- int flag, i, its, j, jj, k, l, nm;
- double anorm, c, f, g, h, s, scale, x, y, z;
- double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1));
- g = scale = anorm = 0.0;
- for (i = 0; i < n; i++) {
- l = i + 1;
- rv1[i] = scale * g;
- g = s = scale = 0.0;
- if (i < m) {
- for (k = i; k < m; k++) scale += fabs(u[k][i]);
- if (scale != 0.) {
- for (k = i; k < m; k++) {
- u[k][i] /= scale;
- s += u[k][i] * u[k][i];
- }
- f = u[i][i];
- g = -sign(sqrt(s), f);
- h = f * g - s;
- u[i][i] = f - g;
- for (j = l; j < n; j++) {
- for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j];
- f = s / h;
- for (k = i; k < m; k++) u[k][j] += f * u[k][i];
- }
- for (k = i; k < m; k++) u[k][i] *= scale;
- }
- }
- w[i] = scale * g;
- g = s = scale = 0.0;
- if (i < m && i != n - 1) {
- for (k = l; k < n; k++) scale += fabs(u[i][k]);
- if (scale != 0.) {
- for (k = l; k < n; k++) {
- u[i][k] /= scale;
- s += u[i][k] * u[i][k];
- }
- f = u[i][l];
- g = -sign(sqrt(s), f);
- h = f * g - s;
- u[i][l] = f - g;
- for (k = l; k < n; k++) rv1[k] = u[i][k] / h;
- for (j = l; j < m; j++) {
- for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k];
- for (k = l; k < n; k++) u[j][k] += s * rv1[k];
- }
- for (k = l; k < n; k++) u[i][k] *= scale;
- }
- }
- anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i])));
- }
-
- for (i = n - 1; i >= 0; i--) {
- if (i < n - 1) {
- if (g != 0.) {
- for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g;
- for (j = l; j < n; j++) {
- for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j];
- for (k = l; k < n; k++) v[k][j] += s * v[k][i];
- }
- }
- for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
- }
- v[i][i] = 1.0;
- g = rv1[i];
- l = i;
- }
- for (i = AOMMIN(m, n) - 1; i >= 0; i--) {
- l = i + 1;
- g = w[i];
- for (j = l; j < n; j++) u[i][j] = 0.0;
- if (g != 0.) {
- g = 1.0 / g;
- for (j = l; j < n; j++) {
- for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j];
- f = (s / u[i][i]) * g;
- for (k = i; k < m; k++) u[k][j] += f * u[k][i];
- }
- for (j = i; j < m; j++) u[j][i] *= g;
- } else {
- for (j = i; j < m; j++) u[j][i] = 0.0;
- }
- ++u[i][i];
- }
- for (k = n - 1; k >= 0; k--) {
- for (its = 0; its < max_its; its++) {
- flag = 1;
- for (l = k; l >= 0; l--) {
- nm = l - 1;
- if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) {
- flag = 0;
- break;
- }
- if ((double)(fabs(w[nm]) + anorm) == anorm) break;
- }
- if (flag) {
- c = 0.0;
- s = 1.0;
- for (i = l; i <= k; i++) {
- f = s * rv1[i];
- rv1[i] = c * rv1[i];
- if ((double)(fabs(f) + anorm) == anorm) break;
- g = w[i];
- h = pythag(f, g);
- w[i] = h;
- h = 1.0 / h;
- c = g * h;
- s = -f * h;
- for (j = 0; j < m; j++) {
- y = u[j][nm];
- z = u[j][i];
- u[j][nm] = y * c + z * s;
- u[j][i] = z * c - y * s;
- }
- }
- }
- z = w[k];
- if (l == k) {
- if (z < 0.0) {
- w[k] = -z;
- for (j = 0; j < n; j++) v[j][k] = -v[j][k];
- }
- break;
- }
- if (its == max_its - 1) {
- aom_free(rv1);
- return 1;
- }
- assert(k > 0);
- x = w[l];
- nm = k - 1;
- y = w[nm];
- g = rv1[nm];
- h = rv1[k];
- f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
- g = pythag(f, 1.0);
- f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x;
- c = s = 1.0;
- for (j = l; j <= nm; j++) {
- i = j + 1;
- g = rv1[i];
- y = w[i];
- h = s * g;
- g = c * g;
- z = pythag(f, h);
- rv1[j] = z;
- c = f / z;
- s = h / z;
- f = x * c + g * s;
- g = g * c - x * s;
- h = y * s;
- y *= c;
- for (jj = 0; jj < n; jj++) {
- x = v[jj][j];
- z = v[jj][i];
- v[jj][j] = x * c + z * s;
- v[jj][i] = z * c - x * s;
- }
- z = pythag(f, h);
- w[j] = z;
- if (z != 0.) {
- z = 1.0 / z;
- c = f * z;
- s = h * z;
- }
- f = c * g + s * y;
- x = c * y - s * g;
- for (jj = 0; jj < m; jj++) {
- y = u[jj][j];
- z = u[jj][i];
- u[jj][j] = y * c + z * s;
- u[jj][i] = z * c - y * s;
- }
- }
- rv1[l] = 0.0;
- rv1[k] = f;
- w[k] = x;
- }
- }
- aom_free(rv1);
- return 0;
-}
-
-static int SVD(double *U, double *W, double *V, double *matx, int M, int N) {
- // Assumes allocation for U is MxN
- double **nrU = (double **)aom_malloc((M) * sizeof(*nrU));
- double **nrV = (double **)aom_malloc((N) * sizeof(*nrV));
- int problem, i;
-
- problem = !(nrU && nrV);
- if (!problem) {
- for (i = 0; i < M; i++) {
- nrU[i] = &U[i * N];
- }
- for (i = 0; i < N; i++) {
- nrV[i] = &V[i * N];
- }
- } else {
- if (nrU) aom_free(nrU);
- if (nrV) aom_free(nrV);
- return 1;
- }
-
- /* copy from given matx into nrU */
- for (i = 0; i < M; i++) {
- memcpy(&(nrU[i][0]), matx + N * i, N * sizeof(*matx));
- }
-
- /* HERE IT IS: do SVD */
- if (svdcmp(nrU, M, N, W, nrV)) {
- aom_free(nrU);
- aom_free(nrV);
- return 1;
- }
-
- /* aom_free Numerical Recipes arrays */
- aom_free(nrU);
- aom_free(nrV);
-
- return 0;
-}
-
-int pseudo_inverse(double *inv, double *matx, const int M, const int N) {
- double ans;
- int i, j, k;
- double *const U = (double *)aom_malloc(M * N * sizeof(*matx));
- double *const W = (double *)aom_malloc(N * sizeof(*matx));
- double *const V = (double *)aom_malloc(N * N * sizeof(*matx));
-
- if (!(U && W && V)) {
- return 1;
- }
- if (SVD(U, W, V, matx, M, N)) {
- aom_free(U);
- aom_free(W);
- aom_free(V);
- return 1;
- }
- for (i = 0; i < N; i++) {
- if (fabs(W[i]) < TINY_NEAR_ZERO) {
- aom_free(U);
- aom_free(W);
- aom_free(V);
- return 1;
- }
- }
-
- for (i = 0; i < N; i++) {
- for (j = 0; j < M; j++) {
- ans = 0;
- for (k = 0; k < N; k++) {
- ans += V[k + N * i] * U[k + N * j] / W[k];
- }
- inv[j + M * i] = ans;
- }
- }
- aom_free(U);
- aom_free(W);
- aom_free(V);
- return 0;
-}
-
static void normalize_homography(double *pts, int n, double *T) {
double *p = pts;
double mean[2] = { 0, 0 };
@@ -597,7 +294,7 @@ static int find_translation(int np, double *pts1, double *pts2, double *mat) {
static int find_rotzoom(int np, double *pts1, double *pts2, double *mat) {
const int np2 = np * 2;
- double *a = (double *)aom_malloc(sizeof(*a) * np2 * 9);
+ double *a = (double *)aom_malloc(sizeof(*a) * (np2 * 5 + 20));
double *b = a + np2 * 4;
double *temp = b + np2;
int i;
@@ -625,11 +322,10 @@ static int find_rotzoom(int np, double *pts1, double *pts2, double *mat) {
b[2 * i] = dx;
b[2 * i + 1] = dy;
}
- if (pseudo_inverse(temp, a, np2, 4)) {
+ if (!least_squares(4, a, np2, 4, b, temp, mat)) {
aom_free(a);
return 1;
}
- multiply_mat(temp, b, mat, 4, np2, 1);
denormalize_rotzoom_reorder(mat, T1, T2);
aom_free(a);
return 0;
@@ -637,7 +333,7 @@ static int find_rotzoom(int np, double *pts1, double *pts2, double *mat) {
static int find_affine(int np, double *pts1, double *pts2, double *mat) {
const int np2 = np * 2;
- double *a = (double *)aom_malloc(sizeof(*a) * np2 * 13);
+ double *a = (double *)aom_malloc(sizeof(*a) * (np2 * 7 + 42));
double *b = a + np2 * 6;
double *temp = b + np2;
int i;
@@ -669,11 +365,10 @@ static int find_affine(int np, double *pts1, double *pts2, double *mat) {
b[2 * i] = dx;
b[2 * i + 1] = dy;
}
- if (pseudo_inverse(temp, a, np2, 6)) {
+ if (!least_squares(6, a, np2, 6, b, temp, mat)) {
aom_free(a);
return 1;
}
- multiply_mat(temp, b, mat, 6, np2, 1);
denormalize_affine_reorder(mat, T1, T2);
aom_free(a);
return 0;
@@ -890,16 +585,22 @@ static int find_homography(int np, double *pts1, double *pts2, double *mat) {
return 0;
}
+// Generate a random number in the range [0, 32768).
+static unsigned int lcg_rand16(unsigned int *state) {
+ *state = (unsigned int)(*state * 1103515245ULL + 12345);
+ return *state / 65536 % 32768;
+}
+
static int get_rand_indices(int npoints, int minpts, int *indices,
unsigned int *seed) {
int i, j;
- int ptr = rand_r(seed) % npoints;
+ int ptr = lcg_rand16(seed) % npoints;
if (minpts > npoints) return 0;
indices[0] = ptr;
ptr = (ptr == npoints - 1 ? 0 : ptr + 1);
i = 1;
while (i < minpts) {
- int index = rand_r(seed) % npoints;
+ int index = lcg_rand16(seed) % npoints;
while (index) {
ptr = (ptr == npoints - 1 ? 0 : ptr + 1);
for (j = 0; j < i; ++j) {
@@ -986,6 +687,9 @@ static int ransac(const int *matched_points, int npoints,
double *cnp1, *cnp2;
+ for (i = 0; i < num_desired_motions; ++i) {
+ num_inliers_by_motion[i] = 0;
+ }
if (npoints < minpts * MINPTS_MULTIPLIER || npoints == 0) {
return 1;
}
@@ -1072,7 +776,7 @@ static int ransac(const int *matched_points, int npoints,
if (current_motion.num_inliers >= worst_kept_motion->num_inliers &&
current_motion.num_inliers > 1) {
int temp;
- double fracinliers, pNoOutliers, mean_distance;
+ double fracinliers, pNoOutliers, mean_distance, dtemp;
mean_distance = sum_distance / ((double)current_motion.num_inliers);
current_motion.variance =
sum_distance_squared / ((double)current_motion.num_inliers - 1.0) -
@@ -1092,7 +796,10 @@ static int ransac(const int *matched_points, int npoints,
pNoOutliers = 1 - pow(fracinliers, minpts);
pNoOutliers = fmax(EPS, pNoOutliers);
pNoOutliers = fmin(1 - EPS, pNoOutliers);
- temp = (int)(log(1.0 - PROBABILITY_REQUIRED) / log(pNoOutliers));
+ dtemp = log(1.0 - PROBABILITY_REQUIRED) / log(pNoOutliers);
+ temp = (dtemp > (double)INT32_MAX)
+ ? INT32_MAX
+ : dtemp < (double)INT32_MIN ? INT32_MIN : (int)dtemp;
if (temp > 0 && temp < N) {
N = AOMMAX(temp, MIN_TRIALS);
diff --git a/third_party/aom/av1/encoder/ratectrl.c b/third_party/aom/av1/encoder/ratectrl.c
index 1f2ea3606..4552c674e 100644
--- a/third_party/aom/av1/encoder/ratectrl.c
+++ b/third_party/aom/av1/encoder/ratectrl.c
@@ -93,6 +93,11 @@ static int gf_low = 400;
static int kf_high = 5000;
static int kf_low = 400;
+double av1_resize_rate_factor(const AV1_COMP *cpi) {
+ return (double)(cpi->resize_scale_den * cpi->resize_scale_den) /
+ (cpi->resize_scale_num * cpi->resize_scale_num);
+}
+
// Functions to compute the active minq lookup table entries based on a
// formulaic approach to facilitate easier adjustment of the Q tables.
// The formulae were derived from computing a 3rd order polynomial best
@@ -384,7 +389,7 @@ static double get_rate_correction_factor(const AV1_COMP *cpi) {
else
rcf = rc->rate_correction_factors[INTER_NORMAL];
}
- rcf *= rcf_mult[rc->frame_size_selector];
+ rcf *= av1_resize_rate_factor(cpi);
return fclamp(rcf, MIN_BPB_FACTOR, MAX_BPB_FACTOR);
}
@@ -392,7 +397,7 @@ static void set_rate_correction_factor(AV1_COMP *cpi, double factor) {
RATE_CONTROL *const rc = &cpi->rc;
// Normalize RCF to account for the size-dependent scaling factor.
- factor /= rcf_mult[cpi->rc.frame_size_selector];
+ factor /= av1_resize_rate_factor(cpi);
factor = fclamp(factor, MIN_BPB_FACTOR, MAX_BPB_FACTOR);
@@ -1076,7 +1081,7 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int *bottom_index,
}
// Modify active_best_quality for downscaled normal frames.
- if (rc->frame_size_selector != UNSCALED && !frame_is_kf_gf_arf(cpi)) {
+ if (!av1_resize_unscaled(cpi) && !frame_is_kf_gf_arf(cpi)) {
int qdelta = av1_compute_qdelta_by_rate(
rc, cm->frame_type, active_best_quality, 2.0, cm->bit_depth);
active_best_quality =
@@ -1158,11 +1163,10 @@ void av1_rc_set_frame_target(AV1_COMP *cpi, int target) {
rc->this_frame_target = target;
- // Modify frame size target when down-scaling.
- if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC &&
- rc->frame_size_selector != UNSCALED)
- rc->this_frame_target = (int)(rc->this_frame_target *
- rate_thresh_mult[rc->frame_size_selector]);
+ // Modify frame size target when down-scaled.
+ if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC && !av1_resize_unscaled(cpi))
+ rc->this_frame_target =
+ (int)(rc->this_frame_target * av1_resize_rate_factor(cpi));
// Target rate per SB64 (including partial SB64s).
rc->sb64_target_rate = (int)((int64_t)rc->this_frame_target * 64 * 64) /
@@ -1225,7 +1229,6 @@ static void update_golden_frame_stats(AV1_COMP *cpi) {
void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
const AV1_COMMON *const cm = &cpi->common;
- const AV1EncoderConfig *const oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
const int qindex = cm->base_qindex;
@@ -1317,13 +1320,6 @@ void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
rc->frames_since_key++;
rc->frames_to_key--;
}
-
- // Trigger the resizing of the next frame if it is scaled.
- if (oxcf->pass != 0) {
- cpi->resize_pending =
- rc->next_frame_size_selector != rc->frame_size_selector;
- rc->frame_size_selector = rc->next_frame_size_selector;
- }
}
void av1_rc_postencode_update_drop_frame(AV1_COMP *cpi) {
@@ -1501,10 +1497,7 @@ void av1_rc_get_one_pass_cbr_params(AV1_COMP *cpi) {
target = calc_pframe_target_size_one_pass_cbr(cpi);
av1_rc_set_frame_target(cpi, target);
- if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC)
- cpi->resize_pending = av1_resize_one_pass_cbr(cpi);
- else
- cpi->resize_pending = 0;
+ // TODO(afergs): Decide whether to scale up, down, or not at all
}
int av1_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget,
@@ -1670,90 +1663,3 @@ void av1_set_target_rate(AV1_COMP *cpi) {
vbr_rate_correction(cpi, &target_rate);
av1_rc_set_frame_target(cpi, target_rate);
}
-
-// Check if we should resize, based on average QP from past x frames.
-// Only allow for resize at most one scale down for now, scaling factor is 2.
-int av1_resize_one_pass_cbr(AV1_COMP *cpi) {
- const AV1_COMMON *const cm = &cpi->common;
- RATE_CONTROL *const rc = &cpi->rc;
- int resize_now = 0;
- cpi->resize_scale_num = 1;
- cpi->resize_scale_den = 1;
- // Don't resize on key frame; reset the counters on key frame.
- if (cm->frame_type == KEY_FRAME) {
- cpi->resize_avg_qp = 0;
- cpi->resize_count = 0;
- return 0;
- }
- // Resize based on average buffer underflow and QP over some window.
- // Ignore samples close to key frame, since QP is usually high after key.
- if (cpi->rc.frames_since_key > 2 * cpi->framerate) {
- const int window = (int)(5 * cpi->framerate);
- cpi->resize_avg_qp += cm->base_qindex;
- if (cpi->rc.buffer_level < (int)(30 * rc->optimal_buffer_level / 100))
- ++cpi->resize_buffer_underflow;
- ++cpi->resize_count;
- // Check for resize action every "window" frames.
- if (cpi->resize_count >= window) {
- int avg_qp = cpi->resize_avg_qp / cpi->resize_count;
- // Resize down if buffer level has underflowed sufficent amount in past
- // window, and we are at original resolution.
- // Resize back up if average QP is low, and we are currently in a resized
- // down state.
- if (cpi->resize_state == 0 &&
- cpi->resize_buffer_underflow > (cpi->resize_count >> 2)) {
- resize_now = 1;
- cpi->resize_state = 1;
- } else if (cpi->resize_state == 1 &&
- avg_qp < 40 * cpi->rc.worst_quality / 100) {
- resize_now = -1;
- cpi->resize_state = 0;
- }
- // Reset for next window measurement.
- cpi->resize_avg_qp = 0;
- cpi->resize_count = 0;
- cpi->resize_buffer_underflow = 0;
- }
- }
- // If decision is to resize, reset some quantities, and check is we should
- // reduce rate correction factor,
- if (resize_now != 0) {
- int target_bits_per_frame;
- int active_worst_quality;
- int qindex;
- int tot_scale_change;
- // For now, resize is by 1/2 x 1/2.
- cpi->resize_scale_num = 1;
- cpi->resize_scale_den = 2;
- tot_scale_change = (cpi->resize_scale_den * cpi->resize_scale_den) /
- (cpi->resize_scale_num * cpi->resize_scale_num);
- // Reset buffer level to optimal, update target size.
- rc->buffer_level = rc->optimal_buffer_level;
- rc->bits_off_target = rc->optimal_buffer_level;
- rc->this_frame_target = calc_pframe_target_size_one_pass_cbr(cpi);
- // Reset cyclic refresh parameters.
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
- av1_cyclic_refresh_reset_resize(cpi);
- // Get the projected qindex, based on the scaled target frame size (scaled
- // so target_bits_per_mb in av1_rc_regulate_q will be correct target).
- target_bits_per_frame = (resize_now == 1)
- ? rc->this_frame_target * tot_scale_change
- : rc->this_frame_target / tot_scale_change;
- active_worst_quality = calc_active_worst_quality_one_pass_cbr(cpi);
- qindex = av1_rc_regulate_q(cpi, target_bits_per_frame, rc->best_quality,
- active_worst_quality);
- // If resize is down, check if projected q index is close to worst_quality,
- // and if so, reduce the rate correction factor (since likely can afford
- // lower q for resized frame).
- if (resize_now == 1 && qindex > 90 * cpi->rc.worst_quality / 100) {
- rc->rate_correction_factors[INTER_NORMAL] *= 0.85;
- }
- // If resize is back up, check if projected q index is too much above the
- // current base_qindex, and if so, reduce the rate correction factor
- // (since prefer to keep q for resized frame at least close to previous q).
- if (resize_now == -1 && qindex > 130 * cm->base_qindex / 100) {
- rc->rate_correction_factors[INTER_NORMAL] *= 0.9;
- }
- }
- return resize_now;
-}
diff --git a/third_party/aom/av1/encoder/ratectrl.h b/third_party/aom/av1/encoder/ratectrl.h
index 93a9b4939..61bb0c224 100644
--- a/third_party/aom/av1/encoder/ratectrl.h
+++ b/third_party/aom/av1/encoder/ratectrl.h
@@ -49,27 +49,6 @@ typedef enum {
} RATE_FACTOR_LEVEL;
#endif // CONFIG_EXT_REFS
-// Internal frame scaling level.
-typedef enum {
- UNSCALED = 0, // Frame is unscaled.
- SCALE_STEP1 = 1, // First-level down-scaling.
- FRAME_SCALE_STEPS
-} FRAME_SCALE_LEVEL;
-
-// Frame dimensions multiplier wrt the native frame size, in 1/16ths,
-// specified for the scale-up case.
-// e.g. 24 => 16/24 = 2/3 of native size. The restriction to 1/16th is
-// intended to match the capabilities of the normative scaling filters,
-// giving precedence to the up-scaling accuracy.
-static const int frame_scale_factor[FRAME_SCALE_STEPS] = { 16, 24 };
-
-// Multiplier of the target rate to be used as threshold for triggering scaling.
-static const double rate_thresh_mult[FRAME_SCALE_STEPS] = { 1.0, 2.0 };
-
-// Scale dependent Rate Correction Factor multipliers. Compensates for the
-// greater number of bits per pixel generated in down-scaled frames.
-static const double rcf_mult[FRAME_SCALE_STEPS] = { 1.0, 2.0 };
-
typedef struct {
// Rate targetting variables
int base_frame_target; // A baseline frame target before adjustment
@@ -162,10 +141,6 @@ typedef struct {
int q_2_frame;
// Auto frame-scaling variables.
- FRAME_SCALE_LEVEL frame_size_selector;
- FRAME_SCALE_LEVEL next_frame_size_selector;
- int frame_width[FRAME_SCALE_STEPS];
- int frame_height[FRAME_SCALE_STEPS];
int rf_level_maxq[RATE_FACTOR_LEVELS];
} RATE_CONTROL;
@@ -214,6 +189,10 @@ int av1_rc_get_default_max_gf_interval(double framerate, int min_frame_rate);
void av1_rc_get_one_pass_vbr_params(struct AV1_COMP *cpi);
void av1_rc_get_one_pass_cbr_params(struct AV1_COMP *cpi);
+// How many times fewer pixels there are to encode given the current scaling.
+// Temporary replacement for rcf_mult and rate_thresh_mult.
+double av1_resize_rate_factor(const struct AV1_COMP *cpi);
+
// Post encode update of the rate control parameters based
// on bytes used
void av1_rc_postencode_update(struct AV1_COMP *cpi, uint64_t bytes_used);
diff --git a/third_party/aom/av1/encoder/rd.c b/third_party/aom/av1/encoder/rd.c
index f06e569e7..94c3bb96d 100644
--- a/third_party/aom/av1/encoder/rd.c
+++ b/third_party/aom/av1/encoder/rd.c
@@ -330,7 +330,6 @@ static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd) {
}
}
-#if CONFIG_REF_MV
void av1_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame, int ref,
int ref_mv_idx) {
MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
@@ -340,19 +339,14 @@ void av1_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame, int ref,
(void)ref_frame;
x->mvcost = x->mv_cost_stack[nmv_ctx];
x->nmvjointcost = x->nmv_vec_cost[nmv_ctx];
- x->mvsadcost = x->mvcost;
- x->nmvjointsadcost = x->nmvjointcost;
}
-#endif
void av1_initialize_rd_consts(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->td.mb;
RD_OPT *const rd = &cpi->rd;
int i;
-#if CONFIG_REF_MV
int nmv_ctx;
-#endif
aom_clear_system_state();
@@ -363,7 +357,6 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) {
set_block_thresholds(cm, rd);
-#if CONFIG_REF_MV
for (nmv_ctx = 0; nmv_ctx < NMV_CONTEXTS; ++nmv_ctx) {
av1_build_nmv_cost_table(
x->nmv_vec_cost[nmv_ctx],
@@ -373,19 +366,11 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) {
}
x->mvcost = x->mv_cost_stack[0];
x->nmvjointcost = x->nmv_vec_cost[0];
- x->mvsadcost = x->mvcost;
- x->nmvjointsadcost = x->nmvjointcost;
-#else
- av1_build_nmv_cost_table(
- x->nmvjointcost, cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
- &cm->fc->nmvc, cm->allow_high_precision_mv);
-#endif
if (cpi->oxcf.pass != 1) {
av1_fill_token_costs(x->token_costs, cm->fc->coef_probs);
- if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
- cm->frame_type == KEY_FRAME) {
+ if (cm->frame_type == KEY_FRAME) {
#if CONFIG_EXT_PARTITION_TYPES
for (i = 0; i < PARTITION_PLOFFSET; ++i)
av1_cost_tokens(cpi->partition_cost[i], cm->fc->partition_prob[i],
@@ -425,7 +410,6 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) {
fill_mode_costs(cpi);
if (!frame_is_intra_only(cm)) {
-#if CONFIG_REF_MV
for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
cpi->newmv_mode_cost[i][0] = av1_cost_bit(cm->fc->newmv_prob[i], 0);
cpi->newmv_mode_cost[i][1] = av1_cost_bit(cm->fc->newmv_prob[i], 1);
@@ -445,20 +429,17 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) {
cpi->drl_mode_cost0[i][0] = av1_cost_bit(cm->fc->drl_prob[i], 0);
cpi->drl_mode_cost0[i][1] = av1_cost_bit(cm->fc->drl_prob[i], 1);
}
-#else
- for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
- av1_cost_tokens((int *)cpi->inter_mode_cost[i],
- cm->fc->inter_mode_probs[i], av1_inter_mode_tree);
-#endif // CONFIG_REF_MV
#if CONFIG_EXT_INTER
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
av1_cost_tokens((int *)cpi->inter_compound_mode_cost[i],
cm->fc->inter_compound_mode_probs[i],
av1_inter_compound_mode_tree);
+#if CONFIG_INTERINTRA
for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
av1_cost_tokens((int *)cpi->interintra_mode_cost[i],
cm->fc->interintra_mode_prob[i],
av1_interintra_mode_tree);
+#endif // CONFIG_INTERINTRA
#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
for (i = BLOCK_8X8; i < BLOCK_SIZES; i++) {
@@ -575,9 +556,15 @@ static void get_entropy_contexts_plane(
const ENTROPY_CONTEXT *const above = pd->above_context;
const ENTROPY_CONTEXT *const left = pd->left_context;
+#if CONFIG_LV_MAP
+ memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
+ memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
+ return;
+#endif // CONFIG_LV_MAP
+
int i;
-#if CONFIG_CB4X4
+#if CONFIG_CHROMA_2X2
switch (tx_size) {
case TX_2X2:
memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
@@ -609,6 +596,20 @@ static void get_entropy_contexts_plane(
t_left[i] =
!!(*(const uint64_t *)&left[i] | *(const uint64_t *)&left[i + 8]);
break;
+#if CONFIG_TX64X64
+ case TX_64X64:
+ for (i = 0; i < num_4x4_w; i += 32)
+ t_above[i] =
+ !!(*(const uint64_t *)&above[i] | *(const uint64_t *)&above[i + 8] |
+ *(const uint64_t *)&above[i + 16] |
+ *(const uint64_t *)&above[i + 24]);
+ for (i = 0; i < num_4x4_h; i += 32)
+ t_left[i] =
+ !!(*(const uint64_t *)&left[i] | *(const uint64_t *)&left[i + 8] |
+ *(const uint64_t *)&left[i + 16] |
+ *(const uint64_t *)&left[i + 24]);
+ break;
+#endif // CONFIG_TX64X64
case TX_4X8:
for (i = 0; i < num_4x4_w; i += 2)
t_above[i] = !!*(const uint16_t *)&above[i];
@@ -647,11 +648,39 @@ static void get_entropy_contexts_plane(
for (i = 0; i < num_4x4_h; i += 8)
t_left[i] = !!*(const uint64_t *)&left[i];
break;
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+ case TX_4X16:
+ for (i = 0; i < num_4x4_w; i += 2)
+ t_above[i] = !!*(const uint16_t *)&above[i];
+ for (i = 0; i < num_4x4_h; i += 8)
+ t_left[i] = !!*(const uint64_t *)&left[i];
+ break;
+ case TX_16X4:
+ for (i = 0; i < num_4x4_w; i += 8)
+ t_above[i] = !!*(const uint64_t *)&above[i];
+ for (i = 0; i < num_4x4_h; i += 2)
+ t_left[i] = !!*(const uint16_t *)&left[i];
+ break;
+ case TX_8X32:
+ for (i = 0; i < num_4x4_w; i += 4)
+ t_above[i] = !!*(const uint32_t *)&above[i];
+ for (i = 0; i < num_4x4_h; i += 16)
+ t_left[i] =
+ !!(*(const uint64_t *)&left[i] | *(const uint64_t *)&left[i + 8]);
+ break;
+ case TX_32X8:
+ for (i = 0; i < num_4x4_w; i += 16)
+ t_above[i] =
+ !!(*(const uint64_t *)&above[i] | *(const uint64_t *)&above[i + 8]);
+ for (i = 0; i < num_4x4_h; i += 4)
+ t_left[i] = !!*(const uint32_t *)&left[i];
+ break;
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
default: assert(0 && "Invalid transform size."); break;
}
return;
-#endif
+#endif // CONFIG_CHROMA_2X2
switch (tx_size) {
case TX_4X4:
@@ -720,6 +749,30 @@ static void get_entropy_contexts_plane(
for (i = 0; i < num_4x4_h; i += 4)
t_left[i] = !!*(const uint32_t *)&left[i];
break;
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+ case TX_4X16:
+ memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
+ for (i = 0; i < num_4x4_h; i += 4)
+ t_left[i] = !!*(const uint32_t *)&left[i];
+ break;
+ case TX_16X4:
+ for (i = 0; i < num_4x4_w; i += 4)
+ t_above[i] = !!*(const uint32_t *)&above[i];
+ memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
+ break;
+ case TX_8X32:
+ for (i = 0; i < num_4x4_w; i += 2)
+ t_above[i] = !!*(const uint16_t *)&above[i];
+ for (i = 0; i < num_4x4_h; i += 8)
+ t_left[i] = !!*(const uint64_t *)&left[i];
+ break;
+ case TX_32X8:
+ for (i = 0; i < num_4x4_w; i += 8)
+ t_above[i] = !!*(const uint64_t *)&above[i];
+ for (i = 0; i < num_4x4_h; i += 2)
+ t_left[i] = !!*(const uint16_t *)&left[i];
+ break;
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
default: assert(0 && "Invalid transform size."); break;
}
}
@@ -728,7 +781,12 @@ void av1_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
const struct macroblockd_plane *pd,
ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE],
ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE]) {
+#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
+ const BLOCK_SIZE plane_bsize =
+ AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
+#else
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+#endif
get_entropy_contexts_plane(plane_bsize, tx_size, pd, t_above, t_left);
}
@@ -740,27 +798,25 @@ void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
int best_sad = INT_MAX;
int this_sad = INT_MAX;
int max_mv = 0;
- int near_same_nearest;
uint8_t *src_y_ptr = x->plane[0].src.buf;
uint8_t *ref_y_ptr;
- const int num_mv_refs =
- MAX_MV_REF_CANDIDATES +
- (cpi->sf.adaptive_motion_search && block_size < x->max_partition_size);
-
- MV pred_mv[3];
- pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
- pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
- pred_mv[2] = x->pred_mv[ref_frame];
+ MV pred_mv[MAX_MV_REF_CANDIDATES + 1];
+ int num_mv_refs = 0;
+
+ pred_mv[num_mv_refs++] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
+ if (x->mbmi_ext->ref_mvs[ref_frame][0].as_int !=
+ x->mbmi_ext->ref_mvs[ref_frame][1].as_int) {
+ pred_mv[num_mv_refs++] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
+ }
+ if (cpi->sf.adaptive_motion_search && block_size < x->max_partition_size)
+ pred_mv[num_mv_refs++] = x->pred_mv[ref_frame];
+
assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));
- near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
- x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
// Get the sad for each candidate reference mv.
for (i = 0; i < num_mv_refs; ++i) {
const MV *this_mv = &pred_mv[i];
int fp_row, fp_col;
-
- if (i == 1 && near_same_nearest) continue;
fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
max_mv = AOMMAX(max_mv, AOMMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
@@ -959,8 +1015,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
#if CONFIG_EXT_INTER
- rd->thresh_mult[THR_COMP_NEAREST_NEARLA] += 1200;
- rd->thresh_mult[THR_COMP_NEAR_NEARESTLA] += 1200;
rd->thresh_mult[THR_COMP_NEAR_NEARLA] += 1200;
rd->thresh_mult[THR_COMP_NEAREST_NEWLA] += 1500;
rd->thresh_mult[THR_COMP_NEW_NEARESTLA] += 1500;
@@ -970,8 +1024,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_ZERO_ZEROLA] += 2500;
#if CONFIG_EXT_REFS
- rd->thresh_mult[THR_COMP_NEAREST_NEARL2A] += 1200;
- rd->thresh_mult[THR_COMP_NEAR_NEARESTL2A] += 1200;
rd->thresh_mult[THR_COMP_NEAR_NEARL2A] += 1200;
rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] += 1500;
rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] += 1500;
@@ -980,8 +1032,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_NEW_NEWL2A] += 2000;
rd->thresh_mult[THR_COMP_ZERO_ZEROL2A] += 2500;
- rd->thresh_mult[THR_COMP_NEAREST_NEARL3A] += 1200;
- rd->thresh_mult[THR_COMP_NEAR_NEARESTL3A] += 1200;
rd->thresh_mult[THR_COMP_NEAR_NEARL3A] += 1200;
rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] += 1500;
rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] += 1500;
@@ -991,8 +1041,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_ZERO_ZEROL3A] += 2500;
#endif // CONFIG_EXT_REFS
- rd->thresh_mult[THR_COMP_NEAREST_NEARGA] += 1200;
- rd->thresh_mult[THR_COMP_NEAR_NEARESTGA] += 1200;
rd->thresh_mult[THR_COMP_NEAR_NEARGA] += 1200;
rd->thresh_mult[THR_COMP_NEAREST_NEWGA] += 1500;
rd->thresh_mult[THR_COMP_NEW_NEARESTGA] += 1500;
@@ -1002,8 +1050,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_ZERO_ZEROGA] += 2500;
#if CONFIG_EXT_REFS
- rd->thresh_mult[THR_COMP_NEAREST_NEARLB] += 1200;
- rd->thresh_mult[THR_COMP_NEAR_NEARESTLB] += 1200;
rd->thresh_mult[THR_COMP_NEAR_NEARLB] += 1200;
rd->thresh_mult[THR_COMP_NEAREST_NEWLB] += 1500;
rd->thresh_mult[THR_COMP_NEW_NEARESTLB] += 1500;
@@ -1012,8 +1058,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_NEW_NEWLB] += 2000;
rd->thresh_mult[THR_COMP_ZERO_ZEROLB] += 2500;
- rd->thresh_mult[THR_COMP_NEAREST_NEARL2B] += 1200;
- rd->thresh_mult[THR_COMP_NEAR_NEARESTL2B] += 1200;
rd->thresh_mult[THR_COMP_NEAR_NEARL2B] += 1200;
rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] += 1500;
rd->thresh_mult[THR_COMP_NEW_NEARESTL2B] += 1500;
@@ -1022,8 +1066,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_NEW_NEWL2B] += 2000;
rd->thresh_mult[THR_COMP_ZERO_ZEROL2B] += 2500;
- rd->thresh_mult[THR_COMP_NEAREST_NEARL3B] += 1200;
- rd->thresh_mult[THR_COMP_NEAR_NEARESTL3B] += 1200;
rd->thresh_mult[THR_COMP_NEAR_NEARL3B] += 1200;
rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] += 1500;
rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] += 1500;
@@ -1032,8 +1074,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_NEW_NEWL3B] += 2000;
rd->thresh_mult[THR_COMP_ZERO_ZEROL3B] += 2500;
- rd->thresh_mult[THR_COMP_NEAREST_NEARGB] += 1200;
- rd->thresh_mult[THR_COMP_NEAR_NEARESTGB] += 1200;
rd->thresh_mult[THR_COMP_NEAR_NEARGB] += 1200;
rd->thresh_mult[THR_COMP_NEAREST_NEWGB] += 1500;
rd->thresh_mult[THR_COMP_NEW_NEARESTGB] += 1500;
diff --git a/third_party/aom/av1/encoder/rd.h b/third_party/aom/av1/encoder/rd.h
index c0ac1f7e7..5c3eee493 100644
--- a/third_party/aom/av1/encoder/rd.h
+++ b/third_party/aom/av1/encoder/rd.h
@@ -130,6 +130,10 @@ typedef enum {
#if CONFIG_ALT_INTRA
THR_SMOOTH,
+#if CONFIG_SMOOTH_HV
+ THR_SMOOTH_V,
+ THR_SMOOTH_H,
+#endif // CONFIG_SMOOTH_HV
#endif // CONFIG_ALT_INTRA
#if CONFIG_EXT_INTER
@@ -357,6 +361,9 @@ static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) {
rd_stats->rdcost = 0;
rd_stats->sse = 0;
rd_stats->skip = 1;
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ rd_stats->dist_y = 0;
+#endif
#if CONFIG_RD_DEBUG
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
rd_stats->txb_coeff_cost[plane] = 0;
@@ -381,6 +388,9 @@ static INLINE void av1_invalid_rd_stats(RD_STATS *rd_stats) {
rd_stats->rdcost = INT64_MAX;
rd_stats->sse = INT64_MAX;
rd_stats->skip = 0;
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ rd_stats->dist_y = INT64_MAX;
+#endif
#if CONFIG_RD_DEBUG
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
rd_stats->txb_coeff_cost[plane] = INT_MAX;
@@ -405,6 +415,9 @@ static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst,
rd_stats_dst->dist += rd_stats_src->dist;
rd_stats_dst->sse += rd_stats_src->sse;
rd_stats_dst->skip &= rd_stats_src->skip;
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ rd_stats_dst->dist_y += rd_stats_src->dist_y;
+#endif
#if CONFIG_RD_DEBUG
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
rd_stats_dst->txb_coeff_cost[plane] += rd_stats_src->txb_coeff_cost[plane];
@@ -454,10 +467,8 @@ YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const struct AV1_COMP *cpi,
void av1_init_me_luts(void);
-#if CONFIG_REF_MV
void av1_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame, int ref,
int ref_mv_idx);
-#endif
void av1_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
const struct macroblockd_plane *pd,
diff --git a/third_party/aom/av1/encoder/rdopt.c b/third_party/aom/av1/encoder/rdopt.c
index a1096f782..2a537a06a 100644
--- a/third_party/aom/av1/encoder/rdopt.c
+++ b/third_party/aom/av1/encoder/rdopt.c
@@ -66,11 +66,18 @@
#endif // CONFIG_PVQ || CONFIG_DAALA_DIST
#if CONFIG_DUAL_FILTER
#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
+#if USE_EXTRA_FILTER
static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
{ 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 0 }, { 1, 1 },
{ 1, 2 }, { 1, 3 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
{ 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 },
};
+#else // USE_EXTRA_FILTER
+static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
+ { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 0 }, { 1, 1 },
+ { 1, 2 }, { 2, 0 }, { 2, 1 }, { 2, 2 },
+};
+#endif // USE_EXTRA_FILTER
#endif // CONFIG_DUAL_FILTER
#if CONFIG_EXT_REFS
@@ -217,11 +224,13 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
#if CONFIG_ALT_INTRA
{ SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
+#if CONFIG_SMOOTH_HV
+ { SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } },
+ { SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } },
+#endif // CONFIG_SMOOTH_HV
#endif // CONFIG_ALT_INTRA
#if CONFIG_EXT_INTER
- { NEAR_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
- { NEAREST_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
@@ -231,8 +240,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
- { NEAR_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
- { NEAREST_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
@@ -241,8 +248,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
{ ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
- { NEAR_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
- { NEAREST_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
@@ -252,8 +257,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
#endif // CONFIG_EXT_REFS
- { NEAR_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
- { NEAREST_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
@@ -263,8 +266,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
- { NEAR_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
- { NEAREST_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
@@ -273,8 +274,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
{ ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
- { NEAR_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
- { NEAREST_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
@@ -283,8 +282,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
{ ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
- { NEAR_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
- { NEAREST_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
@@ -293,8 +290,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
{ ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
- { NEAR_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
- { NEAREST_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
@@ -390,28 +385,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
#endif // CONFIG_EXT_INTER
};
-static const REF_DEFINITION av1_ref_order[MAX_REFS] = {
- { { LAST_FRAME, NONE_FRAME } },
-#if CONFIG_EXT_REFS
- { { LAST2_FRAME, NONE_FRAME } }, { { LAST3_FRAME, NONE_FRAME } },
- { { BWDREF_FRAME, NONE_FRAME } },
-#endif // CONFIG_EXT_REFS
- { { GOLDEN_FRAME, NONE_FRAME } }, { { ALTREF_FRAME, NONE_FRAME } },
-
- { { LAST_FRAME, ALTREF_FRAME } },
-#if CONFIG_EXT_REFS
- { { LAST2_FRAME, ALTREF_FRAME } }, { { LAST3_FRAME, ALTREF_FRAME } },
-#endif // CONFIG_EXT_REFS
- { { GOLDEN_FRAME, ALTREF_FRAME } },
-
-#if CONFIG_EXT_REFS
- { { LAST_FRAME, BWDREF_FRAME } }, { { LAST2_FRAME, BWDREF_FRAME } },
- { { LAST3_FRAME, BWDREF_FRAME } }, { { GOLDEN_FRAME, BWDREF_FRAME } },
-#endif // CONFIG_EXT_REFS
-
- { { INTRA_FRAME, NONE_FRAME } },
-};
-
#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
static INLINE int write_uniform_cost(int n, int v) {
const int l = get_unsigned_bits(n);
@@ -430,22 +403,6 @@ static INLINE int write_uniform_cost(int n, int v) {
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3
-static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
- DCT_1D, ADST_1D, DCT_1D, ADST_1D,
-#if CONFIG_EXT_TX
- FLIPADST_1D, DCT_1D, FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D,
- DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D, IDTX_1D,
-#endif // CONFIG_EXT_TX
-};
-
-static const TX_TYPE_1D htx_tab[TX_TYPES] = {
- DCT_1D, DCT_1D, ADST_1D, ADST_1D,
-#if CONFIG_EXT_TX
- DCT_1D, FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D,
- IDTX_1D, DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D,
-#endif // CONFIG_EXT_TX
-};
-
#if CONFIG_DAALA_DIST
static int od_compute_var_4x4(od_coeff *x, int stride) {
int sum;
@@ -603,10 +560,9 @@ static double od_compute_dist(int qm, int activity_masking, od_coeff *x,
return sum;
}
-static int64_t av1_daala_dist(const uint8_t *src, int src_stride,
- const uint8_t *dst, int dst_stride, int bsw,
- int bsh, int qm, int use_activity_masking,
- int qindex) {
+int64_t av1_daala_dist(const uint8_t *src, int src_stride, const uint8_t *dst,
+ int dst_stride, int bsw, int bsh, int qm,
+ int use_activity_masking, int qindex) {
int i, j;
int64_t d;
DECLARE_ALIGNED(16, od_coeff, orig[MAX_TX_SQUARE]);
@@ -843,7 +799,7 @@ static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
const MACROBLOCKD *const xd, int tx_set) {
#if CONFIG_EXT_TX
- const int *tx_set_1D = ext_tx_used_inter_1D[tx_set];
+ const int *tx_set_1D = tx_set >= 0 ? ext_tx_used_inter_1D[tx_set] : NULL;
#else
const int tx_set_1D[TX_TYPES_1D] = { 0 };
#endif // CONFIG_EXT_TX
@@ -1100,13 +1056,10 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
int c, cost;
const int16_t *scan = scan_order->scan;
const int16_t *nb = scan_order->neighbors;
-#if CONFIG_NEW_TOKENSET
const int ref = is_inter_block(mbmi);
aom_prob *blockz_probs =
cm->fc->blockzero_probs[txsize_sqr_map[tx_size]][type][ref];
-#endif // CONFIG_NEW_TOKENSET
-
#if CONFIG_HIGHBITDEPTH
const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
#else
@@ -1120,12 +1073,8 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
(void)cm;
if (eob == 0) {
-#if CONFIG_NEW_TOKENSET
// single eob token
cost = av1_cost_bit(blockz_probs[pt], 0);
-#else
- cost = token_costs[0][0][pt][EOB_TOKEN];
-#endif // CONFIG_NEW_TOKENSET
} else {
if (use_fast_coef_costing) {
int band_left = *band_count++;
@@ -1134,11 +1083,7 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
int v = qcoeff[0];
int16_t prev_t;
cost = av1_get_token_cost(v, &prev_t, cat6_bits);
-#if CONFIG_NEW_TOKENSET
cost += (*token_costs)[!prev_t][pt][prev_t];
-#else
- cost += (*token_costs)[0][pt][prev_t];
-#endif
token_cache[0] = av1_pt_energy_class[prev_t];
++token_costs;
@@ -1150,11 +1095,7 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
v = qcoeff[rc];
cost += av1_get_token_cost(v, &t, cat6_bits);
-#if CONFIG_NEW_TOKENSET
cost += (*token_costs)[!t][!prev_t][t];
-#else
- cost += (*token_costs)[!prev_t][!prev_t][t];
-#endif
prev_t = t;
if (!--band_left) {
band_left = *band_count++;
@@ -1163,8 +1104,7 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
}
// eob token
- if (band_left || CONFIG_NEW_TOKENSET)
- cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
+ cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
} else { // !use_fast_coef_costing
int band_left = *band_count++;
@@ -1172,23 +1112,12 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
// dc token
int v = qcoeff[0];
int16_t tok;
-#if !CONFIG_NEW_TOKENSET
- unsigned int(*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS];
-#endif
cost = av1_get_token_cost(v, &tok, cat6_bits);
-#if CONFIG_NEW_TOKENSET
cost += (*token_costs)[!tok][pt][tok];
-#else
- cost += (*token_costs)[0][pt][tok];
-#endif
token_cache[0] = av1_pt_energy_class[tok];
++token_costs;
-#if !CONFIG_NEW_TOKENSET
- tok_cost_ptr = &((*token_costs)[!tok]);
-#endif
-
// ac tokens
for (c = 1; c < eob; c++) {
const int rc = scan[c];
@@ -1196,26 +1125,17 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
v = qcoeff[rc];
cost += av1_get_token_cost(v, &tok, cat6_bits);
pt = get_coef_context(nb, token_cache, c);
-#if CONFIG_NEW_TOKENSET
cost += (*token_costs)[!tok][pt][tok];
-#else
- cost += (*tok_cost_ptr)[pt][tok];
-#endif
token_cache[rc] = av1_pt_energy_class[tok];
if (!--band_left) {
band_left = *band_count++;
++token_costs;
}
-#if !CONFIG_NEW_TOKENSET
- tok_cost_ptr = &((*token_costs)[!tok]);
-#endif
}
// eob token
- if (band_left || CONFIG_NEW_TOKENSET) {
- pt = get_coef_context(nb, token_cache, c);
- cost += (*token_costs)[0][pt][EOB_TOKEN];
- }
+ pt = get_coef_context(nb, token_cache, c);
+ cost += (*token_costs)[0][pt][EOB_TOKEN];
}
}
@@ -1262,7 +1182,9 @@ static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
BLOCK_SIZE tx_bsize, int *width, int *height,
int *visible_width, int *visible_height) {
+#if !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT)
assert(tx_bsize <= plane_bsize);
+#endif // !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT)
int txb_height = block_size_high[tx_bsize];
int txb_width = block_size_wide[tx_bsize];
const int block_height = block_size_high[plane_bsize];
@@ -1298,7 +1220,12 @@ static unsigned pixel_sse(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
&txb_cols, &txb_rows, &visible_cols, &visible_rows);
assert(visible_rows > 0);
assert(visible_cols > 0);
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+ if ((txb_rows == visible_rows && txb_cols == visible_cols) &&
+ tx_bsize < BLOCK_SIZES) {
+#else
if (txb_rows == visible_rows && txb_cols == visible_cols) {
+#endif
unsigned sse;
cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
return sse;
@@ -1533,7 +1460,36 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
if (args->exit_early) return;
if (!is_inter_block(mbmi)) {
+#if CONFIG_CFL
+
+#if CONFIG_EC_ADAPT
+ FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
+#else
+ FRAME_CONTEXT *const ec_ctx = cm->fc;
+#endif // CONFIG_EC_ADAPT
+
+ av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col,
+ blk_row, tx_size, plane_bsize);
+#else
av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
+#endif
+#if CONFIG_DPCM_INTRA
+ const int block_raster_idx =
+ av1_block_index_to_raster_order(tx_size, block);
+ const PREDICTION_MODE mode =
+ (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode;
+ TX_TYPE tx_type = get_tx_type((plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV,
+ xd, block, tx_size);
+ if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) {
+ int8_t skip;
+ av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col,
+ plane_bsize, tx_size, tx_type, a, l, &skip);
+ av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
+ tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
+ OUTPUT_HAS_DECODED_PIXELS);
+ goto CALCULATE_RD;
+ }
+#endif // CONFIG_DPCM_INTRA
av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
}
@@ -1542,8 +1498,7 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
const int coeff_ctx = combine_entropy_contexts(*a, *l);
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
coeff_ctx, AV1_XFORM_QUANT_FP);
- if (x->plane[plane].eobs[block] && !xd->lossless[mbmi->segment_id])
- av1_optimize_b(cm, x, plane, block, tx_size, coeff_ctx);
+ av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
if (!is_inter_block(mbmi)) {
struct macroblock_plane *const p = &x->plane[plane];
@@ -1566,6 +1521,9 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size);
}
#endif
+#if CONFIG_DPCM_INTRA
+CALCULATE_RD : {}
+#endif // CONFIG_DPCM_INTRA
rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
if (args->this_rd + rd > args->best_rd) {
args->exit_early = 1;
@@ -1603,7 +1561,7 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
rd = AOMMIN(rd1, rd2);
#if CONFIG_DAALA_DIST
- if (plane == 0 &&
+ if (plane == 0 && plane_bsize >= BLOCK_8X8 &&
(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4)) {
this_rd_stats.dist = 0;
this_rd_stats.sse = 0;
@@ -1641,6 +1599,9 @@ static void block_8x8_rd_txfm_daala_dist(int plane, int block, int blk_row,
int use_activity_masking = 0;
(void)tx_size;
+
+ assert(plane == 0);
+ assert(plane_bsize >= BLOCK_8X8);
#if CONFIG_PVQ
use_activity_masking = x->daala_enc.use_activity_masking;
#endif // CONFIG_PVQ
@@ -1700,10 +1661,15 @@ static void block_8x8_rd_txfm_daala_dist(int plane, int block, int blk_row,
{
const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
+ const uint8_t txw_unit = tx_size_wide_unit[tx_size];
+ const uint8_t txh_unit = tx_size_high_unit[tx_size];
+ const int step = txw_unit * txh_unit;
+ int offset_h = tx_size_high_unit[TX_4X4];
// The rate of the current 8x8 block is the sum of four 4x4 blocks in it.
- this_rd_stats.rate = x->rate_4x4[block - max_blocks_wide - 1] +
- x->rate_4x4[block - max_blocks_wide] +
- x->rate_4x4[block - 1] + x->rate_4x4[block];
+ this_rd_stats.rate =
+ x->rate_4x4[block - max_blocks_wide * offset_h - step] +
+ x->rate_4x4[block - max_blocks_wide * offset_h] +
+ x->rate_4x4[block - step] + x->rate_4x4[block];
}
rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse);
@@ -1740,10 +1706,10 @@ static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
#if CONFIG_DAALA_DIST
- if (plane == 0 &&
+ if (plane == 0 && bsize >= BLOCK_8X8 &&
(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))
- av1_foreach_8x8_transformed_block_in_plane(
- xd, bsize, plane, block_rd_txfm, block_8x8_rd_txfm_daala_dist, &args);
+ av1_foreach_8x8_transformed_block_in_yplane(
+ xd, bsize, block_rd_txfm, block_8x8_rd_txfm_daala_dist, &args);
else
#endif // CONFIG_DAALA_DIST
av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
@@ -1812,7 +1778,12 @@ static int tx_size_cost(const AV1_COMP *const cpi, const MACROBLOCK *const x,
const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
const int depth = tx_size_to_depth(coded_tx_size);
const int tx_size_ctx = get_tx_size_context(xd);
- const int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
+ int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+ if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size)
+ r_tx_size += av1_cost_bit(cm->fc->quarter_tx_size_prob,
+ tx_size == quarter_txsize_lookup[bsize]);
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
return r_tx_size;
} else {
return 0;
@@ -1924,9 +1895,7 @@ static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
// transforms should be considered for pruning
prune = prune_tx_types(cpi, bs, x, xd, -1);
-#if CONFIG_REF_MV
if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) return 1;
-#endif // CONFIG_REF_MV
if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, tx_size))
return 1;
if (!is_inter && x->use_default_intra_tx_type &&
@@ -1960,7 +1929,7 @@ static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
return 0;
}
-#if CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
MACROBLOCK *x, int *r, int64_t *d, int *s,
int64_t *sse, int64_t ref_best_rd) {
@@ -1973,7 +1942,7 @@ static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
*sse = rd_stats.sse;
return rd;
}
-#endif // CONFIG_EXT_INTER
+#endif // CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
RD_STATS *rd_stats, int64_t ref_best_rd,
@@ -2191,9 +2160,7 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
#endif
TX_TYPE tx_type;
for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
-#if CONFIG_REF_MV
if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
-#endif // CONFIG_REF_MV
const TX_SIZE rect_tx_size = max_txsize_rect_lookup[bs];
RD_STATS this_rd_stats;
int ext_tx_set =
@@ -2219,6 +2186,56 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
#endif // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
}
}
+
+#if CONFIG_RECT_TX_EXT
+ // test 1:4/4:1 tx
+ int evaluate_quarter_tx = 0;
+ if (is_quarter_tx_allowed(xd, mbmi, is_inter)) {
+ if (tx_select) {
+ evaluate_quarter_tx = 1;
+ } else {
+ const TX_SIZE chosen_tx_size =
+ tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
+ evaluate_quarter_tx = chosen_tx_size == quarter_txsize_lookup[bs];
+ }
+ }
+ if (evaluate_quarter_tx) {
+ TX_TYPE tx_start = DCT_DCT;
+ TX_TYPE tx_end = TX_TYPES;
+#if CONFIG_TXK_SEL
+ // The tx_type becomes dummy when lv_map is on. The tx_type search will be
+ // performed in av1_search_txk_type()
+ tx_end = DCT_DCT + 1;
+#endif
+ TX_TYPE tx_type;
+ for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
+ if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
+ const TX_SIZE tx_size = quarter_txsize_lookup[bs];
+ RD_STATS this_rd_stats;
+ int ext_tx_set =
+ get_ext_tx_set(tx_size, bs, is_inter, cm->reduced_tx_set_used);
+ if ((is_inter && ext_tx_used_inter[ext_tx_set][tx_type]) ||
+ (!is_inter && ext_tx_used_intra[ext_tx_set][tx_type])) {
+ rd =
+ txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type, tx_size);
+ if (rd < best_rd) {
+#if CONFIG_TXK_SEL
+ memcpy(best_txk_type, mbmi->txk_type,
+ sizeof(best_txk_type[0]) * num_blk);
+#endif
+ best_tx_type = tx_type;
+ best_tx_size = tx_size;
+ best_rd = rd;
+ *rd_stats = this_rd_stats;
+ }
+ }
+#if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
+ const int is_inter = is_inter_block(mbmi);
+ if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
+#endif // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
+ }
+ }
+#endif // CONFIG_RECT_TX_EXT
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
if (tx_select) {
@@ -2334,6 +2351,7 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
BLOCK_SIZE bsize, int mode_cost) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ assert(!is_inter_block(mbmi));
RD_STATS this_rd_stats;
int row, col;
int64_t temp_sse, this_rd;
@@ -2348,7 +2366,21 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
int block = 0;
for (row = 0; row < max_blocks_high; row += stepr) {
for (col = 0; col < max_blocks_wide; col += stepc) {
+#if CONFIG_CFL
+ const struct macroblockd_plane *const pd = &xd->plane[0];
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+
+#if CONFIG_EC_ADAPT
+ FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
+#else
+ FRAME_CONTEXT *const ec_ctx = cpi->common.fc;
+#endif // CONFIG_EC_ADAPT
+
+ av1_predict_intra_block_encoder_facade(x, ec_ctx, 0, block, col, row,
+ tx_size, plane_bsize);
+#else
av1_predict_intra_block_facade(xd, 0, block, col, row, tx_size);
+#endif
block += step;
}
}
@@ -2403,6 +2435,28 @@ static void extend_palette_color_map(uint8_t *const color_map, int orig_width,
}
}
+#if CONFIG_PALETTE_DELTA_ENCODING
+// Bias toward using colors in the cache: each visited centroid is replaced by its nearest cached color when the two differ by less than 1.5.
+// TODO(huisu): Try other schemes to improve compression.
+static void optimize_palette_colors(uint16_t *color_cache, int n_cache,  // color_cache: n_cache candidate colors, only read here
+ int n_colors, int stride,  // visits centroids[0], centroids[stride], ... (n_colors entries); elements in between are left untouched
+ float *centroids) {  // centroids: modified in place
+ if (n_cache <= 0) return;  // empty cache: nothing to snap to
+ for (int i = 0; i < n_colors * stride; i += stride) {
+ float min_diff = fabsf(centroids[i] - color_cache[0]);  // seed the search with the first cached color
+ int idx = 0;
+ for (int j = 1; j < n_cache; ++j) {  // linear scan for the closest cached color
+ float this_diff = fabsf(centroids[i] - color_cache[j]);
+ if (this_diff < min_diff) {  // strict '<' keeps the lowest index on ties
+ min_diff = this_diff;
+ idx = j;
+ }
+ }
+ if (min_diff < 1.5) centroids[i] = color_cache[idx];  // snap only when the centroid is already close to a cached color
+ }
+}
+#endif // CONFIG_PALETTE_DELTA_ENCODING
+
static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int palette_ctx,
int dc_mode_cost, MB_MODE_INFO *best_mbmi,
@@ -2414,6 +2468,7 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mic = xd->mi[0];
MB_MODE_INFO *const mbmi = &mic->mbmi;
+ assert(!is_inter_block(mbmi));
int this_rate, colors, n;
const int src_stride = x->plane[0].src.stride;
const uint8_t *const src = x->plane[0].src.buf;
@@ -2488,12 +2543,38 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return 0;
+#if CONFIG_PALETTE_DELTA_ENCODING
+ const MODE_INFO *above_mi = xd->above_mi;
+ const MODE_INFO *left_mi = xd->left_mi;
+ uint16_t color_cache[2 * PALETTE_MAX_SIZE];
+ const int n_cache =
+ av1_get_palette_cache(above_mi, left_mi, 0, color_cache);
+#endif // CONFIG_PALETTE_DELTA_ENCODING
+
for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
--n) {
- for (i = 0; i < n; ++i)
- centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
- av1_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr);
- k = av1_remove_duplicates(centroids, n);
+ if (colors == PALETTE_MIN_SIZE) {
+ // Special case: These colors automatically become the centroids.
+ assert(colors == n);
+ assert(colors == 2);
+ centroids[0] = lb;
+ centroids[1] = ub;
+ k = 2;
+ } else {
+ for (i = 0; i < n; ++i) {
+ centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
+ }
+ av1_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr);
+#if CONFIG_PALETTE_DELTA_ENCODING
+ optimize_palette_colors(color_cache, n_cache, n, 1, centroids);
+#endif // CONFIG_PALETTE_DELTA_ENCODING
+ k = av1_remove_duplicates(centroids, n);
+ if (k < PALETTE_MIN_SIZE) {
+ // Too few unique colors to create a palette. And DC_PRED will work
+ // well for that case anyway. So skip.
+ continue;
+ }
+ }
#if CONFIG_HIGHBITDEPTH
if (cpi->common.use_highbitdepth)
@@ -2516,7 +2597,11 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
av1_cost_bit(
av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx],
1);
- palette_mode_cost += av1_palette_color_cost_y(pmi, cpi->common.bit_depth);
+ palette_mode_cost += av1_palette_color_cost_y(pmi,
+#if CONFIG_PALETTE_DELTA_ENCODING
+ color_cache, n_cache,
+#endif // CONFIG_PALETTE_DELTA_ENCODING
+ cpi->common.bit_depth);
for (i = 0; i < rows; ++i) {
for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
int color_idx;
@@ -2570,6 +2655,7 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
const AV1_COMMON *const cm = &cpi->common;
PREDICTION_MODE mode;
MACROBLOCKD *const xd = &x->e_mbd;
+ assert(!is_inter_block(&xd->mi[0]->mbmi));
int64_t best_rd = rd_thresh;
struct macroblock_plane *p = &x->plane[0];
struct macroblockd_plane *pd = &xd->plane[0];
@@ -2577,7 +2663,7 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
const int dst_stride = pd->dst.stride;
const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
uint8_t *dst_init = &pd->dst.buf[row * 4 * dst_stride + col * 4];
-#if CONFIG_CB4X4
+#if CONFIG_CHROMA_2X2
// TODO(jingning): This is a temporal change. The whole function should be
// out when cb4x4 is enabled.
ENTROPY_CONTEXT ta[4], tempa[4];
@@ -2585,7 +2671,7 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
#else
ENTROPY_CONTEXT ta[2], tempa[2];
ENTROPY_CONTEXT tl[2], templ[2];
-#endif // CONFIG_CB4X4
+#endif // CONFIG_CHROMA_2X2
const int pred_width_in_4x4_blocks = num_4x4_blocks_wide_lookup[bsize];
const int pred_height_in_4x4_blocks = num_4x4_blocks_high_lookup[bsize];
@@ -2738,7 +2824,8 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
#if !CONFIG_PVQ
av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
- av1_optimize_b(cm, x, 0, block, tx_size, coeff_ctx);
+ av1_optimize_b(cm, x, 0, block, BLOCK_8X8, tx_size, tempa + idx,
+ templ + idy);
ratey += av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order,
tempa + idx, templ + idy,
cpi->sf.use_fast_coef_costing);
@@ -2897,9 +2984,8 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
#endif // CONFIG_CB4X4
BLOCK_8X8, tx_size, coeff_ctx, xform_quant);
- if (!is_lossless) {
- av1_optimize_b(cm, x, 0, block, tx_size, coeff_ctx);
- }
+ av1_optimize_b(cm, x, 0, block, BLOCK_8X8, tx_size, tempa + idx,
+ templ + idy);
ratey +=
av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order, tempa + idx,
@@ -3013,6 +3099,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
const MODE_INFO *above_mi = xd->above_mi;
const MODE_INFO *left_mi = xd->left_mi;
MB_MODE_INFO *const mbmi = &mic->mbmi;
+ assert(!is_inter_block(mbmi));
const BLOCK_SIZE bsize = mbmi->sb_type;
const int pred_width_in_4x4_blocks = num_4x4_blocks_wide_lookup[bsize];
const int pred_height_in_4x4_blocks = num_4x4_blocks_high_lookup[bsize];
@@ -3220,6 +3307,7 @@ static int64_t calc_rd_given_intra_angle(
RD_STATS tokenonly_rd_stats;
int64_t this_rd, this_model_rd;
MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
+ assert(!is_inter_block(mbmi));
mbmi->angle_delta[0] = angle_delta;
this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost);
@@ -3261,6 +3349,7 @@ static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mic = xd->mi[0];
MB_MODE_INFO *mbmi = &mic->mbmi;
+ assert(!is_inter_block(mbmi));
int i, angle_delta, best_angle_delta = 0;
int first_try = 1;
#if CONFIG_INTRA_INTERP
@@ -3393,32 +3482,40 @@ static const uint8_t gradient_to_angle_bin[2][7][16] = {
},
};
+/* clang-format off */
static const uint8_t mode_to_angle_bin[INTRA_MODES] = {
0, 2, 6, 0, 4, 3, 5, 7, 1, 0,
+#if CONFIG_ALT_INTRA
+ 0,
+#endif // CONFIG_ALT_INTRA
};
+/* clang-format on */
static void angle_estimation(const uint8_t *src, int src_stride, int rows,
- int cols, uint8_t *directional_mode_skip_mask) {
- int i, r, c, index, dx, dy, temp, sn, remd, quot;
+ int cols, BLOCK_SIZE bsize,
+ uint8_t *directional_mode_skip_mask) {
+ memset(directional_mode_skip_mask, 0,
+ INTRA_MODES * sizeof(*directional_mode_skip_mask));
+ // Sub-8x8 blocks do not use extra directions.
+ if (bsize < BLOCK_8X8) return;
uint64_t hist[DIRECTIONAL_MODES];
- uint64_t hist_sum = 0;
-
memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
src += src_stride;
+ int r, c, dx, dy;
for (r = 1; r < rows; ++r) {
for (c = 1; c < cols; ++c) {
dx = src[c] - src[c - 1];
dy = src[c] - src[c - src_stride];
- temp = dx * dx + dy * dy;
+ int index;
+ const int temp = dx * dx + dy * dy;
if (dy == 0) {
index = 2;
} else {
- sn = (dx > 0) ^ (dy > 0);
+ const int sn = (dx > 0) ^ (dy > 0);
dx = abs(dx);
dy = abs(dy);
- remd = dx % dy;
- quot = dx / dy;
- remd = remd * 16 / dy;
+ const int remd = (dx % dy) * 16 / dy;
+ const int quot = dx / dy;
index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
}
hist[index] += temp;
@@ -3426,9 +3523,11 @@ static void angle_estimation(const uint8_t *src, int src_stride, int rows,
src += src_stride;
}
+ int i;
+ uint64_t hist_sum = 0;
for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
for (i = 0; i < INTRA_MODES; ++i) {
- if (i != DC_PRED && i != TM_PRED) {
+ if (av1_is_directional_mode(i, bsize)) {
const uint8_t angle_bin = mode_to_angle_bin[i];
uint64_t score = 2 * hist[angle_bin];
int weight = 2;
@@ -3448,29 +3547,31 @@ static void angle_estimation(const uint8_t *src, int src_stride, int rows,
#if CONFIG_HIGHBITDEPTH
static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
- int rows, int cols,
+ int rows, int cols, BLOCK_SIZE bsize,
uint8_t *directional_mode_skip_mask) {
- int i, r, c, index, dx, dy, temp, sn, remd, quot;
- uint64_t hist[DIRECTIONAL_MODES];
- uint64_t hist_sum = 0;
+ memset(directional_mode_skip_mask, 0,
+ INTRA_MODES * sizeof(*directional_mode_skip_mask));
+ // Sub-8x8 blocks do not use extra directions.
+ if (bsize < BLOCK_8X8) return;
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-
+ uint64_t hist[DIRECTIONAL_MODES];
memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
src += src_stride;
+ int r, c, dx, dy;
for (r = 1; r < rows; ++r) {
for (c = 1; c < cols; ++c) {
dx = src[c] - src[c - 1];
dy = src[c] - src[c - src_stride];
- temp = dx * dx + dy * dy;
+ int index;
+ const int temp = dx * dx + dy * dy;
if (dy == 0) {
index = 2;
} else {
- sn = (dx > 0) ^ (dy > 0);
+ const int sn = (dx > 0) ^ (dy > 0);
dx = abs(dx);
dy = abs(dy);
- remd = dx % dy;
- quot = dx / dy;
- remd = remd * 16 / dy;
+ const int remd = (dx % dy) * 16 / dy;
+ const int quot = dx / dy;
index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
}
hist[index] += temp;
@@ -3478,9 +3579,11 @@ static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
src += src_stride;
}
+ int i;
+ uint64_t hist_sum = 0;
for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
for (i = 0; i < INTRA_MODES; ++i) {
- if (i != DC_PRED && i != TM_PRED) {
+ if (av1_is_directional_mode(i, bsize)) {
const uint8_t angle_bin = mode_to_angle_bin[i];
uint64_t score = 2 * hist[angle_bin];
int weight = 2;
@@ -3509,6 +3612,7 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mic = xd->mi[0];
MB_MODE_INFO *const mbmi = &mic->mbmi;
+ assert(!is_inter_block(mbmi));
MB_MODE_INFO best_mbmi = *mbmi;
int64_t best_model_rd = INT64_MAX;
#if CONFIG_EXT_INTRA
@@ -3552,15 +3656,14 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
#if CONFIG_EXT_INTRA
mbmi->angle_delta[0] = 0;
- memset(directional_mode_skip_mask, 0,
- sizeof(directional_mode_skip_mask[0]) * INTRA_MODES);
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- highbd_angle_estimation(src, src_stride, rows, cols,
+ highbd_angle_estimation(src, src_stride, rows, cols, bsize,
directional_mode_skip_mask);
else
#endif // CONFIG_HIGHBITDEPTH
- angle_estimation(src, src_stride, rows, cols, directional_mode_skip_mask);
+ angle_estimation(src, src_stride, rows, cols, bsize,
+ directional_mode_skip_mask);
#endif // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
@@ -3833,7 +3936,7 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
coeff_ctx, AV1_XFORM_QUANT_FP);
- av1_optimize_b(cm, x, plane, block, tx_size, coeff_ctx);
+ av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
// TODO(any): Use av1_dist_block to compute distortion
#if CONFIG_HIGHBITDEPTH
@@ -3936,9 +4039,8 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
ENTROPY_CONTEXT *pta = ta + blk_col;
ENTROPY_CONTEXT *ptl = tl + blk_row;
int coeff_ctx, i;
- int ctx =
- txfm_partition_context(tx_above + (blk_col >> 1),
- tx_left + (blk_row >> 1), mbmi->sb_type, tx_size);
+ int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
+ mbmi->sb_type, tx_size);
int64_t sum_rd = INT64_MAX;
int tmp_eob = 0;
int zero_blk_rate;
@@ -4042,8 +4144,8 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
int idx, idy;
for (i = 0; i < tx_size_wide_unit[tx_size]; ++i) pta[i] = !(tmp_eob == 0);
for (i = 0; i < tx_size_high_unit[tx_size]; ++i) ptl[i] = !(tmp_eob == 0);
- txfm_partition_update(tx_above + (blk_col >> 1), tx_left + (blk_row >> 1),
- tx_size, tx_size);
+ txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
+ tx_size);
inter_tx_size[0][0] = tx_size;
for (idy = 0; idy < tx_size_high_unit[tx_size] / 2; ++idy)
for (idx = 0; idx < tx_size_wide_unit[tx_size] / 2; ++idx)
@@ -4082,17 +4184,15 @@ static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
- TXFM_CONTEXT tx_above[MAX_MIB_SIZE];
- TXFM_CONTEXT tx_left[MAX_MIB_SIZE];
+ TXFM_CONTEXT tx_above[MAX_MIB_SIZE * 2];
+ TXFM_CONTEXT tx_left[MAX_MIB_SIZE * 2];
RD_STATS pn_rd_stats;
av1_init_rd_stats(&pn_rd_stats);
av1_get_entropy_contexts(bsize, 0, pd, ctxa, ctxl);
- memcpy(tx_above, xd->above_txfm_context,
- sizeof(TXFM_CONTEXT) * (mi_width >> 1));
- memcpy(tx_left, xd->left_txfm_context,
- sizeof(TXFM_CONTEXT) * (mi_height >> 1));
+ memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width);
+ memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height);
for (idy = 0; idy < mi_height; idy += bh) {
for (idx = 0; idx < mi_width; idx += bw) {
@@ -4137,8 +4237,8 @@ static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
const int max_blocks_wide = max_block_wide(xd, bsize, 0);
mbmi->tx_type = tx_type;
- mbmi->min_tx_size = TX_SIZES_ALL;
inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd, rd_stats_stack);
+ mbmi->min_tx_size = get_min_tx_size(mbmi->inter_tx_size[0][0]);
if (rd_stats->rate == INT_MAX) return INT64_MAX;
@@ -4350,7 +4450,8 @@ static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x,
#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
if (x->skip_chroma_rd) return is_cost_valid;
- bsize = AOMMAX(BLOCK_8X8, bsize);
+ bsize = scale_chroma_bsize(mbmi->sb_type, xd->plane[1].subsampling_x,
+ xd->plane[1].subsampling_y);
#endif // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
#if CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -4426,6 +4527,7 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
int *skippable) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ assert(!is_inter_block(mbmi));
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
const BLOCK_SIZE bsize = mbmi->sb_type;
int this_rate;
@@ -4460,6 +4562,13 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
}
#endif // CONFIG_HIGHBITDEPTH
+#if CONFIG_PALETTE_DELTA_ENCODING
+ const MODE_INFO *above_mi = xd->above_mi;
+ const MODE_INFO *left_mi = xd->left_mi;
+ uint16_t color_cache[2 * PALETTE_MAX_SIZE];
+ const int n_cache = av1_get_palette_cache(above_mi, left_mi, 1, color_cache);
+#endif // CONFIG_PALETTE_DELTA_ENCODING
+
colors = colors_u > colors_v ? colors_u : colors_v;
if (colors > 1 && colors <= 64) {
int r, c, n, i, j;
@@ -4524,6 +4633,7 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
}
av1_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr);
#if CONFIG_PALETTE_DELTA_ENCODING
+ optimize_palette_colors(color_cache, n_cache, n, 2, centroids);
// Sort the U channel colors in ascending order.
for (i = 0; i < 2 * (n - 1); i += 2) {
int min_idx = i;
@@ -4563,7 +4673,11 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
write_uniform_cost(n, color_map[0]) +
av1_cost_bit(
av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 1);
- this_rate += av1_palette_color_cost_uv(pmi, cpi->common.bit_depth);
+ this_rate += av1_palette_color_cost_uv(pmi,
+#if CONFIG_PALETTE_DELTA_ENCODING
+ color_cache, n_cache,
+#endif // CONFIG_PALETTE_DELTA_ENCODING
+ cpi->common.bit_depth);
for (i = 0; i < rows; ++i) {
for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
int color_idx;
@@ -4660,6 +4774,7 @@ static int64_t pick_intra_angle_routine_sbuv(
int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats,
int *best_angle_delta, int64_t *best_rd) {
MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
+ assert(!is_inter_block(mbmi));
int this_rate;
int64_t this_rd;
RD_STATS tokenonly_rd_stats;
@@ -4687,6 +4802,7 @@ static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
RD_STATS *rd_stats) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ assert(!is_inter_block(mbmi));
int i, angle_delta, best_angle_delta = 0;
int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
@@ -4736,12 +4852,23 @@ static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
}
#endif // CONFIG_EXT_INTRA
+static void init_sbuv_mode(MB_MODE_INFO *const mbmi) {  // Resets the uv-mode fields of mbmi; called before the chroma intra mode search in choose_intra_uv_mode().
+ mbmi->uv_mode = DC_PRED;
+#if CONFIG_PALETTE
+ mbmi->palette_mode_info.palette_size[1] = 0;  // index 1 is presumably the chroma entry (TODO confirm); size 0 = no palette
+#endif // CONFIG_PALETTE
+#if CONFIG_FILTER_INTRA
+ mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;  // clear the filter-intra flag at the same index
+#endif // CONFIG_FILTER_INTRA
+}
+
static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ assert(!is_inter_block(mbmi));
MB_MODE_INFO best_mbmi = *mbmi;
PREDICTION_MODE mode;
int64_t best_rd = INT64_MAX, this_rd;
@@ -4756,12 +4883,6 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
uint8_t *best_palette_color_map = NULL;
#endif // CONFIG_PALETTE
-#if CONFIG_FILTER_INTRA
- mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
-#endif // CONFIG_FILTER_INTRA
-#if CONFIG_PALETTE
- pmi->palette_size[1] = 0;
-#endif // CONFIG_PALETTE
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
#if CONFIG_EXT_INTRA
const int is_directional_mode =
@@ -4858,12 +4979,12 @@ static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
// Use an estimated rd for uv_intra based on DC_PRED if the
// appropriate speed flag is set.
(void)ctx;
+ init_sbuv_mode(&x->e_mbd.mi[0]->mbmi);
#if CONFIG_CB4X4
#if CONFIG_CHROMA_2X2
rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
bsize, max_tx_size);
#else
- max_tx_size = AOMMAX(max_tx_size, TX_4X4);
if (x->skip_chroma_rd) {
*rate_uv = 0;
*rate_uv_tokenonly = 0;
@@ -4893,7 +5014,6 @@ static int cost_mv_ref(const AV1_COMP *const cpi, PREDICTION_MODE mode,
}
#endif
-#if CONFIG_REF_MV
int mode_cost = 0;
int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
int16_t is_all_zero_mv = mode_context & (1 << ALL_ZERO_FLAG_OFFSET);
@@ -4924,13 +5044,9 @@ static int cost_mv_ref(const AV1_COMP *const cpi, PREDICTION_MODE mode,
return mode_cost;
}
}
-#else
- assert(is_inter_mode(mode));
- return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
-#endif // CONFIG_REF_MV
}
-#if CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
static int get_interinter_compound_type_bits(BLOCK_SIZE bsize,
COMPOUND_TYPE comp_type) {
(void)bsize;
@@ -4945,304 +5061,7 @@ static int get_interinter_compound_type_bits(BLOCK_SIZE bsize,
default: assert(0); return 0;
}
}
-#endif // CONFIG_EXT_INTER
-
-static int set_and_cost_bmi_mvs(
- const AV1_COMP *const cpi, MACROBLOCK *x, MACROBLOCKD *xd, int i,
- PREDICTION_MODE mode, int_mv this_mv[2],
- int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME],
- int_mv seg_mvs[TOTAL_REFS_PER_FRAME],
-#if CONFIG_EXT_INTER
- int_mv compound_seg_newmvs[2],
-#endif // CONFIG_EXT_INTER
- int_mv *best_ref_mv[2], const int *mvjcost, int *mvcost[2], int mi_row,
- int mi_col) {
- MODE_INFO *const mic = xd->mi[0];
- const MB_MODE_INFO *const mbmi = &mic->mbmi;
- const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
- int thismvcost = 0;
- int idx, idy;
- const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
- const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
- const int is_compound = has_second_ref(mbmi);
- int mode_ctx;
- (void)mi_row;
- (void)mi_col;
-
- switch (mode) {
- case NEWMV: this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
-#if CONFIG_EXT_INTER
- if (!cpi->common.allow_high_precision_mv)
- lower_mv_precision(&this_mv[0].as_mv, 0);
-#endif // CONFIG_EXT_INTER
-
-#if CONFIG_REF_MV
- for (idx = 0; idx < 1 + is_compound; ++idx) {
- this_mv[idx] = seg_mvs[mbmi->ref_frame[idx]];
- av1_set_mvcost(x, mbmi->ref_frame[idx], idx, mbmi->ref_mv_idx);
- thismvcost +=
- av1_mv_bit_cost(&this_mv[idx].as_mv, &best_ref_mv[idx]->as_mv,
- x->nmvjointcost, x->mvcost, MV_COST_WEIGHT_SUB);
- }
- (void)mvjcost;
- (void)mvcost;
-#else
- thismvcost += av1_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
- mvjcost, mvcost, MV_COST_WEIGHT_SUB);
-#if !CONFIG_EXT_INTER
- if (is_compound) {
- this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
- thismvcost += av1_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
- mvjcost, mvcost, MV_COST_WEIGHT_SUB);
- }
-#endif // !CONFIG_EXT_INTER
-#endif // CONFIG_REF_MV
- break;
- case NEARMV:
- case NEARESTMV:
- this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
- if (is_compound)
- this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
- break;
- case ZEROMV: {
- int ref;
- for (ref = 0; ref < 1 + is_compound; ++ref) {
-#if CONFIG_GLOBAL_MOTION
- this_mv[ref].as_int =
- gm_get_motion_vector(
- &cpi->common.global_motion[mbmi->ref_frame[ref]],
- cpi->common.allow_high_precision_mv, mbmi->sb_type, mi_col,
- mi_row, i)
- .as_int;
-#else
- this_mv[ref].as_int = 0;
-#endif // CONFIG_GLOBAL_MOTION
- }
- break;
- }
-#if CONFIG_EXT_INTER
- case NEW_NEWMV:
- if (compound_seg_newmvs[0].as_int == INVALID_MV ||
- compound_seg_newmvs[1].as_int == INVALID_MV) {
- this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
- this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
- } else {
- this_mv[0].as_int = compound_seg_newmvs[0].as_int;
- this_mv[1].as_int = compound_seg_newmvs[1].as_int;
- }
- if (!cpi->common.allow_high_precision_mv)
- lower_mv_precision(&this_mv[0].as_mv, 0);
- if (!cpi->common.allow_high_precision_mv)
- lower_mv_precision(&this_mv[1].as_mv, 0);
-#if CONFIG_REF_MV
- av1_set_mvcost(x, mbmi->ref_frame[0], 0, mbmi->ref_mv_idx);
-#endif
- thismvcost += av1_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
- mvjcost, mvcost, MV_COST_WEIGHT_SUB);
-#if CONFIG_REF_MV
- av1_set_mvcost(x, mbmi->ref_frame[1], 1, mbmi->ref_mv_idx);
-#endif
- thismvcost += av1_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
- mvjcost, mvcost, MV_COST_WEIGHT_SUB);
- break;
- case NEW_NEARMV:
- case NEW_NEARESTMV:
- this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
- if (!cpi->common.allow_high_precision_mv)
- lower_mv_precision(&this_mv[0].as_mv, 0);
-#if CONFIG_REF_MV
- av1_set_mvcost(x, mbmi->ref_frame[0], 0, mbmi->ref_mv_idx);
-#endif
- thismvcost += av1_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
- mvjcost, mvcost, MV_COST_WEIGHT_SUB);
- this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
- break;
- case NEAR_NEWMV:
- case NEAREST_NEWMV:
- this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
- this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
- if (!cpi->common.allow_high_precision_mv)
- lower_mv_precision(&this_mv[1].as_mv, 0);
-#if CONFIG_REF_MV
- av1_set_mvcost(x, mbmi->ref_frame[1], 1, mbmi->ref_mv_idx);
-#endif
- thismvcost += av1_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
- mvjcost, mvcost, MV_COST_WEIGHT_SUB);
- break;
- case NEAREST_NEARMV:
- case NEAR_NEARESTMV:
- case NEAREST_NEARESTMV:
- case NEAR_NEARMV:
- this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
- this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
- break;
- case ZERO_ZEROMV:
-#if CONFIG_GLOBAL_MOTION
- this_mv[0].as_int =
- gm_get_motion_vector(&cpi->common.global_motion[mbmi->ref_frame[0]],
- cpi->common.allow_high_precision_mv,
- mbmi->sb_type, mi_col, mi_row, i)
- .as_int;
- this_mv[1].as_int =
- gm_get_motion_vector(&cpi->common.global_motion[mbmi->ref_frame[1]],
- cpi->common.allow_high_precision_mv,
- mbmi->sb_type, mi_col, mi_row, i)
- .as_int;
-#else
- this_mv[0].as_int = 0;
- this_mv[1].as_int = 0;
-#endif // CONFIG_GLOBAL_MOTION
- break;
-#endif // CONFIG_EXT_INTER
- default: break;
- }
-
- mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
- if (is_compound) mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;
-
- mic->bmi[i].as_mode = mode;
-
-#if CONFIG_REF_MV
- if (mode == NEWMV) {
- mic->bmi[i].pred_mv[0].as_int =
- mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_int;
- if (is_compound)
- mic->bmi[i].pred_mv[1].as_int =
- mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_int;
- } else {
- mic->bmi[i].pred_mv[0].as_int = this_mv[0].as_int;
- if (is_compound) mic->bmi[i].pred_mv[1].as_int = this_mv[1].as_int;
- }
-#endif // CONFIG_REF_MV
-
- for (idy = 0; idy < num_4x4_blocks_high; ++idy)
- for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
- memmove(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i]));
-
-#if CONFIG_REF_MV
-#if CONFIG_EXT_INTER
- if (is_compound)
- mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
- else
-#endif // CONFIG_EXT_INTER
- mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
- mbmi->ref_frame, mbmi->sb_type, i);
-#else // CONFIG_REF_MV
- mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
-#endif // CONFIG_REF_MV
- return cost_mv_ref(cpi, mode, mode_ctx) + thismvcost;
-}
-
-static int64_t encode_inter_mb_segment_sub8x8(
- const AV1_COMP *const cpi, MACROBLOCK *x, int64_t best_yrd, int i,
- int *labelyrate, int64_t *distortion, int64_t *sse, ENTROPY_CONTEXT *ta,
- ENTROPY_CONTEXT *tl, int ir, int ic, int mi_row, int mi_col) {
- const AV1_COMMON *const cm = &cpi->common;
- MACROBLOCKD *xd = &x->e_mbd;
- struct macroblockd_plane *const pd = &xd->plane[0];
- struct macroblock_plane *const p = &x->plane[0];
- MODE_INFO *const mi = xd->mi[0];
- const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
- const int txb_width = max_block_wide(xd, plane_bsize, 0);
- const int txb_height = max_block_high(xd, plane_bsize, 0);
- const int width = block_size_wide[plane_bsize];
- const int height = block_size_high[plane_bsize];
- int idx, idy;
- const uint8_t *const src =
- &p->src.buf[av1_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
- uint8_t *const dst =
- &pd->dst.buf[av1_raster_block_offset(BLOCK_8X8, i, pd->dst.stride)];
- int64_t thisdistortion = 0, thissse = 0;
- int thisrate = 0;
- TX_SIZE tx_size = mi->mbmi.tx_size;
- TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, tx_size);
- const int num_4x4_w = tx_size_wide_unit[tx_size];
- const int num_4x4_h = tx_size_high_unit[tx_size];
-#if !CONFIG_PVQ
- const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 1);
-#else
- (void)cpi;
- (void)ta;
- (void)tl;
- (void)tx_type;
-#endif // !CONFIG_PVQ
-
-#if CONFIG_EXT_TX && CONFIG_RECT_TX
- assert(IMPLIES(xd->lossless[mi->mbmi.segment_id], tx_size == TX_4X4));
- assert(IMPLIES(!xd->lossless[mi->mbmi.segment_id],
- tx_size == max_txsize_rect_lookup[mi->mbmi.sb_type]));
-#else
- assert(tx_size == TX_4X4);
-#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
-
- assert(tx_type == DCT_DCT);
-
- av1_build_inter_predictor_sub8x8(xd, 0, i, ir, ic, mi_row, mi_col);
-
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- aom_highbd_subtract_block(
- height, width, av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
- 8, src, p->src.stride, dst, pd->dst.stride, xd->bd);
- } else {
- aom_subtract_block(height, width,
- av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
- 8, src, p->src.stride, dst, pd->dst.stride);
- }
-#else
- aom_subtract_block(height, width,
- av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
- 8, src, p->src.stride, dst, pd->dst.stride);
-#endif // CONFIG_HIGHBITDEPTH
-
- for (idy = 0; idy < txb_height; idy += num_4x4_h) {
- for (idx = 0; idx < txb_width; idx += num_4x4_w) {
- int64_t dist, ssz, rd, rd1, rd2;
- int coeff_ctx;
- const int k = i + (idy * 2 + idx);
- const int block = av1_raster_order_to_block_index(tx_size, k);
- assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
- idx == 0 && idy == 0));
- coeff_ctx = combine_entropy_contexts(*(ta + (k & 1)), *(tl + (k >> 1)));
- av1_xform_quant(cm, x, 0, block, idy + (i >> 1), idx + (i & 0x01),
- BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
- if (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0)
- av1_optimize_b(cm, x, 0, block, tx_size, coeff_ctx);
- av1_dist_block(cpi, x, 0, BLOCK_8X8, block, idy + (i >> 1),
- idx + (i & 0x1), tx_size, &dist, &ssz,
- OUTPUT_HAS_PREDICTED_PIXELS);
- thisdistortion += dist;
- thissse += ssz;
-#if !CONFIG_PVQ
- thisrate +=
- av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order, (ta + (k & 1)),
- (tl + (k >> 1)), cpi->sf.use_fast_coef_costing);
- *(ta + (k & 1)) = !(p->eobs[block] == 0);
- *(tl + (k >> 1)) = !(p->eobs[block] == 0);
-#else
- thisrate += x->rate;
-#endif // !CONFIG_PVQ
-#if CONFIG_EXT_TX
- if (tx_size == TX_8X4) {
- *(ta + (k & 1) + 1) = *(ta + (k & 1));
- }
- if (tx_size == TX_4X8) {
- *(tl + (k >> 1) + 1) = *(tl + (k >> 1));
- }
-#endif // CONFIG_EXT_TX
- rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion);
- rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse);
- rd = AOMMIN(rd1, rd2);
- if (rd >= best_yrd) return INT64_MAX;
- }
- }
-
- *distortion = thisdistortion;
- *labelyrate = thisrate;
- *sse = thissse;
-
- return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
-}
+#endif // CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
typedef struct {
int eobs;
@@ -5252,20 +5071,18 @@ typedef struct {
int64_t bsse;
int64_t brdcost;
int_mv mvs[2];
-#if CONFIG_REF_MV
int_mv pred_mv[2];
-#endif // CONFIG_REF_MV
#if CONFIG_EXT_INTER
int_mv ref_mv[2];
#endif // CONFIG_EXT_INTER
-#if CONFIG_CB4X4
+#if CONFIG_CHROMA_2X2
ENTROPY_CONTEXT ta[4];
ENTROPY_CONTEXT tl[4];
#else
ENTROPY_CONTEXT ta[2];
ENTROPY_CONTEXT tl[2];
-#endif // CONFIG_CB4X4
+#endif // CONFIG_CHROMA_2X2
} SEG_RDSTAT;
typedef struct {
@@ -5293,37 +5110,13 @@ static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {
(mv->col >> 3) > mv_limits->col_max;
}
-static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
- MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
- struct macroblock_plane *const p = &x->plane[0];
- struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
-
- p->src.buf =
- &p->src.buf[av1_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
- assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
- pd->pre[0].buf =
- &pd->pre[0].buf[av1_raster_block_offset(BLOCK_8X8, i, pd->pre[0].stride)];
- if (has_second_ref(mbmi))
- pd->pre[1].buf =
- &pd->pre[1]
- .buf[av1_raster_block_offset(BLOCK_8X8, i, pd->pre[1].stride)];
-}
-
-static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
- struct buf_2d orig_pre[2]) {
- MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
- x->plane[0].src = orig_src;
- x->e_mbd.plane[0].pre[0] = orig_pre[0];
- if (has_second_ref(mbmi)) x->e_mbd.plane[0].pre[1] = orig_pre[1];
-}
-
// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion.
// TODO(aconverse): Find out if this is still productive then clean up or remove
static int check_best_zero_mv(
const AV1_COMP *const cpi, const int16_t mode_context[TOTAL_REFS_PER_FRAME],
-#if CONFIG_REF_MV && CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER
const int16_t compound_mode_context[TOTAL_REFS_PER_FRAME],
-#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+#endif // CONFIG_EXT_INTER
int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME], int this_mode,
const MV_REFERENCE_FRAME ref_frames[2], const BLOCK_SIZE bsize, int block,
int mi_row, int mi_col) {
@@ -5355,21 +5148,12 @@ static int check_best_zero_mv(
frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
(ref_frames[1] <= INTRA_FRAME ||
frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int)) {
-#if CONFIG_REF_MV
int16_t rfc =
av1_mode_context_analyzer(mode_context, ref_frames, bsize, block);
-#else
- int16_t rfc = mode_context[ref_frames[0]];
-#endif // CONFIG_REF_MV
int c1 = cost_mv_ref(cpi, NEARMV, rfc);
int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
-#if !CONFIG_REF_MV
- (void)bsize;
- (void)block;
-#endif // !CONFIG_REF_MV
-
if (this_mode == NEARMV) {
if (c1 > c3) return 0;
} else if (this_mode == NEARESTMV) {
@@ -5390,40 +5174,25 @@ static int check_best_zero_mv(
}
}
#if CONFIG_EXT_INTER
- else if ((this_mode == NEAREST_NEARESTMV || this_mode == NEAREST_NEARMV ||
- this_mode == NEAR_NEARESTMV || this_mode == NEAR_NEARMV ||
+ else if ((this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
this_mode == ZERO_ZEROMV) &&
frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int) {
-#if CONFIG_REF_MV
int16_t rfc = compound_mode_context[ref_frames[0]];
-#else
- int16_t rfc = mode_context[ref_frames[0]];
-#endif // CONFIG_REF_MV
- int c1 = cost_mv_ref(cpi, NEAREST_NEARMV, rfc);
int c2 = cost_mv_ref(cpi, NEAREST_NEARESTMV, rfc);
int c3 = cost_mv_ref(cpi, ZERO_ZEROMV, rfc);
- int c4 = cost_mv_ref(cpi, NEAR_NEARESTMV, rfc);
int c5 = cost_mv_ref(cpi, NEAR_NEARMV, rfc);
- if (this_mode == NEAREST_NEARMV) {
- if (c1 > c3) return 0;
- } else if (this_mode == NEAREST_NEARESTMV) {
+ if (this_mode == NEAREST_NEARESTMV) {
if (c2 > c3) return 0;
- } else if (this_mode == NEAR_NEARESTMV) {
- if (c4 > c3) return 0;
} else if (this_mode == NEAR_NEARMV) {
if (c5 > c3) return 0;
} else {
assert(this_mode == ZERO_ZEROMV);
if ((c3 >= c2 && frame_mv[NEAREST_NEARESTMV][ref_frames[0]].as_int == 0 &&
frame_mv[NEAREST_NEARESTMV][ref_frames[1]].as_int == 0) ||
- (c3 >= c1 && frame_mv[NEAREST_NEARMV][ref_frames[0]].as_int == 0 &&
- frame_mv[NEAREST_NEARMV][ref_frames[1]].as_int == 0) ||
(c3 >= c5 && frame_mv[NEAR_NEARMV][ref_frames[0]].as_int == 0 &&
- frame_mv[NEAR_NEARMV][ref_frames[1]].as_int == 0) ||
- (c3 >= c4 && frame_mv[NEAR_NEARESTMV][ref_frames[0]].as_int == 0 &&
- frame_mv[NEAR_NEARESTMV][ref_frames[1]].as_int == 0))
+ frame_mv[NEAR_NEARMV][ref_frames[1]].as_int == 0))
return 0;
}
}
@@ -5435,7 +5204,8 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row,
int mi_col,
#if CONFIG_EXT_INTER
- int_mv *ref_mv_sub8x8[2],
+ int_mv *ref_mv_sub8x8[2], const uint8_t *mask,
+ int mask_stride,
#endif // CONFIG_EXT_INTER
int *rate_mv, const int block) {
const AV1_COMMON *const cm = &cpi->common;
@@ -5596,17 +5366,26 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
best_mv->col >>= 3;
best_mv->row >>= 3;
-#if CONFIG_REF_MV
av1_set_mvcost(x, refs[id], id, mbmi->ref_mv_idx);
-#endif // CONFIG_REF_MV
// Small-range full-pixel motion search.
bestsme =
av1_refining_search_8p_c(x, sadpb, search_range, &cpi->fn_ptr[bsize],
+#if CONFIG_EXT_INTER
+ mask, mask_stride, id,
+#endif
&ref_mv[id].as_mv, second_pred);
- if (bestsme < INT_MAX)
- bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv,
- second_pred, &cpi->fn_ptr[bsize], 1);
+ if (bestsme < INT_MAX) {
+#if CONFIG_EXT_INTER
+ if (mask)
+ bestsme = av1_get_mvpred_mask_var(x, best_mv, &ref_mv[id].as_mv,
+ second_pred, mask, mask_stride, id,
+ &cpi->fn_ptr[bsize], 1);
+ else
+#endif
+ bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv,
+ second_pred, &cpi->fn_ptr[bsize], 1);
+ }
x->mv_limits = tmp_mv_limits;
@@ -5639,7 +5418,11 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], 0,
cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
- &dis, &sse, second_pred, pw, ph, 1);
+ &dis, &sse, second_pred,
+#if CONFIG_EXT_INTER
+ mask, mask_stride, id,
+#endif
+ pw, ph, 1);
// Restore the reference frames.
pd->pre[0] = backup_pred;
@@ -5649,7 +5432,11 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], 0,
cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
- &dis, &sse, second_pred, pw, ph, 0);
+ &dis, &sse, second_pred,
+#if CONFIG_EXT_INTER
+ mask, mask_stride, id,
+#endif
+ pw, ph, 0);
}
}
@@ -5673,9 +5460,7 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[ref] = backup_yv12[ref][i];
}
-#if CONFIG_REF_MV
av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx);
-#endif // CONFIG_REF_MV
#if CONFIG_EXT_INTER && !CONFIG_CB4X4
if (bsize >= BLOCK_8X8)
#endif // CONFIG_EXT_INTER && !CONFIG_CB4X4
@@ -5691,947 +5476,6 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
}
}
-#if CONFIG_REF_MV && !CONFIG_EXT_INTER
-static void update_mv_search_and_seg_mvs(
- int *const run_mv_search, int_mv *const seg_mvs, int has_second_rf,
- const MV_REFERENCE_FRAME *const ref_frame,
- const SEG_RDSTAT *const ref_rdstat, int_mv *const bsi_ref_mv[2]) {
- if (has_second_rf) {
- if (seg_mvs[ref_frame[0]].as_int == ref_rdstat->mvs[0].as_int &&
- ref_rdstat->mvs[0].as_int != INVALID_MV)
- if (bsi_ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int)
- --*run_mv_search;
-
- if (seg_mvs[ref_frame[1]].as_int == ref_rdstat->mvs[1].as_int &&
- ref_rdstat->mvs[1].as_int != INVALID_MV)
- if (bsi_ref_mv[1]->as_int == ref_rdstat->pred_mv[1].as_int)
- --*run_mv_search;
- } else {
- if (bsi_ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int &&
- ref_rdstat->mvs[0].as_int != INVALID_MV) {
- *run_mv_search = 0;
- seg_mvs[ref_frame[0]].as_int = ref_rdstat->mvs[0].as_int;
- }
- }
-}
-#endif // CONFIG_REF_MV && !CONFIG_EXT_INTER
-
-static int64_t rd_pick_inter_best_sub8x8_mode(
- const AV1_COMP *const cpi, MACROBLOCK *x, int_mv *best_ref_mv,
- int_mv *second_best_ref_mv, int64_t best_rd, int *returntotrate,
- int *returnyrate, int64_t *returndistortion, int *skippable, int64_t *psse,
- int mvthresh, int_mv seg_mvs[4][TOTAL_REFS_PER_FRAME],
-#if CONFIG_EXT_INTER
- int_mv compound_seg_newmvs[4][2],
-#endif // CONFIG_EXT_INTER
- BEST_SEG_INFO *bsi_buf, int filter_idx, int mi_row, int mi_col) {
- BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
-#if CONFIG_REF_MV
- int_mv tmp_ref_mv[2];
-#endif // CONFIG_REF_MV
- MACROBLOCKD *xd = &x->e_mbd;
- MODE_INFO *mi = xd->mi[0];
- MB_MODE_INFO *mbmi = &mi->mbmi;
- int mode_idx;
- int k, br = 0, idx, idy;
- int64_t bd = 0, block_sse = 0;
- PREDICTION_MODE this_mode;
- const AV1_COMMON *cm = &cpi->common;
- struct macroblock_plane *const p = &x->plane[0];
- struct macroblockd_plane *const pd = &xd->plane[0];
- const int label_count = 4;
- int64_t this_segment_rd = 0;
- int label_mv_thresh;
- int segmentyrate = 0;
- const BLOCK_SIZE bsize = mbmi->sb_type;
- const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
- const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
-#if CONFIG_CB4X4
- ENTROPY_CONTEXT t_above[4], t_left[4];
-#else
- ENTROPY_CONTEXT t_above[2], t_left[2];
-#endif // CONFIG_CB4X4
- int subpelmv = 1, have_ref = 0;
- const int has_second_rf = has_second_ref(mbmi);
- const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
- MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
-#if CONFIG_PVQ
- od_rollback_buffer pre_buf;
-
- od_encode_checkpoint(&x->daala_enc, &pre_buf);
-#endif // CONFIG_PVQ
-#if CONFIG_EXT_TX && CONFIG_RECT_TX
- mbmi->tx_size =
- xd->lossless[mbmi->segment_id] ? TX_4X4 : max_txsize_rect_lookup[bsize];
-#else
- mbmi->tx_size = TX_4X4;
-#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
-
- av1_zero(*bsi);
-
- bsi->segment_rd = best_rd;
- bsi->ref_mv[0] = best_ref_mv;
- bsi->ref_mv[1] = second_best_ref_mv;
- bsi->mvp.as_int = best_ref_mv->as_int;
- bsi->mvthresh = mvthresh;
-
- for (idx = 0; idx < 4; ++idx) bsi->modes[idx] = ZEROMV;
-
-#if CONFIG_REF_MV
- for (idx = 0; idx < 4; ++idx) {
- for (k = NEARESTMV; k <= NEWMV; ++k) {
- bsi->rdstat[idx][INTER_OFFSET(k)].pred_mv[0].as_int = INVALID_MV;
- bsi->rdstat[idx][INTER_OFFSET(k)].pred_mv[1].as_int = INVALID_MV;
-
- bsi->rdstat[idx][INTER_OFFSET(k)].mvs[0].as_int = INVALID_MV;
- bsi->rdstat[idx][INTER_OFFSET(k)].mvs[1].as_int = INVALID_MV;
- }
- }
-#endif // CONFIG_REF_MV
-
- memcpy(t_above, pd->above_context, sizeof(t_above));
- memcpy(t_left, pd->left_context, sizeof(t_left));
-
- // 64 makes this threshold really big effectively
- // making it so that we very rarely check mvs on
- // segments. setting this to 1 would make mv thresh
- // roughly equal to what it is for macroblocks
- label_mv_thresh = 1 * bsi->mvthresh / label_count;
-
- // Segmentation method overheads
- for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
- for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
- // TODO(jingning,rbultje): rewrite the rate-distortion optimization
- // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
- int_mv mode_mv[MB_MODE_COUNT][2];
- int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
- PREDICTION_MODE mode_selected = ZEROMV;
- int64_t new_best_rd = INT64_MAX;
- const int index = idy * 2 + idx;
- int ref;
-#if CONFIG_REF_MV
- CANDIDATE_MV ref_mv_stack[2][MAX_REF_MV_STACK_SIZE];
- uint8_t ref_mv_count[2];
-#endif // CONFIG_REF_MV
-#if CONFIG_EXT_INTER
- int_mv ref_mvs_sub8x8[2][2];
-#endif // CONFIG_EXT_INTER
-#if CONFIG_PVQ
- od_rollback_buffer idx_buf, post_buf;
- od_encode_checkpoint(&x->daala_enc, &idx_buf);
- od_encode_checkpoint(&x->daala_enc, &post_buf);
-#endif // CONFIG_PVQ
-
- for (ref = 0; ref < 1 + has_second_rf; ++ref) {
- const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
-#if CONFIG_EXT_INTER
- int_mv mv_ref_list[MAX_MV_REF_CANDIDATES];
- av1_update_mv_context(cm, xd, mi, frame, mv_ref_list, index, mi_row,
- mi_col, NULL);
-#endif // CONFIG_EXT_INTER
-#if CONFIG_GLOBAL_MOTION
- frame_mv[ZEROMV][frame].as_int =
- gm_get_motion_vector(&cm->global_motion[frame],
- cm->allow_high_precision_mv, mbmi->sb_type,
- mi_col, mi_row, index)
- .as_int;
-#else // CONFIG_GLOBAL_MOTION
- frame_mv[ZEROMV][frame].as_int = 0;
-#endif // CONFIG_GLOBAL_MOTION
- av1_append_sub8x8_mvs_for_idx(cm, xd, index, ref, mi_row, mi_col,
-#if CONFIG_REF_MV
- ref_mv_stack[ref], &ref_mv_count[ref],
-#endif // CONFIG_REF_MV
-#if CONFIG_EXT_INTER
- mv_ref_list,
-#endif // CONFIG_EXT_INTER
- &frame_mv[NEARESTMV][frame],
- &frame_mv[NEARMV][frame]);
-
-#if CONFIG_REF_MV
- tmp_ref_mv[ref] = frame_mv[NEARESTMV][mbmi->ref_frame[ref]];
- lower_mv_precision(&tmp_ref_mv[ref].as_mv, cm->allow_high_precision_mv);
- bsi->ref_mv[ref] = &tmp_ref_mv[ref];
- mbmi_ext->ref_mvs[frame][0] = tmp_ref_mv[ref];
-#endif // CONFIG_REF_MV
-
-#if CONFIG_EXT_INTER
- mv_ref_list[0].as_int = frame_mv[NEARESTMV][frame].as_int;
- mv_ref_list[1].as_int = frame_mv[NEARMV][frame].as_int;
- av1_find_best_ref_mvs(cm->allow_high_precision_mv, mv_ref_list,
- &ref_mvs_sub8x8[0][ref], &ref_mvs_sub8x8[1][ref]);
-
- if (has_second_rf) {
-#if CONFIG_GLOBAL_MOTION
- frame_mv[ZERO_ZEROMV][frame].as_int =
- gm_get_motion_vector(&cm->global_motion[frame],
- cm->allow_high_precision_mv, mbmi->sb_type,
- mi_col, mi_row, index)
- .as_int;
-#else
- frame_mv[ZERO_ZEROMV][frame].as_int = 0;
-#endif // CONFIG_GLOBAL_MOTION
- frame_mv[NEAREST_NEARESTMV][frame].as_int =
- frame_mv[NEARESTMV][frame].as_int;
-
- if (ref == 0) {
- frame_mv[NEAREST_NEARMV][frame].as_int =
- frame_mv[NEARESTMV][frame].as_int;
- frame_mv[NEAR_NEARESTMV][frame].as_int =
- frame_mv[NEARMV][frame].as_int;
- frame_mv[NEAREST_NEWMV][frame].as_int =
- frame_mv[NEARESTMV][frame].as_int;
- frame_mv[NEAR_NEWMV][frame].as_int = frame_mv[NEARMV][frame].as_int;
- frame_mv[NEAR_NEARMV][frame].as_int =
- frame_mv[NEARMV][frame].as_int;
- } else if (ref == 1) {
- frame_mv[NEAREST_NEARMV][frame].as_int =
- frame_mv[NEARMV][frame].as_int;
- frame_mv[NEAR_NEARESTMV][frame].as_int =
- frame_mv[NEARESTMV][frame].as_int;
- frame_mv[NEW_NEARESTMV][frame].as_int =
- frame_mv[NEARESTMV][frame].as_int;
- frame_mv[NEW_NEARMV][frame].as_int = frame_mv[NEARMV][frame].as_int;
- frame_mv[NEAR_NEARMV][frame].as_int =
- frame_mv[NEARMV][frame].as_int;
- }
- }
-#endif // CONFIG_EXT_INTER
- }
-
-// search for the best motion vector on this segment
-#if CONFIG_EXT_INTER
- for (this_mode = (has_second_rf ? NEAREST_NEARESTMV : NEARESTMV);
- this_mode <= (has_second_rf ? NEW_NEWMV : NEWMV); ++this_mode)
-#else
- for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode)
-#endif // CONFIG_EXT_INTER
- {
- const struct buf_2d orig_src = x->plane[0].src;
- struct buf_2d orig_pre[2];
- // This flag controls if the motion estimation will kick off. When it
- // is set to a non-zero value, the encoder will force motion estimation.
- int run_mv_search = 0;
-
- mode_idx = INTER_OFFSET(this_mode);
-#if CONFIG_EXT_INTER
- for (ref = 0; ref < 1 + has_second_rf; ++ref)
- bsi->ref_mv[ref]->as_int = ref_mvs_sub8x8[0][ref].as_int;
-#endif // CONFIG_EXT_INTER
- bsi->rdstat[index][mode_idx].brdcost = INT64_MAX;
- if (!(inter_mode_mask & (1 << this_mode))) continue;
-
-#if CONFIG_REF_MV
- run_mv_search = 2;
-#if !CONFIG_EXT_INTER
- if (filter_idx > 0 && this_mode == NEWMV) {
- const BEST_SEG_INFO *ref_bsi = bsi_buf;
- const SEG_RDSTAT *ref_rdstat = &ref_bsi->rdstat[index][mode_idx];
-
- update_mv_search_and_seg_mvs(&run_mv_search, seg_mvs[index],
- has_second_rf, mbmi->ref_frame,
- ref_rdstat, bsi->ref_mv);
-
- if (run_mv_search != 0 && filter_idx > 1) {
- ref_bsi = bsi_buf + 1;
- ref_rdstat = &ref_bsi->rdstat[index][mode_idx];
- run_mv_search = 2;
- update_mv_search_and_seg_mvs(&run_mv_search, seg_mvs[index],
- has_second_rf, mbmi->ref_frame,
- ref_rdstat, bsi->ref_mv);
- }
- }
-#endif // !CONFIG_EXT_INTER
-#endif // CONFIG_REF_MV
-
-#if CONFIG_GLOBAL_MOTION
- if (cm->global_motion[mbmi->ref_frame[0]].wmtype == IDENTITY &&
- (!has_second_rf ||
- cm->global_motion[mbmi->ref_frame[1]].wmtype == IDENTITY))
-#endif // CONFIG_GLOBAL_MOTION
-
- if (!check_best_zero_mv(cpi, mbmi_ext->mode_context,
-#if CONFIG_REF_MV && CONFIG_EXT_INTER
- mbmi_ext->compound_mode_context,
-#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
- frame_mv, this_mode, mbmi->ref_frame, bsize,
- index, mi_row, mi_col))
- continue;
-
- memcpy(orig_pre, pd->pre, sizeof(orig_pre));
- memcpy(bsi->rdstat[index][mode_idx].ta, t_above,
- sizeof(bsi->rdstat[index][mode_idx].ta));
- memcpy(bsi->rdstat[index][mode_idx].tl, t_left,
- sizeof(bsi->rdstat[index][mode_idx].tl));
-#if CONFIG_PVQ
- od_encode_rollback(&x->daala_enc, &idx_buf);
-#endif // CONFIG_PVQ
-
- // motion search for newmv (single predictor case only)
- if (!has_second_rf &&
-#if CONFIG_EXT_INTER
- have_newmv_in_inter_mode(this_mode) &&
- (seg_mvs[index][mbmi->ref_frame[0]].as_int == INVALID_MV)
-#else
- this_mode == NEWMV &&
- (seg_mvs[index][mbmi->ref_frame[0]].as_int == INVALID_MV ||
- run_mv_search)
-#endif // CONFIG_EXT_INTER
- ) {
- int step_param = 0;
- int bestsme = INT_MAX;
- int sadpb = x->sadperbit4;
- MV mvp_full;
- int max_mv;
- int cost_list[5];
- MvLimits tmp_mv_limits = x->mv_limits;
-
- /* Is the best so far sufficiently good that we cant justify doing
- * and new motion search. */
- if (new_best_rd < label_mv_thresh) break;
-
-#if CONFIG_EXT_INTER
- bsi->mvp.as_int = bsi->ref_mv[0]->as_int;
-#else
-// use previous block's result as next block's MV predictor.
-#if !CONFIG_REF_MV
- if (index > 0) {
- bsi->mvp.as_int = mi->bmi[index - 1].as_mv[0].as_int;
- if (index == 2)
- bsi->mvp.as_int = mi->bmi[index - 2].as_mv[0].as_int;
- }
-#endif // !CONFIG_REF_MV
-#endif // CONFIG_EXT_INTER
- max_mv = (index == 0) ? (int)x->max_mv_context[mbmi->ref_frame[0]]
- : AOMMAX(abs(bsi->mvp.as_mv.row),
- abs(bsi->mvp.as_mv.col)) >>
- 3;
-
- if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
- // Take wtd average of the step_params based on the last frame's
- // max mv magnitude and the best ref mvs of the current block for
- // the given reference.
- step_param =
- (av1_init_search_range(max_mv) + cpi->mv_step_param) / 2;
- } else {
- step_param = cpi->mv_step_param;
- }
-
-#if CONFIG_REF_MV
- mvp_full.row = bsi->ref_mv[0]->as_mv.row >> 3;
- mvp_full.col = bsi->ref_mv[0]->as_mv.col >> 3;
-#else
- mvp_full.row = bsi->mvp.as_mv.row >> 3;
- mvp_full.col = bsi->mvp.as_mv.col >> 3;
-#endif // CONFIG_REF_MV
-
- if (cpi->sf.adaptive_motion_search) {
- mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
- mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
- step_param = AOMMAX(step_param, 8);
- }
-
- // adjust src pointer for this block
- mi_buf_shift(x, index);
-
- av1_set_mv_search_range(&x->mv_limits, &bsi->ref_mv[0]->as_mv);
-
- x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV;
-
-#if CONFIG_REF_MV
- av1_set_mvcost(x, mbmi->ref_frame[0], 0, mbmi->ref_mv_idx);
-#endif // CONFIG_REF_MV
- bestsme = av1_full_pixel_search(
- cpi, x, bsize, &mvp_full, step_param, sadpb,
- cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
- &bsi->ref_mv[0]->as_mv, INT_MAX, 1);
-
- x->mv_limits = tmp_mv_limits;
-
- if (bestsme < INT_MAX) {
- int distortion;
- if (cpi->sf.use_upsampled_references) {
- int best_mv_var;
- const int try_second =
- x->second_best_mv.as_int != INVALID_MV &&
- x->second_best_mv.as_int != x->best_mv.as_int;
- const int pw = block_size_wide[bsize];
- const int ph = block_size_high[bsize];
- // Use up-sampled reference frames.
- struct buf_2d backup_pred = pd->pre[0];
- const YV12_BUFFER_CONFIG *upsampled_ref =
- get_upsampled_ref(cpi, mbmi->ref_frame[0]);
-
- // Set pred for Y plane
- setup_pred_plane(
- &pd->pre[0], bsize, upsampled_ref->y_buffer,
- upsampled_ref->y_crop_width, upsampled_ref->y_crop_height,
- upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), NULL,
- pd->subsampling_x, pd->subsampling_y);
-
- // adjust pred pointer for this block
- pd->pre[0].buf =
- &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, index,
- pd->pre[0].stride))
- << 3];
-
- best_mv_var = cpi->find_fractional_mv_step(
- x, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv,
- x->errorperbit, &cpi->fn_ptr[bsize],
- cpi->sf.mv.subpel_force_stop,
- cpi->sf.mv.subpel_iters_per_step,
- cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
- &distortion, &x->pred_sse[mbmi->ref_frame[0]], NULL, pw, ph,
- 1);
-
- if (try_second) {
- int this_var;
- MV best_mv = x->best_mv.as_mv;
- const MV ref_mv = bsi->ref_mv[0]->as_mv;
- const int minc =
- AOMMAX(x->mv_limits.col_min * 8, ref_mv.col - MV_MAX);
- const int maxc =
- AOMMIN(x->mv_limits.col_max * 8, ref_mv.col + MV_MAX);
- const int minr =
- AOMMAX(x->mv_limits.row_min * 8, ref_mv.row - MV_MAX);
- const int maxr =
- AOMMIN(x->mv_limits.row_max * 8, ref_mv.row + MV_MAX);
-
- x->best_mv = x->second_best_mv;
- if (x->best_mv.as_mv.row * 8 <= maxr &&
- x->best_mv.as_mv.row * 8 >= minr &&
- x->best_mv.as_mv.col * 8 <= maxc &&
- x->best_mv.as_mv.col * 8 >= minc) {
- this_var = cpi->find_fractional_mv_step(
- x, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv,
- x->errorperbit, &cpi->fn_ptr[bsize],
- cpi->sf.mv.subpel_force_stop,
- cpi->sf.mv.subpel_iters_per_step,
- cond_cost_list(cpi, cost_list), x->nmvjointcost,
- x->mvcost, &distortion, &x->pred_sse[mbmi->ref_frame[0]],
- NULL, pw, ph, 1);
- if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
- x->best_mv.as_mv = best_mv;
- }
- }
-
- // Restore the reference frames.
- pd->pre[0] = backup_pred;
- } else {
- cpi->find_fractional_mv_step(
- x, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv,
- x->errorperbit, &cpi->fn_ptr[bsize],
- cpi->sf.mv.subpel_force_stop,
- cpi->sf.mv.subpel_iters_per_step,
- cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
- &distortion, &x->pred_sse[mbmi->ref_frame[0]], NULL, 0, 0, 0);
- }
-
-// save motion search result for use in compound prediction
-#if CONFIG_EXT_INTER
- seg_mvs[index][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv;
-#else
- seg_mvs[index][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv;
-#endif // CONFIG_EXT_INTER
- }
-
- if (cpi->sf.adaptive_motion_search)
- x->pred_mv[mbmi->ref_frame[0]] = x->best_mv.as_mv;
-
-#if CONFIG_EXT_INTER
- mode_mv[this_mode][0] = x->best_mv;
-#else
- mode_mv[NEWMV][0] = x->best_mv;
-#endif // CONFIG_EXT_INTER
-
- // restore src pointers
- mi_buf_restore(x, orig_src, orig_pre);
- }
-
- if (has_second_rf) {
-#if CONFIG_EXT_INTER
- if (seg_mvs[index][mbmi->ref_frame[1]].as_int == INVALID_MV ||
- seg_mvs[index][mbmi->ref_frame[0]].as_int == INVALID_MV)
-#else
- if (seg_mvs[index][mbmi->ref_frame[1]].as_int == INVALID_MV ||
- seg_mvs[index][mbmi->ref_frame[0]].as_int == INVALID_MV)
-#endif // CONFIG_EXT_INTER
- continue;
- }
-
-#if CONFIG_DUAL_FILTER
- (void)run_mv_search;
-#endif // CONFIG_DUAL_FILTER
-
- if (has_second_rf &&
-#if CONFIG_EXT_INTER
- this_mode == NEW_NEWMV &&
-#else
- this_mode == NEWMV &&
-#endif // CONFIG_EXT_INTER
-#if CONFIG_DUAL_FILTER
- (mbmi->interp_filter[0] == EIGHTTAP_REGULAR || run_mv_search))
-#else
- (mbmi->interp_filter == EIGHTTAP_REGULAR || run_mv_search))
-#endif // CONFIG_DUAL_FILTER
- {
- // adjust src pointers
- mi_buf_shift(x, index);
- if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
- int rate_mv;
- frame_mv[this_mode][mbmi->ref_frame[0]].as_int =
- seg_mvs[index][mbmi->ref_frame[0]].as_int;
- frame_mv[this_mode][mbmi->ref_frame[1]].as_int =
- seg_mvs[index][mbmi->ref_frame[1]].as_int;
- joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row,
- mi_col,
-#if CONFIG_EXT_INTER
- bsi->ref_mv,
-#endif // CONFIG_EXT_INTER
- &rate_mv, index);
-#if CONFIG_EXT_INTER
- compound_seg_newmvs[index][0].as_int =
- frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
- compound_seg_newmvs[index][1].as_int =
- frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
-#else
- seg_mvs[index][mbmi->ref_frame[0]].as_int =
- frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
- seg_mvs[index][mbmi->ref_frame[1]].as_int =
- frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
-#endif // CONFIG_EXT_INTER
- }
- // restore src pointers
- mi_buf_restore(x, orig_src, orig_pre);
- }
-
- bsi->rdstat[index][mode_idx].brate = set_and_cost_bmi_mvs(
- cpi, x, xd, index, this_mode, mode_mv[this_mode], frame_mv,
- seg_mvs[index],
-#if CONFIG_EXT_INTER
- compound_seg_newmvs[index],
-#endif // CONFIG_EXT_INTER
- bsi->ref_mv, x->nmvjointcost, x->mvcost, mi_row, mi_col);
-
- for (ref = 0; ref < 1 + has_second_rf; ++ref) {
- bsi->rdstat[index][mode_idx].mvs[ref].as_int =
- mode_mv[this_mode][ref].as_int;
- if (num_4x4_blocks_wide > 1)
- bsi->rdstat[index + 1][mode_idx].mvs[ref].as_int =
- mode_mv[this_mode][ref].as_int;
- if (num_4x4_blocks_high > 1)
- bsi->rdstat[index + 2][mode_idx].mvs[ref].as_int =
- mode_mv[this_mode][ref].as_int;
-#if CONFIG_REF_MV
- bsi->rdstat[index][mode_idx].pred_mv[ref].as_int =
- mi->bmi[index].pred_mv[ref].as_int;
- if (num_4x4_blocks_wide > 1)
- bsi->rdstat[index + 1][mode_idx].pred_mv[ref].as_int =
- mi->bmi[index].pred_mv[ref].as_int;
- if (num_4x4_blocks_high > 1)
- bsi->rdstat[index + 2][mode_idx].pred_mv[ref].as_int =
- mi->bmi[index].pred_mv[ref].as_int;
-#endif // CONFIG_REF_MV
-#if CONFIG_EXT_INTER
- bsi->rdstat[index][mode_idx].ref_mv[ref].as_int =
- bsi->ref_mv[ref]->as_int;
- if (num_4x4_blocks_wide > 1)
- bsi->rdstat[index + 1][mode_idx].ref_mv[ref].as_int =
- bsi->ref_mv[ref]->as_int;
- if (num_4x4_blocks_high > 1)
- bsi->rdstat[index + 2][mode_idx].ref_mv[ref].as_int =
- bsi->ref_mv[ref]->as_int;
-#endif // CONFIG_EXT_INTER
- }
-
- // Trap vectors that reach beyond the UMV borders
- if (mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][0].as_mv) ||
- (has_second_rf &&
- mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][1].as_mv)))
- continue;
-
- if (filter_idx > 0) {
- BEST_SEG_INFO *ref_bsi = bsi_buf;
- subpelmv = 0;
- have_ref = 1;
-
- for (ref = 0; ref < 1 + has_second_rf; ++ref) {
- subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
-#if CONFIG_EXT_INTER
- if (have_newmv_in_inter_mode(this_mode))
- have_ref &=
- ((mode_mv[this_mode][ref].as_int ==
- ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int) &&
- (bsi->ref_mv[ref]->as_int ==
- ref_bsi->rdstat[index][mode_idx].ref_mv[ref].as_int));
- else
-#endif // CONFIG_EXT_INTER
- have_ref &= mode_mv[this_mode][ref].as_int ==
- ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int;
- }
-
- have_ref &= ref_bsi->rdstat[index][mode_idx].brate > 0;
-
- if (filter_idx > 1 && !subpelmv && !have_ref) {
- ref_bsi = bsi_buf + 1;
- have_ref = 1;
- for (ref = 0; ref < 1 + has_second_rf; ++ref)
-#if CONFIG_EXT_INTER
- if (have_newmv_in_inter_mode(this_mode))
- have_ref &=
- ((mode_mv[this_mode][ref].as_int ==
- ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int) &&
- (bsi->ref_mv[ref]->as_int ==
- ref_bsi->rdstat[index][mode_idx].ref_mv[ref].as_int));
- else
-#endif // CONFIG_EXT_INTER
- have_ref &= mode_mv[this_mode][ref].as_int ==
- ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int;
-
- have_ref &= ref_bsi->rdstat[index][mode_idx].brate > 0;
- }
-
- if (!subpelmv && have_ref &&
- ref_bsi->rdstat[index][mode_idx].brdcost < INT64_MAX) {
-#if CONFIG_REF_MV
- bsi->rdstat[index][mode_idx].byrate =
- ref_bsi->rdstat[index][mode_idx].byrate;
- bsi->rdstat[index][mode_idx].bdist =
- ref_bsi->rdstat[index][mode_idx].bdist;
- bsi->rdstat[index][mode_idx].bsse =
- ref_bsi->rdstat[index][mode_idx].bsse;
- bsi->rdstat[index][mode_idx].brate +=
- ref_bsi->rdstat[index][mode_idx].byrate;
- bsi->rdstat[index][mode_idx].eobs =
- ref_bsi->rdstat[index][mode_idx].eobs;
-
- bsi->rdstat[index][mode_idx].brdcost =
- RDCOST(x->rdmult, x->rddiv, bsi->rdstat[index][mode_idx].brate,
- bsi->rdstat[index][mode_idx].bdist);
-
- memcpy(bsi->rdstat[index][mode_idx].ta,
- ref_bsi->rdstat[index][mode_idx].ta,
- sizeof(bsi->rdstat[index][mode_idx].ta));
- memcpy(bsi->rdstat[index][mode_idx].tl,
- ref_bsi->rdstat[index][mode_idx].tl,
- sizeof(bsi->rdstat[index][mode_idx].tl));
-#else
- memcpy(&bsi->rdstat[index][mode_idx],
- &ref_bsi->rdstat[index][mode_idx], sizeof(SEG_RDSTAT));
-#endif // CONFIG_REF_MV
- if (num_4x4_blocks_wide > 1)
- bsi->rdstat[index + 1][mode_idx].eobs =
- ref_bsi->rdstat[index + 1][mode_idx].eobs;
- if (num_4x4_blocks_high > 1)
- bsi->rdstat[index + 2][mode_idx].eobs =
- ref_bsi->rdstat[index + 2][mode_idx].eobs;
-
- if (bsi->rdstat[index][mode_idx].brdcost < new_best_rd) {
-#if CONFIG_REF_MV
- // If the NEWMV mode is using the same motion vector as the
- // NEARESTMV mode, skip the rest rate-distortion calculations
- // and use the inferred motion vector modes.
- if (this_mode == NEWMV) {
- if (has_second_rf) {
- if (bsi->rdstat[index][mode_idx].mvs[0].as_int ==
- bsi->ref_mv[0]->as_int &&
- bsi->rdstat[index][mode_idx].mvs[1].as_int ==
- bsi->ref_mv[1]->as_int)
- continue;
- } else {
- if (bsi->rdstat[index][mode_idx].mvs[0].as_int ==
- bsi->ref_mv[0]->as_int)
- continue;
- }
- }
-#endif // CONFIG_REF_MV
- mode_selected = this_mode;
- new_best_rd = bsi->rdstat[index][mode_idx].brdcost;
-#if CONFIG_PVQ
- od_encode_checkpoint(&x->daala_enc, &post_buf);
-#endif // CONFIG_PVQ
- }
- continue;
- }
- }
-
- bsi->rdstat[index][mode_idx].brdcost = encode_inter_mb_segment_sub8x8(
- cpi, x, bsi->segment_rd - this_segment_rd, index,
- &bsi->rdstat[index][mode_idx].byrate,
- &bsi->rdstat[index][mode_idx].bdist,
- &bsi->rdstat[index][mode_idx].bsse, bsi->rdstat[index][mode_idx].ta,
- bsi->rdstat[index][mode_idx].tl, idy, idx, mi_row, mi_col);
-
- if (bsi->rdstat[index][mode_idx].brdcost < INT64_MAX) {
- bsi->rdstat[index][mode_idx].brdcost += RDCOST(
- x->rdmult, x->rddiv, bsi->rdstat[index][mode_idx].brate, 0);
- bsi->rdstat[index][mode_idx].brate +=
- bsi->rdstat[index][mode_idx].byrate;
- bsi->rdstat[index][mode_idx].eobs = p->eobs[index];
- if (num_4x4_blocks_wide > 1)
- bsi->rdstat[index + 1][mode_idx].eobs = p->eobs[index + 1];
- if (num_4x4_blocks_high > 1)
- bsi->rdstat[index + 2][mode_idx].eobs = p->eobs[index + 2];
- }
-
- if (bsi->rdstat[index][mode_idx].brdcost < new_best_rd) {
-#if CONFIG_REF_MV
- // If the NEWMV mode is using the same motion vector as the
- // NEARESTMV mode, skip the rest rate-distortion calculations
- // and use the inferred motion vector modes.
- if (this_mode == NEWMV) {
- if (has_second_rf) {
- if (bsi->rdstat[index][mode_idx].mvs[0].as_int ==
- bsi->ref_mv[0]->as_int &&
- bsi->rdstat[index][mode_idx].mvs[1].as_int ==
- bsi->ref_mv[1]->as_int)
- continue;
- } else {
- if (bsi->rdstat[index][mode_idx].mvs[0].as_int ==
- bsi->ref_mv[0]->as_int)
- continue;
- }
- }
-#endif // CONFIG_REF_MV
- mode_selected = this_mode;
- new_best_rd = bsi->rdstat[index][mode_idx].brdcost;
-
-#if CONFIG_PVQ
- od_encode_checkpoint(&x->daala_enc, &post_buf);
-#endif // CONFIG_PVQ
- }
- } /*for each 4x4 mode*/
-
- if (new_best_rd == INT64_MAX) {
- int iy, midx;
- for (iy = index + 1; iy < 4; ++iy)
-#if CONFIG_EXT_INTER
- for (midx = 0; midx < INTER_MODES + INTER_COMPOUND_MODES; ++midx)
-#else
- for (midx = 0; midx < INTER_MODES; ++midx)
-#endif // CONFIG_EXT_INTER
- bsi->rdstat[iy][midx].brdcost = INT64_MAX;
- bsi->segment_rd = INT64_MAX;
-#if CONFIG_PVQ
- od_encode_rollback(&x->daala_enc, &pre_buf);
-#endif // CONFIG_PVQ
- return INT64_MAX;
- }
-
- mode_idx = INTER_OFFSET(mode_selected);
- memcpy(t_above, bsi->rdstat[index][mode_idx].ta, sizeof(t_above));
- memcpy(t_left, bsi->rdstat[index][mode_idx].tl, sizeof(t_left));
-#if CONFIG_PVQ
- od_encode_rollback(&x->daala_enc, &post_buf);
-#endif // CONFIG_PVQ
-
-#if CONFIG_EXT_INTER
- bsi->ref_mv[0]->as_int = bsi->rdstat[index][mode_idx].ref_mv[0].as_int;
- if (has_second_rf)
- bsi->ref_mv[1]->as_int = bsi->rdstat[index][mode_idx].ref_mv[1].as_int;
-#endif // CONFIG_EXT_INTER
- set_and_cost_bmi_mvs(cpi, x, xd, index, mode_selected,
- mode_mv[mode_selected], frame_mv, seg_mvs[index],
-#if CONFIG_EXT_INTER
- compound_seg_newmvs[index],
-#endif // CONFIG_EXT_INTER
- bsi->ref_mv, x->nmvjointcost, x->mvcost, mi_row,
- mi_col);
-
- br += bsi->rdstat[index][mode_idx].brate;
- bd += bsi->rdstat[index][mode_idx].bdist;
- block_sse += bsi->rdstat[index][mode_idx].bsse;
- segmentyrate += bsi->rdstat[index][mode_idx].byrate;
- this_segment_rd += bsi->rdstat[index][mode_idx].brdcost;
-
- if (this_segment_rd > bsi->segment_rd) {
- int iy, midx;
- for (iy = index + 1; iy < 4; ++iy)
-#if CONFIG_EXT_INTER
- for (midx = 0; midx < INTER_MODES + INTER_COMPOUND_MODES; ++midx)
-#else
- for (midx = 0; midx < INTER_MODES; ++midx)
-#endif // CONFIG_EXT_INTER
- bsi->rdstat[iy][midx].brdcost = INT64_MAX;
- bsi->segment_rd = INT64_MAX;
-#if CONFIG_PVQ
- od_encode_rollback(&x->daala_enc, &pre_buf);
-#endif // CONFIG_PVQ
- return INT64_MAX;
- }
- }
- } /* for each label */
-#if CONFIG_PVQ
- od_encode_rollback(&x->daala_enc, &pre_buf);
-#endif // CONFIG_PVQ
-
- bsi->r = br;
- bsi->d = bd;
- bsi->segment_yrate = segmentyrate;
- bsi->segment_rd = this_segment_rd;
- bsi->sse = block_sse;
-
- // update the coding decisions
- for (k = 0; k < 4; ++k) bsi->modes[k] = mi->bmi[k].as_mode;
-
-#if CONFIG_DAALA_DIST
- // Compute prediction (i.e. skip) and decoded distortion by daala-distortion.
- {
- const int src_stride = p->src.stride;
- const int dst_stride = pd->dst.stride;
- uint8_t *src = p->src.buf;
- uint8_t *dst = pd->dst.buf;
- const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
- const int use_activity_masking = 0;
- const int qm = OD_HVS_QM;
- const int bsw = block_size_wide[plane_bsize];
- const int bsh = block_size_high[plane_bsize];
- int64_t rd1, rd2;
- int64_t daala_sse, daala_dist;
- TX_SIZE tx_size = mbmi->tx_size;
-
-#if CONFIG_HIGHBITDEPTH
- uint8_t *recon_8x8;
- DECLARE_ALIGNED(16, uint16_t, recon16[8 * 8]);
-
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- recon_8x8 = CONVERT_TO_BYTEPTR(recon16);
- else
- recon_8x8 = (uint8_t *)recon16;
-#else
- DECLARE_ALIGNED(16, uint8_t, recon_8x8[8 * 8]);
-#endif // CONFIG_HIGHBITDEPTH
-
-#if CONFIG_PVQ
- use_activity_masking = x->daala_enc.use_activity_masking;
-#endif // CONFIG_PVQ
-
- // For each of sub8x8 prediction block in a 8x8 block
- for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
- for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
- int i = idy * 2 + idx;
- const uint8_t *const src_sub8x8 =
- src + av1_raster_block_offset(BLOCK_8X8, i, p->src.stride);
- uint8_t *const dst_sub8x8 =
- dst + av1_raster_block_offset(BLOCK_8X8, i, pd->dst.stride);
- uint8_t *recon_sub8x8 = recon_8x8 + (idy * 8 + idx) * 4;
- const int txb_width = max_block_wide(xd, plane_bsize, 0);
- const int txb_height = max_block_high(xd, plane_bsize, 0);
- int idx_, idy_;
-
- av1_build_inter_predictor_sub8x8(xd, 0, i, idy, idx, mi_row, mi_col);
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- aom_highbd_subtract_block(
- height, width,
- av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
- src_sub8x8, p->src.stride, dst_sub8x8, pd->dst.stride, xd->bd);
- } else {
- aom_subtract_block(
- height, width,
- av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
- src_sub8x8, p->src.stride, dst_sub8x8, pd->dst.stride);
- }
-#else
- aom_subtract_block(
- bsh, bsw, av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
- 8, src_sub8x8, p->src.stride, dst_sub8x8, pd->dst.stride);
-#endif // CONFIG_HIGHBITDEPTH
-
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- aom_highbd_convolve_copy(dst_sub8x8, dst_stride, recon_sub8x8, 8,
- NULL, 0, NULL, 0, bsw, bsh, xd->bd);
- } else {
-#endif // CONFIG_HIGHBITDEPTH
- aom_convolve_copy(dst_sub8x8, dst_stride, recon_sub8x8, 8, NULL, 0,
- NULL, 0, bsw, bsh);
-#if CONFIG_HIGHBITDEPTH
- }
-#endif // CONFIG_HIGHBITDEPTH
-
- // To get decoded pixels, do 4x4 xform and quant for each 4x4 block
- // in a sub8x8 prediction block. In case remaining parts of
- // sub8x8 inter mode rdo assume pd->dst stores predicted pixels,
- // use local buffer to store decoded pixels.
- for (idy_ = 0; idy_ < txb_height; idy_++) {
- for (idx_ = 0; idx_ < txb_width; idx_++) {
- int coeff_ctx = 0;
- const tran_low_t *dqcoeff;
- uint16_t eob;
- const PLANE_TYPE plane_type = PLANE_TYPE_Y;
- uint8_t *recon_4x4 = recon_sub8x8 + (idy_ * 8 + idx_) * 4;
- const int block_raster_idx = (idy + idy_) * 2 + (idx + idx_);
- const int block =
- av1_raster_order_to_block_index(tx_size, block_raster_idx);
- TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
-
- dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- av1_xform_quant(cm, x, 0, block, idy + idy_, idx + idx_, BLOCK_8X8,
- tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
- if (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0)
- av1_optimize_b(cm, x, 0, block, tx_size, coeff_ctx);
-
- eob = p->eobs[block];
- av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size,
- recon_4x4, 8, eob);
- }
- }
- }
- }
- // Compute daala-distortion for a 8x8 block
- daala_sse = av1_daala_dist(src, src_stride, pd->dst.buf, dst_stride, 8, 8,
- qm, use_activity_masking, x->qindex)
- << 4;
-
- daala_dist = av1_daala_dist(src, src_stride, recon_8x8, 8, 8, 8, qm,
- use_activity_masking, x->qindex)
- << 4;
-
- bsi->sse = daala_sse;
- bsi->d = daala_dist;
-
- rd1 = RDCOST(x->rdmult, x->rddiv, bsi->r, bsi->d);
- rd2 = RDCOST(x->rdmult, x->rddiv, 0, bsi->sse);
- bsi->segment_rd = AOMMIN(rd1, rd2);
- }
-#endif // CONFIG_DAALA_DIST
-
- if (bsi->segment_rd > best_rd) return INT64_MAX;
- /* set it to the best */
- for (idx = 0; idx < 4; idx++) {
- mode_idx = INTER_OFFSET(bsi->modes[idx]);
- mi->bmi[idx].as_mv[0].as_int = bsi->rdstat[idx][mode_idx].mvs[0].as_int;
- if (has_second_ref(mbmi))
- mi->bmi[idx].as_mv[1].as_int = bsi->rdstat[idx][mode_idx].mvs[1].as_int;
-#if CONFIG_REF_MV
- mi->bmi[idx].pred_mv[0] = bsi->rdstat[idx][mode_idx].pred_mv[0];
- if (has_second_ref(mbmi))
- mi->bmi[idx].pred_mv[1] = bsi->rdstat[idx][mode_idx].pred_mv[1];
-#endif // CONFIG_REF_MV
-#if CONFIG_EXT_INTER
- mi->bmi[idx].ref_mv[0].as_int = bsi->rdstat[idx][mode_idx].ref_mv[0].as_int;
- if (has_second_rf)
- mi->bmi[idx].ref_mv[1].as_int =
- bsi->rdstat[idx][mode_idx].ref_mv[1].as_int;
-#endif // CONFIG_EXT_INTER
- x->plane[0].eobs[idx] = bsi->rdstat[idx][mode_idx].eobs;
- mi->bmi[idx].as_mode = bsi->modes[idx];
- }
-
- /*
- * used to set mbmi->mv.as_int
- */
- *returntotrate = bsi->r;
- *returndistortion = bsi->d;
- *returnyrate = bsi->segment_yrate;
- *skippable = av1_is_skippable_in_plane(x, BLOCK_8X8, 0);
- *psse = bsi->sse;
- mbmi->mode = bsi->modes[3];
-
- return bsi->segment_rd;
-}
-
static void estimate_ref_frame_costs(const AV1_COMMON *cm,
const MACROBLOCKD *xd, int segment_id,
unsigned int *ref_costs_single,
@@ -6808,15 +5652,13 @@ static void setup_buffer_inter(
av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
// Gets an initial list of candidate vectors from neighbours and orders them
- av1_find_mv_refs(
- cm, xd, mi, ref_frame,
-#if CONFIG_REF_MV
- &mbmi_ext->ref_mv_count[ref_frame], mbmi_ext->ref_mv_stack[ref_frame],
+ av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
+ mbmi_ext->ref_mv_stack[ref_frame],
#if CONFIG_EXT_INTER
- mbmi_ext->compound_mode_context,
+ mbmi_ext->compound_mode_context,
#endif // CONFIG_EXT_INTER
-#endif // CONFIG_REF_MV
- candidates, mi_row, mi_col, NULL, NULL, mbmi_ext->mode_context);
+ candidates, mi_row, mi_col, NULL, NULL,
+ mbmi_ext->mode_context);
// Candidate refinement carried out at encoder and decoder
av1_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
@@ -6882,9 +5724,7 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
av1_set_mv_search_range(&x->mv_limits, &ref_mv);
-#if CONFIG_REF_MV
av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
-#endif // CONFIG_REF_MV
// Work out the size of the first step in the mv step search.
// 0 here is maximum length first step. 1 is AOMMAX >> 1 etc.
@@ -6996,8 +5836,11 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
- x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, pw, ph,
- 1);
+ x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL,
+#if CONFIG_EXT_INTER
+ NULL, 0, 0,
+#endif
+ pw, ph, 1);
if (try_second) {
const int minc =
@@ -7021,7 +5864,11 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
&cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step,
cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
- &dis, &x->pred_sse[ref], NULL, pw, ph, 1);
+ &dis, &x->pred_sse[ref], NULL,
+#if CONFIG_EXT_INTER
+ NULL, 0, 0,
+#endif
+ pw, ph, 1);
if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
x->best_mv.as_mv = best_mv;
}
@@ -7034,8 +5881,11 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
- x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0,
- 0);
+ x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL,
+#if CONFIG_EXT_INTER
+ NULL, 0, 0,
+#endif
+ 0, 0, 0);
}
#if CONFIG_MOTION_VAR
break;
@@ -7077,131 +5927,287 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd, BUFFER_SET dst) {
}
#if CONFIG_EXT_INTER
-#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
-static void do_masked_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
- const uint8_t *mask, int mask_stride,
- BLOCK_SIZE bsize, int mi_row, int mi_col,
- int_mv *tmp_mv, int *rate_mv, int ref_idx) {
+static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, const MV *other_mv,
+ int mi_row, int mi_col, const int block,
+ int ref_idx, uint8_t *second_pred) {
+ const AV1_COMMON *const cm = &cpi->common;
+ const int pw = block_size_wide[bsize];
+ const int ph = block_size_high[bsize];
MACROBLOCKD *xd = &x->e_mbd;
- const AV1_COMMON *cm = &cpi->common;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
- int bestsme = INT_MAX;
- int step_param;
- int sadpb = x->sadperbit16;
- MV mvp_full;
- int ref = mbmi->ref_frame[ref_idx];
- MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
-
- MvLimits tmp_mv_limits = x->mv_limits;
-
- const YV12_BUFFER_CONFIG *scaled_ref_frame =
- av1_get_scaled_ref_frame(cpi, ref);
- int i;
+ const int other_ref = mbmi->ref_frame[!ref_idx];
+#if CONFIG_DUAL_FILTER
+ InterpFilter interp_filter[2] = {
+ (ref_idx == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0],
+ (ref_idx == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1]
+ };
+#else
+ const InterpFilter interp_filter = mbmi->interp_filter;
+#endif // CONFIG_DUAL_FILTER
+ struct scale_factors sf;
+#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+ struct macroblockd_plane *const pd = &xd->plane[0];
+ // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
+ const int ic = block & 1;
+ const int ir = (block - ic) >> 1;
+ const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
+ const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
+#if CONFIG_GLOBAL_MOTION
+ WarpedMotionParams *const wm = &xd->global_motion[other_ref];
+ int is_global = is_global_mv_block(xd->mi[0], block, wm->wmtype);
+#endif // CONFIG_GLOBAL_MOTION
+#else
+ (void)block;
+#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- MV pred_mv[3];
- pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
- pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
- pred_mv[2] = x->pred_mv[ref];
+ // This function should only ever be called for compound modes
+ assert(has_second_ref(mbmi));
-#if CONFIG_REF_MV
- av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
-#endif // CONFIG_REF_MV
+ struct buf_2d backup_yv12[MAX_MB_PLANE];
+ const YV12_BUFFER_CONFIG *const scaled_ref_frame =
+ av1_get_scaled_ref_frame(cpi, other_ref);
if (scaled_ref_frame) {
+ int i;
// Swap out the reference frame for a version that's been scaled to
// match the resolution of the current frame, allowing the existing
// motion search code to be used without additional modifications.
for (i = 0; i < MAX_MB_PLANE; i++)
- backup_yv12[i] = xd->plane[i].pre[ref_idx];
-
- av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
+ backup_yv12[i] = xd->plane[i].pre[!ref_idx];
+ av1_setup_pre_planes(xd, !ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
}
- av1_set_mv_search_range(&x->mv_limits, &ref_mv);
+// Since we have scaled the reference frames to match the size of the current
+// frame we must use a unit scaling factor during mode selection.
+#if CONFIG_HIGHBITDEPTH
+ av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
+ cm->height, cm->use_highbitdepth);
+#else
+ av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
+ cm->height);
+#endif // CONFIG_HIGHBITDEPTH
- // Work out the size of the first step in the mv step search.
- // 0 here is maximum length first step. 1 is MAX >> 1 etc.
- if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
- // Take wtd average of the step_params based on the last frame's
- // max mv magnitude and that based on the best ref mvs of the current
- // block for the given reference.
- step_param =
- (av1_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
- 2;
+ struct buf_2d ref_yv12;
+
+ const int plane = 0;
+ ConvolveParams conv_params = get_conv_params(0, plane);
+#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+ WarpTypesAllowed warp_types;
+#if CONFIG_GLOBAL_MOTION
+ warp_types.global_warp_allowed = is_global;
+#endif // CONFIG_GLOBAL_MOTION
+#if CONFIG_WARPED_MOTION
+ warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
+#endif // CONFIG_WARPED_MOTION
+#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+
+ // Initialized here because of compiler problem in Visual Studio.
+ ref_yv12 = xd->plane[plane].pre[!ref_idx];
+
+// Get the prediction block from the 'other' reference frame.
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ av1_highbd_build_inter_predictor(
+ ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
+ 0, interp_filter,
+#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+ &warp_types, p_col, p_row,
+#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+ plane, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
} else {
- step_param = cpi->mv_step_param;
+#endif // CONFIG_HIGHBITDEPTH
+ av1_build_inter_predictor(
+ ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
+ &conv_params, interp_filter,
+#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+ &warp_types, p_col, p_row, plane, !ref_idx,
+#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+ MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
+#if CONFIG_HIGHBITDEPTH
}
+#endif // CONFIG_HIGHBITDEPTH
- // TODO(debargha): is show_frame needed here?
- if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size && cm->show_frame) {
- int boffset =
- 2 * (b_width_log2_lookup[cm->sb_size] -
- AOMMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
- step_param = AOMMAX(step_param, boffset);
+ if (scaled_ref_frame) {
+ // Restore the prediction frame pointers to their unscaled versions.
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[!ref_idx] = backup_yv12[i];
}
+}
- if (cpi->sf.adaptive_motion_search) {
- int bwl = b_width_log2_lookup[bsize];
- int bhl = b_height_log2_lookup[bsize];
- int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
+// Search for the best mv for one component of a compound,
+// given that the other component is fixed.
+static void compound_single_motion_search(
+ const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *this_mv,
+ int mi_row, int mi_col, const uint8_t *second_pred, const uint8_t *mask,
+ int mask_stride, int *rate_mv, const int block, int ref_idx) {
+ const int pw = block_size_wide[bsize];
+ const int ph = block_size_high[bsize];
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ const int ref = mbmi->ref_frame[ref_idx];
+ int_mv ref_mv = x->mbmi_ext->ref_mvs[ref][0];
+ struct macroblockd_plane *const pd = &xd->plane[0];
- if (tlevel < 5) step_param += 2;
+ struct buf_2d backup_yv12[MAX_MB_PLANE];
+ const YV12_BUFFER_CONFIG *const scaled_ref_frame =
+ av1_get_scaled_ref_frame(cpi, ref);
- // prev_mv_sad is not setup for dynamically scaled frames.
- if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
- for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
- if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
- x->pred_mv[ref].row = 0;
- x->pred_mv[ref].col = 0;
- tmp_mv->as_int = INVALID_MV;
+ // Check that this is either an interinter or an interintra block
+ assert(has_second_ref(mbmi) ||
+ (ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME));
- if (scaled_ref_frame) {
- int j;
- for (j = 0; j < MAX_MB_PLANE; ++j)
- xd->plane[j].pre[ref_idx] = backup_yv12[j];
- }
- return;
- }
- }
- }
+ if (scaled_ref_frame) {
+ int i;
+ // Swap out the reference frame for a version that's been scaled to
+ // match the resolution of the current frame, allowing the existing
+ // motion search code to be used without additional modifications.
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ backup_yv12[i] = xd->plane[i].pre[ref_idx];
+ av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
}
- mvp_full = pred_mv[x->mv_best_ref_index[ref]];
+ struct buf_2d orig_yv12;
+ int bestsme = INT_MAX;
+ int sadpb = x->sadperbit16;
+ MV *const best_mv = &x->best_mv.as_mv;
+ int search_range = 3;
+
+ MvLimits tmp_mv_limits = x->mv_limits;
- mvp_full.col >>= 3;
- mvp_full.row >>= 3;
+ // Initialized here because of compiler problem in Visual Studio.
+ if (ref_idx) {
+ orig_yv12 = pd->pre[0];
+ pd->pre[0] = pd->pre[ref_idx];
+ }
- bestsme = av1_masked_full_pixel_diamond(
- cpi, x, mask, mask_stride, &mvp_full, step_param, sadpb,
- MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv,
- &tmp_mv->as_mv, ref_idx);
+ // Do compound motion search on the current reference frame.
+ av1_set_mv_search_range(&x->mv_limits, &ref_mv.as_mv);
+
+ // Use the mv result from the single mode as mv predictor.
+ *best_mv = *this_mv;
+
+ best_mv->col >>= 3;
+ best_mv->row >>= 3;
+
+ av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
+
+ // Small-range full-pixel motion search.
+ bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
+ &cpi->fn_ptr[bsize], mask, mask_stride,
+ ref_idx, &ref_mv.as_mv, second_pred);
+ if (bestsme < INT_MAX) {
+ if (mask)
+ bestsme =
+ av1_get_mvpred_mask_var(x, best_mv, &ref_mv.as_mv, second_pred, mask,
+ mask_stride, ref_idx, &cpi->fn_ptr[bsize], 1);
+ else
+ bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv.as_mv, second_pred,
+ &cpi->fn_ptr[bsize], 1);
+ }
x->mv_limits = tmp_mv_limits;
if (bestsme < INT_MAX) {
int dis; /* TODO: use dis in distortion calculation later. */
- av1_find_best_masked_sub_pixel_tree_up(
- cpi, x, mask, mask_stride, mi_row, mi_col, &tmp_mv->as_mv, &ref_mv,
- cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize],
- cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step,
- x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], ref_idx,
- cpi->sf.use_upsampled_references);
- }
- *rate_mv = av1_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,
- x->mvcost, MV_COST_WEIGHT);
+ unsigned int sse;
+ if (cpi->sf.use_upsampled_references) {
+ // Use up-sampled reference frames.
+ struct buf_2d backup_pred = pd->pre[0];
+ const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
+
+ // Set pred for Y plane
+ setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer,
+ upsampled_ref->y_crop_width,
+ upsampled_ref->y_crop_height, upsampled_ref->y_stride,
+ (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
+ pd->subsampling_y);
+
+// If bsize < BLOCK_8X8, adjust pred pointer for this block
+#if !CONFIG_CB4X4
+ if (bsize < BLOCK_8X8)
+ pd->pre[0].buf =
+ &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block,
+ pd->pre[0].stride))
+ << 3];
+#endif // !CONFIG_CB4X4
+
+ bestsme = cpi->find_fractional_mv_step(
+ x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
+ &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
+ x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
+ mask_stride, ref_idx, pw, ph, 1);
+
+ // Restore the reference frames.
+ pd->pre[0] = backup_pred;
+ } else {
+ (void)block;
+ bestsme = cpi->find_fractional_mv_step(
+ x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
+ &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
+ x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
+ mask_stride, ref_idx, pw, ph, 0);
+ }
+ }
+
+ // Restore the pointer to the first (possibly scaled) prediction buffer.
+ if (ref_idx) pd->pre[0] = orig_yv12;
+
+ if (bestsme < INT_MAX) *this_mv = *best_mv;
- if (cpi->sf.adaptive_motion_search && cm->show_frame)
- x->pred_mv[ref] = tmp_mv->as_mv;
+ *rate_mv = 0;
if (scaled_ref_frame) {
+ // Restore the prediction frame pointers to their unscaled versions.
+ int i;
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[ref_idx] = backup_yv12[i];
}
+
+ av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
+ *rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost,
+ x->mvcost, MV_COST_WEIGHT);
}
+// Wrapper for compound_single_motion_search, for the common case
+// where the second prediction is also an inter mode.
+static void compound_single_motion_search_interinter(
+ const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv,
+ int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv,
+ const int block, int ref_idx) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+
+ // This function should only ever be called for compound modes
+ assert(has_second_ref(mbmi));
+
+// Prediction buffer from second frame.
+#if CONFIG_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
+ uint8_t *second_pred;
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
+ else
+ second_pred = (uint8_t *)second_pred_alloc_16;
+#else
+ DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
+#endif // CONFIG_HIGHBITDEPTH
+
+ MV *this_mv = &frame_mv[mbmi->ref_frame[ref_idx]].as_mv;
+ const MV *other_mv = &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv;
+
+ build_second_inter_pred(cpi, x, bsize, other_mv, mi_row, mi_col, block,
+ ref_idx, second_pred);
+
+ compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col,
+ second_pred, mask, mask_stride, rate_mv, block,
+ ref_idx);
+}
+
+#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
static void do_masked_motion_search_indexed(
- const AV1_COMP *const cpi, MACROBLOCK *x,
+ const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE bsize,
int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv, int which) {
// NOTE: which values: 0 - 0 only, 1 - 1 only, 2 - both
@@ -7213,23 +6219,22 @@ static void do_masked_motion_search_indexed(
mask = av1_get_compound_type_mask(comp_data, sb_type);
- if (which == 0 || which == 2)
- do_masked_motion_search(cpi, x, mask, mask_stride, bsize, mi_row, mi_col,
- &tmp_mv[0], &rate_mv[0], 0);
-
- if (which == 1 || which == 2) {
-// get the negative mask
-#if CONFIG_COMPOUND_SEGMENT
- uint8_t inv_mask_buf[2 * MAX_SB_SQUARE];
- const int h = block_size_high[bsize];
- mask = av1_get_compound_type_mask_inverse(
- comp_data, inv_mask_buf, h, mask_stride, mask_stride, sb_type);
-#else
- mask = av1_get_compound_type_mask_inverse(comp_data, sb_type);
-#endif // CONFIG_COMPOUND_SEGMENT
- do_masked_motion_search(cpi, x, mask, mask_stride, bsize, mi_row, mi_col,
- &tmp_mv[1], &rate_mv[1], 1);
- }
+ int_mv frame_mv[TOTAL_REFS_PER_FRAME];
+ MV_REFERENCE_FRAME rf[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
+ assert(bsize >= BLOCK_8X8 || CONFIG_CB4X4);
+
+ frame_mv[rf[0]].as_int = cur_mv[0].as_int;
+ frame_mv[rf[1]].as_int = cur_mv[1].as_int;
+ if (which == 0 || which == 1) {
+ compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, mi_row,
+ mi_col, mask, mask_stride, rate_mv,
+ 0, which);
+ } else if (which == 2) {
+ joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, mask,
+ mask_stride, rate_mv, 0);
+ }
+ tmp_mv[0].as_int = frame_mv[rf[0]].as_int;
+ tmp_mv[1].as_int = frame_mv[rf[1]].as_int;
}
#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
#endif // CONFIG_EXT_INTER
@@ -7275,7 +6280,7 @@ static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x,
const int f_index = bsize - BLOCK_8X8;
const int bw = block_size_wide[bsize];
const int bh = block_size_high[bsize];
- uint32_t esq[2][4], var;
+ uint32_t esq[2][4];
int64_t tl, br;
#if CONFIG_HIGHBITDEPTH
@@ -7285,23 +6290,22 @@ static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x,
}
#endif // CONFIG_HIGHBITDEPTH
- var = cpi->fn_ptr[f_index].vf(src, src_stride, pred0, stride0, &esq[0][0]);
- var = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred0 + bw / 2,
- stride0, &esq[0][1]);
- var = cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
- pred0 + bh / 2 * stride0, stride0, &esq[0][2]);
- var = cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
- pred0 + bh / 2 * stride0 + bw / 2, stride0,
- &esq[0][3]);
- var = cpi->fn_ptr[f_index].vf(src, src_stride, pred1, stride1, &esq[1][0]);
- var = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred1 + bw / 2,
- stride1, &esq[1][1]);
- var = cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
- pred1 + bh / 2 * stride1, stride0, &esq[1][2]);
- var = cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
- pred1 + bh / 2 * stride1 + bw / 2, stride0,
- &esq[1][3]);
- (void)var;
+ cpi->fn_ptr[f_index].vf(src, src_stride, pred0, stride0, &esq[0][0]);
+ cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred0 + bw / 2, stride0,
+ &esq[0][1]);
+ cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
+ pred0 + bh / 2 * stride0, stride0, &esq[0][2]);
+ cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
+ pred0 + bh / 2 * stride0 + bw / 2, stride0,
+ &esq[0][3]);
+ cpi->fn_ptr[f_index].vf(src, src_stride, pred1, stride1, &esq[1][0]);
+ cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred1 + bw / 2, stride1,
+ &esq[1][1]);
+ cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
+ pred1 + bh / 2 * stride1, stride0, &esq[1][2]);
+ cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
+ pred1 + bh / 2 * stride1 + bw / 2, stride0,
+ &esq[1][3]);
tl = (int64_t)(esq[0][0] + esq[0][1] + esq[0][2]) -
(int64_t)(esq[1][0] + esq[1][1] + esq[1][2]);
@@ -7353,16 +6357,6 @@ static InterpFilter predict_interp_filter(
single_filter[NEARESTMV][refs[1]])
best_filter = single_filter[NEARESTMV][refs[0]];
break;
- case NEAREST_NEARMV:
- if (single_filter[NEARESTMV][refs[0]] ==
- single_filter[NEARMV][refs[1]])
- best_filter = single_filter[NEARESTMV][refs[0]];
- break;
- case NEAR_NEARESTMV:
- if (single_filter[NEARMV][refs[0]] ==
- single_filter[NEARESTMV][refs[1]])
- best_filter = single_filter[NEARMV][refs[0]];
- break;
case NEAR_NEARMV:
if (single_filter[NEARMV][refs[0]] == single_filter[NEARMV][refs[1]])
best_filter = single_filter[NEARMV][refs[0]];
@@ -7575,6 +6569,7 @@ static int64_t pick_interinter_wedge(const AV1_COMP *const cpi,
int wedge_sign = 0;
assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
+ assert(cpi->common.allow_masked_compound);
if (cpi->sf.fast_wedge_sign_estimate) {
wedge_sign = estimate_wedge_sign(cpi, x, bsize, p0, bw, p1, bw);
@@ -7688,6 +6683,7 @@ static int64_t pick_interintra_wedge(const AV1_COMP *const cpi,
int wedge_index = -1;
assert(is_interintra_wedge_used(bsize));
+ assert(cpi->common.allow_interintra_compound);
rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, 0, &wedge_index);
@@ -7715,15 +6711,13 @@ static int64_t pick_interinter_mask(const AV1_COMP *const cpi, MACROBLOCK *x,
}
}
-static int interinter_compound_motion_search(const AV1_COMP *const cpi,
- MACROBLOCK *x,
- const BLOCK_SIZE bsize,
- const int this_mode, int mi_row,
- int mi_col) {
+static int interinter_compound_motion_search(
+ const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
+ const BLOCK_SIZE bsize, const int this_mode, int mi_row, int mi_col) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
int_mv tmp_mv[2];
- int rate_mvs[2], tmp_rate_mv = 0;
+ int tmp_rate_mv = 0;
const INTERINTER_COMPOUND_DATA compound_data = {
#if CONFIG_WEDGE
mbmi->wedge_index,
@@ -7736,20 +6730,17 @@ static int interinter_compound_motion_search(const AV1_COMP *const cpi,
mbmi->interinter_compound_type
};
if (this_mode == NEW_NEWMV) {
- do_masked_motion_search_indexed(cpi, x, &compound_data, bsize, mi_row,
- mi_col, tmp_mv, rate_mvs, 2);
- tmp_rate_mv = rate_mvs[0] + rate_mvs[1];
+ do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
+ mi_row, mi_col, tmp_mv, &tmp_rate_mv, 2);
mbmi->mv[0].as_int = tmp_mv[0].as_int;
mbmi->mv[1].as_int = tmp_mv[1].as_int;
} else if (this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV) {
- do_masked_motion_search_indexed(cpi, x, &compound_data, bsize, mi_row,
- mi_col, tmp_mv, rate_mvs, 0);
- tmp_rate_mv = rate_mvs[0];
+ do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
+ mi_row, mi_col, tmp_mv, &tmp_rate_mv, 0);
mbmi->mv[0].as_int = tmp_mv[0].as_int;
} else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
- do_masked_motion_search_indexed(cpi, x, &compound_data, bsize, mi_row,
- mi_col, tmp_mv, rate_mvs, 1);
- tmp_rate_mv = rate_mvs[1];
+ do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
+ mi_row, mi_col, tmp_mv, &tmp_rate_mv, 1);
mbmi->mv[1].as_int = tmp_mv[1].as_int;
}
return tmp_rate_mv;
@@ -7760,6 +6751,7 @@ static int64_t build_and_cost_compound_type(
const BLOCK_SIZE bsize, const int this_mode, int rs2, int rate_mv,
BUFFER_SET *ctx, int *out_rate_mv, uint8_t **preds0, uint8_t **preds1,
int *strides, int mi_row, int mi_col) {
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
int rate_sum;
@@ -7775,9 +6767,9 @@ static int64_t build_and_cost_compound_type(
if (have_newmv_in_inter_mode(this_mode) &&
use_masked_motion_search(compound_type)) {
- *out_rate_mv = interinter_compound_motion_search(cpi, x, bsize, this_mode,
- mi_row, mi_col);
- av1_build_inter_predictors_sby(xd, mi_row, mi_col, ctx, bsize);
+ *out_rate_mv = interinter_compound_motion_search(cpi, x, cur_mv, bsize,
+ this_mode, mi_row, mi_col);
+ av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb);
rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum);
@@ -7830,9 +6822,6 @@ typedef struct {
// Pointer to array of motion vectors to use for each ref and their rates
// Should point to first of 2 arrays in 2D array
int *single_newmv_rate;
- // Pointers costs of compound inter-intra and inter-inter predictions
- int *compmode_interintra_cost;
- int *compmode_interinter_cost;
// Pointer to array of predicted rate-distortion
// Should point to first of 2 arrays in 2D array
int64_t (*modelled_rd)[TOTAL_REFS_PER_FRAME];
@@ -7872,14 +6861,12 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
- joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL,
- rate_mv, 0);
+ joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, NULL,
+ 0, rate_mv, 0);
} else {
*rate_mv = 0;
for (i = 0; i < 2; ++i) {
-#if CONFIG_REF_MV
av1_set_mvcost(x, refs[i], i, mbmi->ref_mv_idx);
-#endif // CONFIG_REF_MV
*rate_mv += av1_mv_bit_cost(
&frame_mv[refs[i]].as_mv, &mbmi_ext->ref_mvs[refs[i]][0].as_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
@@ -7887,21 +6874,31 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
}
} else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
-#if CONFIG_REF_MV
- av1_set_mvcost(x, refs[1], 1, mbmi->ref_mv_idx);
-#endif // CONFIG_REF_MV
- *rate_mv = av1_mv_bit_cost(&frame_mv[refs[1]].as_mv,
- &mbmi_ext->ref_mvs[refs[1]][0].as_mv,
- x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
+ frame_mv[refs[0]].as_int =
+ mode_mv[compound_ref0_mode(this_mode)][refs[0]].as_int;
+ compound_single_motion_search_interinter(
+ cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 1);
+ } else {
+ av1_set_mvcost(x, refs[1], 1, mbmi->ref_mv_idx);
+ *rate_mv = av1_mv_bit_cost(&frame_mv[refs[1]].as_mv,
+ &mbmi_ext->ref_mvs[refs[1]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ }
} else {
assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
-#if CONFIG_REF_MV
- av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
-#endif // CONFIG_REF_MV
- *rate_mv = av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
- &mbmi_ext->ref_mvs[refs[0]][0].as_mv,
- x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
+ frame_mv[refs[1]].as_int =
+ mode_mv[compound_ref1_mode(this_mode)][refs[1]].as_int;
+ compound_single_motion_search_interinter(
+ cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 0);
+ } else {
+ av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
+ *rate_mv = av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
+ &mbmi_ext->ref_mvs[refs[0]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ }
}
#else
// Initialize mv using single prediction mode result.
@@ -7913,9 +6910,7 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
} else {
*rate_mv = 0;
for (i = 0; i < 2; ++i) {
-#if CONFIG_REF_MV
av1_set_mvcost(x, refs[i], i, mbmi->ref_mv_idx);
-#endif // CONFIG_REF_MV
*rate_mv += av1_mv_bit_cost(&frame_mv[refs[i]].as_mv,
&mbmi_ext->ref_mvs[refs[i]][0].as_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
@@ -7986,7 +6981,7 @@ int64_t interpolation_filter_search(
set_default_interp_filters(mbmi, assign_filter);
*switchable_rate = av1_get_switchable_rate(cpi, xd);
- av1_build_inter_predictors_sb(xd, mi_row, mi_col, orig_dst, bsize);
+ av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist,
skip_txfm_sb, skip_sse_sb);
*rd = RDCOST(x->rdmult, x->rddiv, *switchable_rate + tmp_rate, tmp_dist);
@@ -8022,7 +7017,7 @@ int64_t interpolation_filter_search(
mbmi->interp_filter = (InterpFilter)i;
#endif // CONFIG_DUAL_FILTER
tmp_rs = av1_get_switchable_rate(cpi, xd);
- av1_build_inter_predictors_sb(xd, mi_row, mi_col, orig_dst, bsize);
+ av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
&tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rs + tmp_rate, tmp_dist);
@@ -8077,6 +7072,7 @@ static int64_t motion_mode_rd(
int mi_col, HandleInterModeArgs *const args, const int64_t ref_best_rd,
const int *refs, int rate_mv,
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+ int_mv *const single_newmv,
#if CONFIG_EXT_INTER
int rate2_bmc_nocoeff, MB_MODE_INFO *best_bmc_mbmi,
#if CONFIG_MOTION_VAR
@@ -8183,10 +7179,10 @@ static int64_t motion_mode_rd(
if (!has_subpel_mv_component(xd->mi[0], xd, 1))
mbmi->interp_filter[1] = EIGHTTAP_REGULAR;
#endif // CONFIG_DUAL_FILTER
- av1_build_inter_predictors_sb(xd, mi_row, mi_col, orig_dst, bsize);
+ av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
#if CONFIG_EXT_INTER
} else {
- av1_build_inter_predictors_sb(xd, mi_row, mi_col, orig_dst, bsize);
+ av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
#endif // CONFIG_EXT_INTER
}
av1_build_obmc_inter_prediction(
@@ -8214,10 +7210,55 @@ static int64_t motion_mode_rd(
: cm->interp_filter;
#endif // CONFIG_DUAL_FILTER
- if (find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize,
- mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
- &mbmi->wm_params[0], mi_row, mi_col) == 0) {
- av1_build_inter_predictors_sb(xd, mi_row, mi_col, NULL, bsize);
+ if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize,
+ mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
+ &mbmi->wm_params[0], mi_row, mi_col)) {
+ // Refine MV for NEWMV mode
+ if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
+ int tmp_rate_mv = 0;
+ const int_mv mv0 = mbmi->mv[0];
+ WarpedMotionParams wm_params0 = mbmi->wm_params[0];
+
+ // Refine MV in a small range.
+ av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts, pts_inref);
+
+ // Keep the refined MV and WM parameters.
+ if (mv0.as_int != mbmi->mv[0].as_int) {
+ const int ref = refs[0];
+ const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
+
+ tmp_rate_mv =
+ av1_mv_bit_cost(&mbmi->mv[0].as_mv, &ref_mv, x->nmvjointcost,
+ x->mvcost, MV_COST_WEIGHT);
+
+ if (cpi->sf.adaptive_motion_search)
+ x->pred_mv[ref] = mbmi->mv[0].as_mv;
+
+ single_newmv[ref] = mbmi->mv[0];
+
+ if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv,
+ refs[0])) {
+ tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
+ }
+#if CONFIG_EXT_INTER
+ tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
+#else
+ tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
+#endif // CONFIG_EXT_INTER
+#if CONFIG_DUAL_FILTER
+ if (!has_subpel_mv_component(xd->mi[0], xd, 0))
+ mbmi->interp_filter[0] = EIGHTTAP_REGULAR;
+ if (!has_subpel_mv_component(xd->mi[0], xd, 1))
+ mbmi->interp_filter[1] = EIGHTTAP_REGULAR;
+#endif // CONFIG_DUAL_FILTER
+ } else {
+ // Restore the old MV and WM parameters.
+ mbmi->mv[0] = mv0;
+ mbmi->wm_params[0] = wm_params0;
+ }
+ }
+
+ av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
&tmp_dist, skip_txfm_sb, skip_sse_sb);
} else {
@@ -8446,16 +7487,16 @@ static int64_t handle_inter_mode(
int rate_mv = 0;
#if CONFIG_EXT_INTER
int pred_exists = 1;
+#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
const int bw = block_size_wide[bsize];
+#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
int_mv single_newmv[TOTAL_REFS_PER_FRAME];
#if CONFIG_INTERINTRA
const unsigned int *const interintra_mode_cost =
cpi->interintra_mode_cost[size_group_lookup[bsize]];
#endif // CONFIG_INTERINTRA
const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
-#if CONFIG_REF_MV
uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
-#endif // CONFIG_REF_MV
#else
int_mv *const single_newmv = args->single_newmv;
#endif // CONFIG_EXT_INTER
@@ -8484,10 +7525,19 @@ static int64_t handle_inter_mode(
int16_t mode_ctx;
#if CONFIG_EXT_INTER
- *args->compmode_interintra_cost = 0;
+#if CONFIG_INTERINTRA
+ int compmode_interintra_cost = 0;
mbmi->use_wedge_interintra = 0;
- *args->compmode_interinter_cost = 0;
+#endif
+#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
+ int compmode_interinter_cost = 0;
mbmi->interinter_compound_type = COMPOUND_AVERAGE;
+#endif
+
+#if CONFIG_INTERINTRA
+ if (!cm->allow_interintra_compound && is_comp_interintra_pred)
+ return INT64_MAX;
+#endif // CONFIG_INTERINTRA
// is_comp_interintra_pred implies !is_comp_pred
assert(!is_comp_interintra_pred || (!is_comp_pred));
@@ -8495,7 +7545,6 @@ static int64_t handle_inter_mode(
assert(!is_comp_interintra_pred || is_interintra_allowed(mbmi));
#endif // CONFIG_EXT_INTER
-#if CONFIG_REF_MV
#if CONFIG_EXT_INTER
if (is_comp_pred)
mode_ctx = mbmi_ext->compound_mode_context[refs[0]];
@@ -8503,9 +7552,6 @@ static int64_t handle_inter_mode(
#endif // CONFIG_EXT_INTER
mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
mbmi->ref_frame, bsize, -1);
-#else // CONFIG_REF_MV
- mode_ctx = mbmi_ext->mode_context[refs[0]];
-#endif // CONFIG_REF_MV
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
@@ -8545,7 +7591,6 @@ static int64_t handle_inter_mode(
mbmi->mv[i].as_int = cur_mv[i].as_int;
}
-#if CONFIG_REF_MV
#if CONFIG_EXT_INTER
if (this_mode == NEAREST_NEARESTMV)
#else
@@ -8569,7 +7614,7 @@ static int64_t handle_inter_mode(
#if CONFIG_EXT_INTER
if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
- if (this_mode == NEAREST_NEWMV || this_mode == NEAREST_NEARMV) {
+ if (this_mode == NEAREST_NEWMV) {
cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
@@ -8578,7 +7623,7 @@ static int64_t handle_inter_mode(
mbmi->mv[0].as_int = cur_mv[0].as_int;
}
- if (this_mode == NEW_NEARESTMV || this_mode == NEAR_NEARESTMV) {
+ if (this_mode == NEW_NEARESTMV) {
cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
@@ -8590,8 +7635,7 @@ static int64_t handle_inter_mode(
if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
int ref_mv_idx = mbmi->ref_mv_idx + 1;
- if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEARESTMV ||
- this_mode == NEAR_NEARMV) {
+ if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEARMV) {
cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
@@ -8600,8 +7644,7 @@ static int64_t handle_inter_mode(
mbmi->mv[0].as_int = cur_mv[0].as_int;
}
- if (this_mode == NEW_NEARMV || this_mode == NEAREST_NEARMV ||
- this_mode == NEAR_NEARMV) {
+ if (this_mode == NEW_NEARMV || this_mode == NEAR_NEARMV) {
cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
@@ -8626,7 +7669,6 @@ static int64_t handle_inter_mode(
}
}
#endif // CONFIG_EXT_INTER
-#endif // CONFIG_REF_MV
// do first prediction into the destination buffer. Do the next
// prediction into a temporary buffer. Then keep track of which one
@@ -8659,7 +7701,7 @@ static int64_t handle_inter_mode(
#else
rd_stats->rate += AOMMIN(cost_mv_ref(cpi, this_mode, mode_ctx),
cost_mv_ref(cpi, NEARESTMV, mode_ctx));
-#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+#endif // CONFIG_EXT_INTER
} else {
rd_stats->rate += cost_mv_ref(cpi, this_mode, mode_ctx);
}
@@ -8688,6 +7730,7 @@ static int64_t handle_inter_mode(
#endif // CONFIG_MOTION_VAR
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
if (is_comp_pred) {
int rate_sum, rs2;
int64_t dist_sum;
@@ -8705,6 +7748,9 @@ static int64_t handle_inter_mode(
int strides[1] = { bw };
int tmp_rate_mv;
int masked_compound_used = is_any_masked_compound_used(bsize);
+#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
+ masked_compound_used = masked_compound_used && cm->allow_masked_compound;
+#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
COMPOUND_TYPE cur_type;
best_mv[0].as_int = cur_mv[0].as_int;
@@ -8714,8 +7760,6 @@ static int64_t handle_inter_mode(
uint8_t tmp_mask_buf[2 * MAX_SB_SQUARE];
best_compound_data.seg_mask = tmp_mask_buf;
#endif // CONFIG_COMPOUND_SEGMENT
- av1_cost_tokens(compound_type_cost, cm->fc->compound_type_prob[bsize],
- av1_compound_type_tree);
if (masked_compound_used) {
av1_cost_tokens(compound_type_cost, cm->fc->compound_type_prob[bsize],
@@ -8728,6 +7772,7 @@ static int64_t handle_inter_mode(
}
for (cur_type = COMPOUND_AVERAGE; cur_type < COMPOUND_TYPES; cur_type++) {
+ if (cur_type != COMPOUND_AVERAGE && !masked_compound_used) break;
if (!is_interinter_compound_used(cur_type, bsize)) break;
tmp_rate_mv = rate_mv;
best_rd_cur = INT64_MAX;
@@ -8740,7 +7785,8 @@ static int64_t handle_inter_mode(
switch (cur_type) {
case COMPOUND_AVERAGE:
- av1_build_inter_predictors_sby(xd, mi_row, mi_col, &orig_dst, bsize);
+ av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
+ bsize);
av1_subtract_plane(x, bsize, 0);
rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb,
@@ -8830,13 +7876,14 @@ static int64_t handle_inter_mode(
pred_exists = 0;
- *args->compmode_interinter_cost =
+ compmode_interinter_cost =
av1_cost_literal(get_interinter_compound_type_bits(
bsize, mbmi->interinter_compound_type)) +
(masked_compound_used
? compound_type_cost[mbmi->interinter_compound_type]
: 0);
}
+#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
#if CONFIG_INTERINTRA
if (is_comp_interintra_pred) {
@@ -8863,7 +7910,7 @@ static int64_t handle_inter_mode(
xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE;
xd->plane[j].dst.stride = bw;
}
- av1_build_inter_predictors_sby(xd, mi_row, mi_col, &orig_dst, bsize);
+ av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst, bsize);
restore_dst_buf(xd, orig_dst);
mbmi->ref_frame[1] = INTRA_FRAME;
mbmi->use_wedge_interintra = 0;
@@ -8876,7 +7923,8 @@ static int64_t handle_inter_mode(
av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb);
- rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_mv + rate_sum, dist_sum);
+ rd =
+ RDCOST(x->rdmult, x->rddiv, tmp_rate_mv + rate_sum + rmode, dist_sum);
if (rd < best_interintra_rd) {
best_interintra_rd = rd;
best_interintra_mode = mbmi->interintra_mode;
@@ -8907,7 +7955,7 @@ static int64_t handle_inter_mode(
if (rd != INT64_MAX)
rd = RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge + rate_sum,
dist_sum);
- best_interintra_rd_nowedge = rd;
+ best_interintra_rd_nowedge = best_interintra_rd;
// Disable wedge search if source variance is small
if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh) {
@@ -8926,17 +7974,18 @@ static int64_t handle_inter_mode(
// get negative of mask
const uint8_t *mask = av1_get_contiguous_soft_mask(
mbmi->interintra_wedge_index, 1, bsize);
- do_masked_motion_search(cpi, x, mask, bw, bsize, mi_row, mi_col,
- &tmp_mv, &tmp_rate_mv, 0);
+ tmp_mv.as_int = x->mbmi_ext->ref_mvs[refs[0]][0].as_int;
+ compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row,
+ mi_col, intrapred, mask, bw,
+ &tmp_rate_mv, 0, 0);
mbmi->mv[0].as_int = tmp_mv.as_int;
- av1_build_inter_predictors_sby(xd, mi_row, mi_col, &orig_dst, bsize);
+ av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
+ bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb);
rd = RDCOST(x->rdmult, x->rddiv,
rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum);
- if (rd < best_interintra_rd_wedge) {
- best_interintra_rd_wedge = rd;
- } else {
+ if (rd >= best_interintra_rd_wedge) {
tmp_mv.as_int = cur_mv[0].as_int;
tmp_rate_mv = rate_mv;
}
@@ -8956,37 +8005,33 @@ static int64_t handle_inter_mode(
best_interintra_rd_wedge = rd;
if (best_interintra_rd_wedge < best_interintra_rd_nowedge) {
mbmi->use_wedge_interintra = 1;
- best_interintra_rd = best_interintra_rd_wedge;
mbmi->mv[0].as_int = tmp_mv.as_int;
rd_stats->rate += tmp_rate_mv - rate_mv;
rate_mv = tmp_rate_mv;
} else {
mbmi->use_wedge_interintra = 0;
- best_interintra_rd = best_interintra_rd_nowedge;
mbmi->mv[0].as_int = cur_mv[0].as_int;
}
} else {
mbmi->use_wedge_interintra = 0;
- best_interintra_rd = best_interintra_rd_nowedge;
}
}
#endif // CONFIG_WEDGE
pred_exists = 0;
- *args->compmode_interintra_cost =
- av1_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 1);
- *args->compmode_interintra_cost +=
+ compmode_interintra_cost =
+ av1_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 1) +
interintra_mode_cost[mbmi->interintra_mode];
if (is_interintra_wedge_used(bsize)) {
- *args->compmode_interintra_cost += av1_cost_bit(
+ compmode_interintra_cost += av1_cost_bit(
cm->fc->wedge_interintra_prob[bsize], mbmi->use_wedge_interintra);
if (mbmi->use_wedge_interintra) {
- *args->compmode_interintra_cost +=
+ compmode_interintra_cost +=
av1_cost_literal(get_interintra_wedge_bits(bsize));
}
}
} else if (is_interintra_allowed(mbmi)) {
- *args->compmode_interintra_cost =
+ compmode_interintra_cost =
av1_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 0);
}
#endif // CONFIG_INTERINTRA
@@ -8994,7 +8039,7 @@ static int64_t handle_inter_mode(
if (pred_exists == 0) {
int tmp_rate;
int64_t tmp_dist;
- av1_build_inter_predictors_sb(xd, mi_row, mi_col, &orig_dst, bsize);
+ av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, &orig_dst, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
&tmp_dist, &skip_txfm_sb, &skip_sse_sb);
rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
@@ -9034,10 +8079,23 @@ static int64_t handle_inter_mode(
}
}
+#if CONFIG_EXT_INTER
+#if CONFIG_INTERINTRA
+ rd_stats->rate += compmode_interintra_cost;
+#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+ rate2_bmc_nocoeff += compmode_interintra_cost;
+#endif
+#endif
+#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
+ rd_stats->rate += compmode_interinter_cost;
+#endif
+#endif
+
ret_val = motion_mode_rd(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
disable_skip, mode_mv, mi_row, mi_col, args,
ref_best_rd, refs, rate_mv,
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+ single_newmv,
#if CONFIG_EXT_INTER
rate2_bmc_nocoeff, &best_bmc_mbmi,
#if CONFIG_MOTION_VAR
@@ -9060,34 +8118,36 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
MACROBLOCKD *const xd = &x->e_mbd;
const TileInfo *tile = &xd->tile;
+#if CONFIG_EC_ADAPT
+ FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
+#else
+ FRAME_CONTEXT *const ec_ctx = cm->fc;
+#endif // CONFIG_EC_ADAPT
MODE_INFO *const mi = xd->mi[0];
const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
const int w = block_size_wide[bsize];
const int h = block_size_high[bsize];
const int sb_row = mi_row / MAX_MIB_SIZE;
+ const int sb_col = mi_col / MAX_MIB_SIZE;
- int_mv dv_ref;
- av1_find_ref_dv(&dv_ref, mi_row, mi_col);
-
- const MvLimits tmp_mv_limits = x->mv_limits;
+ MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
+ MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
+ int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
+ av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
+ mbmi_ext->ref_mv_stack[ref_frame],
+#if CONFIG_EXT_INTER
+ mbmi_ext->compound_mode_context,
+#endif // CONFIG_EXT_INTER
+ candidates, mi_row, mi_col, NULL, NULL,
+ mbmi_ext->mode_context);
- // TODO(aconverse@google.com): Handle same row DV.
- x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
- x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w;
- x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
- x->mv_limits.row_max = (sb_row * MAX_MIB_SIZE - mi_row) * MI_SIZE - h;
- assert(x->mv_limits.col_min >= tmp_mv_limits.col_min);
- assert(x->mv_limits.col_max <= tmp_mv_limits.col_max);
- assert(x->mv_limits.row_min >= tmp_mv_limits.row_min);
- assert(x->mv_limits.row_max <= tmp_mv_limits.row_max);
- av1_set_mv_search_range(&x->mv_limits, &dv_ref.as_mv);
+ int_mv nearestmv, nearmv;
+ av1_find_best_ref_mvs(0, candidates, &nearestmv, &nearmv);
- if (x->mv_limits.col_max < x->mv_limits.col_min ||
- x->mv_limits.row_max < x->mv_limits.row_min) {
- x->mv_limits = tmp_mv_limits;
- return INT64_MAX;
- }
+ int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
+ if (dv_ref.as_int == 0) av1_find_ref_dv(&dv_ref, mi_row, mi_col);
+ mbmi_ext->ref_mvs[INTRA_FRAME][0] = dv_ref;
struct buf_2d yv12_mb[MAX_MB_PLANE];
av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, mi_row, mi_col, NULL, NULL);
@@ -9095,86 +8155,140 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
xd->plane[i].pre[0] = yv12_mb[i];
}
- int step_param = cpi->mv_step_param;
- MV mvp_full = dv_ref.as_mv;
- mvp_full.col >>= 3;
- mvp_full.row >>= 3;
- int sadpb = x->sadperbit16;
- int cost_list[5];
- int bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
- sadpb, cond_cost_list(cpi, cost_list),
- &dv_ref.as_mv, INT_MAX, 1);
+ enum IntrabcMotionDirection {
+ IBC_MOTION_ABOVE,
+ IBC_MOTION_LEFT,
+ IBC_MOTION_DIRECTIONS
+ };
- x->mv_limits = tmp_mv_limits;
- if (bestsme == INT_MAX) return INT64_MAX;
- mvp_full = x->best_mv.as_mv;
- MV dv = {.row = mvp_full.row * 8, .col = mvp_full.col * 8 };
- if (mv_check_bounds(&x->mv_limits, &dv)) return INT64_MAX;
- if (!is_dv_valid(dv, tile, mi_row, mi_col, bsize)) return INT64_MAX;
MB_MODE_INFO *mbmi = &mi->mbmi;
MB_MODE_INFO best_mbmi = *mbmi;
RD_STATS best_rdcost = *rd_cost;
int best_skip = x->skip;
+
+ for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
+ dir < IBC_MOTION_DIRECTIONS; ++dir) {
+ const MvLimits tmp_mv_limits = x->mv_limits;
+ switch (dir) {
+ case IBC_MOTION_ABOVE:
+ x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
+ x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w;
+ x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
+ x->mv_limits.row_max = (sb_row * MAX_MIB_SIZE - mi_row) * MI_SIZE - h;
+ break;
+ case IBC_MOTION_LEFT:
+ x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
+ x->mv_limits.col_max = (sb_col * MAX_MIB_SIZE - mi_col) * MI_SIZE - w;
+ // TODO(aconverse@google.com): Minimize the overlap between above and
+ // left areas.
+ x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
+ int bottom_coded_mi_edge =
+ AOMMIN((sb_row + 1) * MAX_MIB_SIZE, tile->mi_row_end);
+ x->mv_limits.row_max = (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
+ break;
+ default: assert(0);
+ }
+ assert(x->mv_limits.col_min >= tmp_mv_limits.col_min);
+ assert(x->mv_limits.col_max <= tmp_mv_limits.col_max);
+ assert(x->mv_limits.row_min >= tmp_mv_limits.row_min);
+ assert(x->mv_limits.row_max <= tmp_mv_limits.row_max);
+ av1_set_mv_search_range(&x->mv_limits, &dv_ref.as_mv);
+
+ if (x->mv_limits.col_max < x->mv_limits.col_min ||
+ x->mv_limits.row_max < x->mv_limits.row_min) {
+ x->mv_limits = tmp_mv_limits;
+ continue;
+ }
+
+ int step_param = cpi->mv_step_param;
+ MV mvp_full = dv_ref.as_mv;
+ mvp_full.col >>= 3;
+ mvp_full.row >>= 3;
+ int sadpb = x->sadperbit16;
+ int cost_list[5];
+ int bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
+ sadpb, cond_cost_list(cpi, cost_list),
+ &dv_ref.as_mv, INT_MAX, 1);
+
+ x->mv_limits = tmp_mv_limits;
+ if (bestsme == INT_MAX) continue;
+ mvp_full = x->best_mv.as_mv;
+ MV dv = {.row = mvp_full.row * 8, .col = mvp_full.col * 8 };
+ if (mv_check_bounds(&x->mv_limits, &dv)) continue;
+ if (!is_dv_valid(dv, tile, mi_row, mi_col, bsize)) continue;
+
#if CONFIG_PALETTE
- memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
+ memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
#endif
- mbmi->use_intrabc = 1;
- mbmi->mode = DC_PRED;
- mbmi->uv_mode = DC_PRED;
- mbmi->mv[0].as_mv = dv;
+ mbmi->use_intrabc = 1;
+ mbmi->mode = DC_PRED;
+ mbmi->uv_mode = DC_PRED;
+ mbmi->mv[0].as_mv = dv;
#if CONFIG_DUAL_FILTER
- for (int idx = 0; idx < 4; ++idx) mbmi->interp_filter[idx] = BILINEAR;
+ for (int idx = 0; idx < 4; ++idx) mbmi->interp_filter[idx] = BILINEAR;
#else
- mbmi->interp_filter = BILINEAR;
+ mbmi->interp_filter = BILINEAR;
#endif
- mbmi->skip = 0;
- x->skip = 0;
- av1_build_inter_predictors_sb(xd, mi_row, mi_col, NULL, bsize);
-
- int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, x->nmvjointcost, x->mvcost,
- MV_COST_WEIGHT);
- const PREDICTION_MODE A = av1_above_block_mode(mi, xd->above_mi, 0);
- const PREDICTION_MODE L = av1_left_block_mode(mi, xd->left_mi, 0);
- const int rate_mode =
- cpi->y_mode_costs[A][L][DC_PRED] + av1_cost_bit(INTRABC_PROB, 1);
-
- RD_STATS rd_stats, rd_stats_uv;
- av1_subtract_plane(x, bsize, 0);
- super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX);
- super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
- av1_merge_rd_stats(&rd_stats, &rd_stats_uv);
+ mbmi->skip = 0;
+ x->skip = 0;
+ av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
+
+ int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, x->nmvjointcost,
+ x->mvcost, MV_COST_WEIGHT);
+ const PREDICTION_MODE A = av1_above_block_mode(mi, xd->above_mi, 0);
+ const PREDICTION_MODE L = av1_left_block_mode(mi, xd->left_mi, 0);
+ const int rate_mode = cpi->y_mode_costs[A][L][DC_PRED] +
+ av1_cost_bit(ec_ctx->intrabc_prob, 1);
+
+ RD_STATS rd_stats, rd_stats_uv;
+ av1_subtract_plane(x, bsize, 0);
+ super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX);
+ super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
+ av1_merge_rd_stats(&rd_stats, &rd_stats_uv);
#if CONFIG_RD_DEBUG
- mbmi->rd_stats = rd_stats;
+ mbmi->rd_stats = rd_stats;
#endif
- const aom_prob skip_prob = av1_get_skip_prob(cm, xd);
-
- RD_STATS rdc_noskip;
- av1_init_rd_stats(&rdc_noskip);
- rdc_noskip.rate =
- rate_mode + rate_mv + rd_stats.rate + av1_cost_bit(skip_prob, 0);
- rdc_noskip.dist = rd_stats.dist;
- rdc_noskip.rdcost =
- RDCOST(x->rdmult, x->rddiv, rdc_noskip.rate, rdc_noskip.dist);
- if (rdc_noskip.rdcost < best_rd) {
- best_rd = rdc_noskip.rdcost;
- best_mbmi = *mbmi;
- best_skip = x->skip;
- best_rdcost = rdc_noskip;
- }
+#if CONFIG_VAR_TX
+ // TODO(aconverse@google.com): Evaluate allowing VAR TX on intrabc blocks
+ const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
+ const int height = block_size_high[bsize] >> tx_size_high_log2[0];
+ int idx, idy;
+ for (idy = 0; idy < height; ++idy)
+ for (idx = 0; idx < width; ++idx)
+ mbmi->inter_tx_size[idy >> 1][idx >> 1] = mbmi->tx_size;
+ mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
+#endif // CONFIG_VAR_TX
- x->skip = 1;
- mbmi->skip = 1;
- RD_STATS rdc_skip;
- av1_init_rd_stats(&rdc_skip);
- rdc_skip.rate = rate_mode + rate_mv + av1_cost_bit(skip_prob, 1);
- rdc_skip.dist = rd_stats.sse;
- rdc_skip.rdcost = RDCOST(x->rdmult, x->rddiv, rdc_skip.rate, rdc_skip.dist);
- if (rdc_skip.rdcost < best_rd) {
- best_rd = rdc_skip.rdcost;
- best_mbmi = *mbmi;
- best_skip = x->skip;
- best_rdcost = rdc_skip;
+ const aom_prob skip_prob = av1_get_skip_prob(cm, xd);
+
+ RD_STATS rdc_noskip;
+ av1_init_rd_stats(&rdc_noskip);
+ rdc_noskip.rate =
+ rate_mode + rate_mv + rd_stats.rate + av1_cost_bit(skip_prob, 0);
+ rdc_noskip.dist = rd_stats.dist;
+ rdc_noskip.rdcost =
+ RDCOST(x->rdmult, x->rddiv, rdc_noskip.rate, rdc_noskip.dist);
+ if (rdc_noskip.rdcost < best_rd) {
+ best_rd = rdc_noskip.rdcost;
+ best_mbmi = *mbmi;
+ best_skip = x->skip;
+ best_rdcost = rdc_noskip;
+ }
+
+ x->skip = 1;
+ mbmi->skip = 1;
+ RD_STATS rdc_skip;
+ av1_init_rd_stats(&rdc_skip);
+ rdc_skip.rate = rate_mode + rate_mv + av1_cost_bit(skip_prob, 1);
+ rdc_skip.dist = rd_stats.sse;
+ rdc_skip.rdcost = RDCOST(x->rdmult, x->rddiv, rdc_skip.rate, rdc_skip.dist);
+ if (rdc_skip.rdcost < best_rd) {
+ best_rd = rdc_skip.rdcost;
+ best_mbmi = *mbmi;
+ best_skip = x->skip;
+ best_rdcost = rdc_skip;
+ }
}
*mbmi = best_mbmi;
*rd_cost = best_rdcost;
@@ -9200,6 +8314,7 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
xd->mi[0]->mbmi.ref_frame[1] = NONE_FRAME;
#if CONFIG_INTRABC
xd->mi[0]->mbmi.use_intrabc = 0;
+ xd->mi[0]->mbmi.mv[0].as_int = 0;
#endif // CONFIG_INTRABC
const int64_t intra_yrd =
@@ -9212,11 +8327,8 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
if (intra_yrd < best_rd) {
max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->mbmi.tx_size]
[pd[1].subsampling_x][pd[1].subsampling_y];
-
+ init_sbuv_mode(&xd->mi[0]->mbmi);
#if CONFIG_CB4X4
-#if !CONFIG_CHROMA_2X2
- max_uv_tx_size = AOMMAX(max_uv_tx_size, TX_4X4);
-#endif // !CONFIG_CHROMA_2X2
if (!x->skip_chroma_rd)
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
&uv_skip, bsize, max_uv_tx_size);
@@ -9235,6 +8347,9 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
rd_cost->dist = dist_y + dist_uv;
}
rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ rd_cost->dist_y = dist_y;
+#endif
} else {
rd_cost->rate = INT_MAX;
}
@@ -9602,10 +8717,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
int64_t best_pred_diff[REFERENCE_MODES];
int64_t best_pred_rd[REFERENCE_MODES];
MB_MODE_INFO best_mbmode;
-#if CONFIG_REF_MV
int rate_skip0 = av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
int rate_skip1 = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
-#endif // CONFIG_REF_MV
int best_mode_skippable = 0;
int midx, best_mode_index = -1;
unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
@@ -9635,13 +8748,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]];
int best_skip2 = 0;
uint8_t ref_frame_skip_mask[2] = { 0 };
-#if CONFIG_EXT_INTER
uint32_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 };
+#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
MV_REFERENCE_FRAME best_single_inter_ref = LAST_FRAME;
int64_t best_single_inter_rd = INT64_MAX;
-#else
- uint16_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 };
-#endif // CONFIG_EXT_INTER
+#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
int mode_skip_start = sf->mode_skip_start + 1;
const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
@@ -9663,8 +8774,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
NULL,
NULL,
NULL,
- NULL,
- NULL,
#else // CONFIG_EXT_INTER
NULL,
#endif // CONFIG_EXT_INTER
@@ -9681,15 +8790,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
const MODE_INFO *left_mi = xd->left_mi;
#endif // CONFIG_PALETTE
#if CONFIG_MOTION_VAR
-#if CONFIG_HIGHBITDEPTH
- DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
-#else
- DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
-#endif // CONFIG_HIGHBITDEPTH
- DECLARE_ALIGNED(16, int32_t, weighted_src_buf[MAX_SB_SQUARE]);
- DECLARE_ALIGNED(16, int32_t, mask2d_buf[MAX_SB_SQUARE]);
int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
@@ -9698,22 +8798,24 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int len = sizeof(uint16_t);
- args.above_pred_buf[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
- args.above_pred_buf[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len);
+ args.above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf);
+ args.above_pred_buf[1] =
+ CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len);
args.above_pred_buf[2] =
- CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAX_SB_SQUARE * len);
- args.left_pred_buf[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
- args.left_pred_buf[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len);
+ CONVERT_TO_BYTEPTR(x->above_pred_buf + 2 * MAX_SB_SQUARE * len);
+ args.left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf);
+ args.left_pred_buf[1] =
+ CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len);
args.left_pred_buf[2] =
- CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAX_SB_SQUARE * len);
+ CONVERT_TO_BYTEPTR(x->left_pred_buf + 2 * MAX_SB_SQUARE * len);
} else {
#endif // CONFIG_HIGHBITDEPTH
- args.above_pred_buf[0] = tmp_buf1;
- args.above_pred_buf[1] = tmp_buf1 + MAX_SB_SQUARE;
- args.above_pred_buf[2] = tmp_buf1 + 2 * MAX_SB_SQUARE;
- args.left_pred_buf[0] = tmp_buf2;
- args.left_pred_buf[1] = tmp_buf2 + MAX_SB_SQUARE;
- args.left_pred_buf[2] = tmp_buf2 + 2 * MAX_SB_SQUARE;
+ args.above_pred_buf[0] = x->above_pred_buf;
+ args.above_pred_buf[1] = x->above_pred_buf + MAX_SB_SQUARE;
+ args.above_pred_buf[2] = x->above_pred_buf + 2 * MAX_SB_SQUARE;
+ args.left_pred_buf[0] = x->left_pred_buf;
+ args.left_pred_buf[1] = x->left_pred_buf + MAX_SB_SQUARE;
+ args.left_pred_buf[2] = x->left_pred_buf + 2 * MAX_SB_SQUARE;
#if CONFIG_HIGHBITDEPTH
}
#endif // CONFIG_HIGHBITDEPTH
@@ -9731,11 +8833,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
}
#endif // CONFIG_PALETTE
-#if CONFIG_EXT_INTRA
- memset(directional_mode_skip_mask, 0,
- sizeof(directional_mode_skip_mask[0]) * INTRA_MODES);
-#endif // CONFIG_EXT_INTRA
-
estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
&comp_mode_p);
@@ -9756,9 +8853,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
x->mbmi_ext->mode_context[ref_frame] = 0;
-#if CONFIG_REF_MV && CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER
x->mbmi_ext->compound_mode_context[ref_frame] = 0;
-#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+#endif // CONFIG_EXT_INTER
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
@@ -9788,7 +8885,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_EXT_INTER
}
-#if CONFIG_REF_MV
for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
MODE_INFO *const mi = xd->mi[0];
int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
@@ -9813,10 +8909,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
mbmi_ext->mode_context[ref_frame] &= ~(1 << ALL_ZERO_FLAG_OFFSET);
}
}
-#endif // CONFIG_REF_MV
#if CONFIG_MOTION_VAR
av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
+
if (check_num_overlappable_neighbors(mbmi) &&
is_motion_variation_allowed_bsize(bsize)) {
av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col,
@@ -9827,8 +8923,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
dst_height2, args.left_pred_stride);
av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
mi_col);
- x->mask_buf = mask2d_buf;
- x->wsrc_buf = weighted_src_buf;
calc_target_weighted_pred(cm, x, xd, mi_row, mi_col, args.above_pred_buf[0],
args.above_pred_stride[0], args.left_pred_buf[0],
args.left_pred_stride[0]);
@@ -9904,10 +8998,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#if CONFIG_EXT_INTER
if (frame_mv[NEAREST_NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV);
- if (frame_mv[NEAREST_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
- mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARMV);
- if (frame_mv[NEAR_NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
- mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARESTMV);
if (frame_mv[NEAR_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARMV);
#endif // CONFIG_EXT_INTER
@@ -9931,7 +9021,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (sf->adaptive_mode_search) {
if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
cpi->rc.frames_since_golden >= 3)
- if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1))
+ if ((x->pred_mv_sad[GOLDEN_FRAME] >> 1) > x->pred_mv_sad[LAST_FRAME])
mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
}
@@ -9985,18 +9075,16 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
int64_t this_rd = INT64_MAX;
int disable_skip = 0;
int compmode_cost = 0;
-#if CONFIG_EXT_INTER
- int compmode_interintra_cost = 0;
- int compmode_interinter_cost = 0;
-#endif // CONFIG_EXT_INTER
int rate2 = 0, rate_y = 0, rate_uv = 0;
int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ int64_t distortion2_y = 0;
+ int64_t total_sse_y = INT64_MAX;
+#endif
int skippable = 0;
int this_skip2 = 0;
int64_t total_sse = INT64_MAX;
-#if CONFIG_REF_MV
uint8_t ref_frame_type;
-#endif // CONFIG_REF_MV
#if CONFIG_PVQ
od_encode_rollback(&x->daala_enc, &pre_buf);
#endif // CONFIG_PVQ
@@ -10004,9 +9092,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
this_mode = av1_mode_order[mode_index].mode;
ref_frame = av1_mode_order[mode_index].ref_frame[0];
second_ref_frame = av1_mode_order[mode_index].ref_frame[1];
-#if CONFIG_REF_MV
mbmi->ref_mv_idx = 0;
-#endif // CONFIG_REF_MV
#if CONFIG_EXT_INTER
if (ref_frame > INTRA_FRAME && second_ref_frame == INTRA_FRAME) {
@@ -10079,7 +9165,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
// This is only used in motion vector unit test.
if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue;
-#if CONFIG_LOWDELAY_COMPOUND // Changes LL bitstream
+#if CONFIG_ONE_SIDED_COMPOUND // Changes LL bitstream
#if CONFIG_EXT_REFS
if (cpi->oxcf.pass == 0) {
// Complexity-compression trade-offs
@@ -10144,9 +9230,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_GLOBAL_MOTION
const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame };
if (!check_best_zero_mv(cpi, mbmi_ext->mode_context,
-#if CONFIG_REF_MV && CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER
mbmi_ext->compound_mode_context,
-#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+#endif // CONFIG_EXT_INTER
frame_mv, this_mode, ref_frames, bsize, -1,
mi_row, mi_col))
continue;
@@ -10181,9 +9267,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
}
-#if CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
-#endif // CONFIG_EXT_INTER
+#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
if (ref_frame == INTRA_FRAME) {
RD_STATS rd_stats_y;
@@ -10199,11 +9285,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
const uint8_t *src = x->plane[0].src.buf;
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- highbd_angle_estimation(src, src_stride, rows, cols,
+ highbd_angle_estimation(src, src_stride, rows, cols, bsize,
directional_mode_skip_mask);
else
#endif // CONFIG_HIGHBITDEPTH
- angle_estimation(src, src_stride, rows, cols,
+ angle_estimation(src, src_stride, rows, cols, bsize,
directional_mode_skip_mask);
angle_stats_ready = 1;
}
@@ -10336,18 +9422,19 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
rate2 += intra_cost_penalty;
distortion2 = distortion_y + distortion_uv;
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ if (bsize < BLOCK_8X8) distortion2_y = distortion_y;
+#endif
} else {
-#if CONFIG_REF_MV
int_mv backup_ref_mv[2];
#if !SUB8X8_COMP_REF
- if (bsize < BLOCK_8X8 && mbmi->ref_frame[1] > INTRA_FRAME) continue;
+ if (bsize == BLOCK_4X4 && mbmi->ref_frame[1] > INTRA_FRAME) continue;
#endif // !SUB8X8_COMP_REF
backup_ref_mv[0] = mbmi_ext->ref_mvs[ref_frame][0];
if (comp_pred) backup_ref_mv[1] = mbmi_ext->ref_mvs[second_ref_frame][0];
-#endif // CONFIG_REF_MV
-#if CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
if (second_ref_frame == INTRA_FRAME) {
if (best_single_inter_ref != ref_frame) continue;
mbmi->interintra_mode = intra_to_interintra_mode[best_intra_mode];
@@ -10365,8 +9452,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
#endif // CONFIG_FILTER_INTRA
}
-#endif // CONFIG_EXT_INTER
-#if CONFIG_REF_MV
+#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
mbmi->ref_mv_idx = 0;
ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
@@ -10411,7 +9497,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#if CONFIG_EXT_INTER
}
#endif // CONFIG_EXT_INTER
-#endif // CONFIG_REF_MV
{
RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
av1_init_rd_stats(&rd_stats);
@@ -10421,18 +9506,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
args.single_newmv = single_newmv;
#if CONFIG_EXT_INTER
args.single_newmv_rate = single_newmv_rate;
- args.compmode_interintra_cost = &compmode_interintra_cost;
- args.compmode_interinter_cost = &compmode_interinter_cost;
args.modelled_rd = modelled_rd;
#endif // CONFIG_EXT_INTER
this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y,
&rd_stats_uv, &disable_skip, frame_mv,
mi_row, mi_col, &args, best_rd);
-// Prevent pointers from escaping local scope
-#if CONFIG_EXT_INTER
- args.compmode_interintra_cost = NULL;
- args.compmode_interinter_cost = NULL;
-#endif // CONFIG_EXT_INTER
rate2 = rd_stats.rate;
skippable = rd_stats.skip;
@@ -10440,9 +9518,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
total_sse = rd_stats.sse;
rate_y = rd_stats_y.rate;
rate_uv = rd_stats_uv.rate;
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ if (bsize < BLOCK_8X8) distortion2_y = rd_stats_y.dist;
+#endif
}
-#if CONFIG_REF_MV
// TODO(jingning): This needs some refactoring to improve code quality
// and reduce redundant steps.
#if CONFIG_EXT_INTER
@@ -10505,10 +9585,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
int ref;
int_mv cur_mv;
RD_STATS tmp_rd_stats, tmp_rd_stats_y, tmp_rd_stats_uv;
-#if CONFIG_EXT_INTER
- int tmp_compmode_interintra_cost = 0;
- int tmp_compmode_interinter_cost = 0;
-#endif // CONFIG_EXT_INTER
av1_invalid_rd_stats(&tmp_rd_stats);
x->skip = 0;
@@ -10586,8 +9662,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
args.single_newmv = dummy_single_newmv;
#if CONFIG_EXT_INTER
args.single_newmv_rate = dummy_single_newmv_rate;
- args.compmode_interintra_cost = &tmp_compmode_interintra_cost;
- args.compmode_interinter_cost = &tmp_compmode_interinter_cost;
args.modelled_rd = NULL;
#endif // CONFIG_EXT_INTER
tmp_alt_rd = handle_inter_mode(
@@ -10597,8 +9671,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
args.single_newmv = NULL;
#if CONFIG_EXT_INTER
args.single_newmv_rate = NULL;
- args.compmode_interintra_cost = NULL;
- args.compmode_interinter_cost = NULL;
#endif // CONFIG_EXT_INTER
}
@@ -10658,15 +9730,17 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
tmp_ref_rd = tmp_alt_rd;
backup_mbmi = *mbmi;
backup_skip = x->skip;
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ if (bsize < BLOCK_8X8) {
+ total_sse_y = tmp_rd_stats_y.sse;
+ distortion2_y = tmp_rd_stats_y.dist;
+ }
+#endif
#if CONFIG_VAR_TX
for (i = 0; i < MAX_MB_PLANE; ++i)
memcpy(x->blk_skip_drl[i], x->blk_skip[i],
sizeof(uint8_t) * ctx->num_4x4_blk);
#endif // CONFIG_VAR_TX
-#if CONFIG_EXT_INTER
- compmode_interintra_cost = tmp_compmode_interintra_cost;
- compmode_interinter_cost = tmp_compmode_interinter_cost;
-#endif // CONFIG_EXT_INTER
} else {
*mbmi = backup_mbmi;
x->skip = backup_skip;
@@ -10684,29 +9758,19 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
}
mbmi_ext->ref_mvs[ref_frame][0] = backup_ref_mv[0];
if (comp_pred) mbmi_ext->ref_mvs[second_ref_frame][0] = backup_ref_mv[1];
-#endif // CONFIG_REF_MV
if (this_rd == INT64_MAX) continue;
#if SUB8X8_COMP_REF
compmode_cost = av1_cost_bit(comp_mode_p, comp_pred);
#else
- if (mbmi->sb_type >= BLOCK_8X8)
+ if (mbmi->sb_type != BLOCK_4X4)
compmode_cost = av1_cost_bit(comp_mode_p, comp_pred);
#endif // SUB8X8_COMP_REF
if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
}
-#if CONFIG_EXT_INTER
- rate2 += compmode_interintra_cost;
- if (cm->reference_mode != SINGLE_REFERENCE && comp_pred)
-#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- if (mbmi->motion_mode == SIMPLE_TRANSLATION)
-#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- rate2 += compmode_interinter_cost;
-#endif // CONFIG_EXT_INTER
-
// Estimate the reference frame signaling cost and add it
// to the rolling cost variable.
if (comp_pred) {
@@ -10731,14 +9795,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
// Cost the skip mb case
rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
} else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
-#if CONFIG_REF_MV
if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + rate_skip0,
distortion2) <
RDCOST(x->rdmult, x->rddiv, rate_skip1, total_sse)) {
-#else
- if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
- RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
-#endif // CONFIG_REF_MV
// Add in the cost of the no skip flag.
rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
} else {
@@ -10750,6 +9809,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
this_skip2 = 1;
rate_y = 0;
rate_uv = 0;
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ if (bsize < BLOCK_8X8) distortion2_y = total_sse_y;
+#endif
}
} else {
// Add in the cost of the no skip flag.
@@ -10775,13 +9837,13 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
best_intra_rd = this_rd;
best_intra_mode = mbmi->mode;
}
-#if CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
} else if (second_ref_frame == NONE_FRAME) {
if (this_rd < best_single_inter_rd) {
best_single_inter_rd = this_rd;
best_single_inter_ref = mbmi->ref_frame[0];
}
-#endif // CONFIG_EXT_INTER
+#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
}
if (!disable_skip && ref_frame == INTRA_FRAME) {
@@ -10839,7 +9901,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
best_rate_y = rate_y + av1_cost_bit(av1_get_skip_prob(cm, xd),
this_skip2 || skippable);
best_rate_uv = rate_uv;
-
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2_y;
+#endif
#if CONFIG_VAR_TX
for (i = 0; i < MAX_MB_PLANE; ++i)
memcpy(ctx->blk_skip[i], x->blk_skip[i],
@@ -10900,7 +9964,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
}
if (is_inter_mode(mbmi->mode)) {
- av1_build_inter_predictors_sb(xd, mi_row, mi_col, NULL, bsize);
+ av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
#if CONFIG_MOTION_VAR
if (mbmi->motion_mode == OBMC_CAUSAL) {
av1_build_obmc_inter_prediction(
@@ -10967,6 +10031,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
rd_cost->rate +=
(rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv);
rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ if (bsize < BLOCK_8X8) rd_cost->dist_y = rd_stats_y.dist;
+#endif
rd_cost->rdcost =
RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
best_skip2 = skip_blk;
@@ -11111,9 +10178,7 @@ PALETTE_EXIT:
best_mbmode.ref_frame[1] };
int comp_pred_mode = refs[1] > INTRA_FRAME;
int_mv zeromv[2];
-#if CONFIG_REF_MV
const uint8_t rf_type = av1_ref_frame_type(best_mbmode.ref_frame);
-#endif // CONFIG_REF_MV
#if CONFIG_GLOBAL_MOTION
zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]],
cm->allow_high_precision_mv, bsize,
@@ -11129,7 +10194,6 @@ PALETTE_EXIT:
zeromv[0].as_int = 0;
zeromv[1].as_int = 0;
#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_REF_MV
if (!comp_pred_mode) {
int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
@@ -11196,17 +10260,9 @@ PALETTE_EXIT:
nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv;
- // Try switching to the NEAR_NEAREST type modes first
- if (nearestmv[0].as_int == best_mbmode.mv[0].as_int &&
+ // Try switching to the NEAR_NEARMV mode
+ if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
nearmv[1].as_int == best_mbmode.mv[1].as_int) {
- best_mbmode.mode = NEAREST_NEARMV;
- best_mbmode.ref_mv_idx = i;
- } else if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
- nearestmv[1].as_int == best_mbmode.mv[1].as_int) {
- best_mbmode.mode = NEAR_NEARESTMV;
- best_mbmode.ref_mv_idx = i;
- } else if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
- nearmv[1].as_int == best_mbmode.mv[1].as_int) {
best_mbmode.mode = NEAR_NEARMV;
best_mbmode.ref_mv_idx = i;
}
@@ -11225,72 +10281,8 @@ PALETTE_EXIT:
}
#endif // CONFIG_EXT_INTER
}
-#else
-#if CONFIG_EXT_INTER
- if (!comp_pred_mode) {
-#endif // CONFIG_EXT_INTER
- if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
- ((comp_pred_mode &&
- frame_mv[NEARESTMV][refs[1]].as_int == best_mbmode.mv[1].as_int) ||
- !comp_pred_mode))
- best_mbmode.mode = NEARESTMV;
- else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
- ((comp_pred_mode &&
- frame_mv[NEARMV][refs[1]].as_int ==
- best_mbmode.mv[1].as_int) ||
- !comp_pred_mode))
- best_mbmode.mode = NEARMV;
- else if (best_mbmode.mv[0].as_int == zeromv[0].as_int &&
- ((comp_pred_mode &&
- best_mbmode.mv[1].as_int == zeromv[1].as_int) ||
- !comp_pred_mode))
- best_mbmode.mode = ZEROMV;
-#if CONFIG_EXT_INTER
- } else {
-#if CONFIG_GLOBAL_MOTION
- zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]],
- cm->allow_high_precision_mv,
- bsize, mi_col, mi_row, 0)
- .as_int;
- zeromv[1].as_int = comp_pred_mode
- ? gm_get_motion_vector(&cm->global_motion[refs[1]],
- cm->allow_high_precision_mv,
- bsize, mi_col, mi_row, 0)
- .as_int
- : 0;
-#else
- zeromv[0].as_int = 0;
- zeromv[1].as_int = 0;
-#endif // CONFIG_GLOBAL_MOTION
- if (frame_mv[NEAREST_NEARESTMV][refs[0]].as_int ==
- best_mbmode.mv[0].as_int &&
- frame_mv[NEAREST_NEARESTMV][refs[1]].as_int ==
- best_mbmode.mv[1].as_int)
- best_mbmode.mode = NEAREST_NEARESTMV;
- else if (frame_mv[NEAREST_NEARMV][refs[0]].as_int ==
- best_mbmode.mv[0].as_int &&
- frame_mv[NEAREST_NEARMV][refs[1]].as_int ==
- best_mbmode.mv[1].as_int)
- best_mbmode.mode = NEAREST_NEARMV;
- else if (frame_mv[NEAR_NEARESTMV][refs[0]].as_int ==
- best_mbmode.mv[0].as_int &&
- frame_mv[NEAR_NEARESTMV][refs[1]].as_int ==
- best_mbmode.mv[1].as_int)
- best_mbmode.mode = NEAR_NEARESTMV;
- else if (frame_mv[NEAR_NEARMV][refs[0]].as_int ==
- best_mbmode.mv[0].as_int &&
- frame_mv[NEAR_NEARMV][refs[1]].as_int ==
- best_mbmode.mv[1].as_int)
- best_mbmode.mode = NEAR_NEARMV;
- else if (best_mbmode.mv[0].as_int == zeromv[0].as_int &&
- best_mbmode.mv[1].as_int == zeromv[1].as_int)
- best_mbmode.mode = ZERO_ZEROMV;
- }
-#endif // CONFIG_EXT_INTER
-#endif // CONFIG_REF_MV
}
-#if CONFIG_REF_MV
// Make sure that the ref_mv_idx is only nonzero when we're
// using a mode which can support ref_mv_idx
if (best_mbmode.ref_mv_idx != 0 &&
@@ -11339,7 +10331,6 @@ PALETTE_EXIT:
}
}
}
-#endif // CONFIG_REF_MV
if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
rd_cost->rate = INT_MAX;
@@ -11412,14 +10403,12 @@ PALETTE_EXIT:
}
#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_REF_MV
for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
if (mbmi->mode != NEWMV)
mbmi->pred_mv[i].as_int = mbmi->mv[i].as_int;
else
mbmi->pred_mv[i].as_int = mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_int;
}
-#endif // CONFIG_REF_MV
for (i = 0; i < REFERENCE_MODES; ++i) {
if (best_pred_rd[i] == INT64_MAX)
@@ -11502,10 +10491,8 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
mbmi->tx_size = max_txsize_lookup[bsize];
x->skip = 1;
-#if CONFIG_REF_MV
mbmi->ref_mv_idx = 0;
mbmi->pred_mv[0].as_int = 0;
-#endif // CONFIG_REF_MV
mbmi->motion_mode = SIMPLE_TRANSLATION;
#if CONFIG_MOTION_VAR
@@ -11566,7 +10553,9 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
rd_cost->rate = rate2;
rd_cost->dist = distortion2;
rd_cost->rdcost = this_rd;
-
+#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+ if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2;
+#endif
if (this_rd >= best_rd_so_far) {
rd_cost->rate = INT_MAX;
rd_cost->rdcost = INT64_MAX;
@@ -11589,791 +10578,6 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
store_coding_context(x, ctx, THR_ZEROMV, best_pred_diff, 0);
}
-void av1_rd_pick_inter_mode_sub8x8(const struct AV1_COMP *cpi,
- TileDataEnc *tile_data, struct macroblock *x,
- int mi_row, int mi_col,
- struct RD_STATS *rd_cost,
-#if CONFIG_SUPERTX
- int *returnrate_nocoef,
-#endif // CONFIG_SUPERTX
- BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
- int64_t best_rd_so_far) {
- const AV1_COMMON *const cm = &cpi->common;
- const RD_OPT *const rd_opt = &cpi->rd;
- const SPEED_FEATURES *const sf = &cpi->sf;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- const struct segmentation *const seg = &cm->seg;
- MV_REFERENCE_FRAME ref_frame, second_ref_frame;
- unsigned char segment_id = mbmi->segment_id;
- int comp_pred, i;
- int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
- struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE];
- static const int flag_list[TOTAL_REFS_PER_FRAME] = {
- 0,
- AOM_LAST_FLAG,
-#if CONFIG_EXT_REFS
- AOM_LAST2_FLAG,
- AOM_LAST3_FLAG,
-#endif // CONFIG_EXT_REFS
- AOM_GOLD_FLAG,
-#if CONFIG_EXT_REFS
- AOM_BWD_FLAG,
-#endif // CONFIG_EXT_REFS
- AOM_ALT_FLAG
- };
- int64_t best_rd = best_rd_so_far;
- int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
- int64_t best_pred_diff[REFERENCE_MODES];
- int64_t best_pred_rd[REFERENCE_MODES];
- MB_MODE_INFO best_mbmode;
- int ref_index, best_ref_index = 0;
- unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
- unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
- aom_prob comp_mode_p;
-#if CONFIG_DUAL_FILTER
- InterpFilter tmp_best_filter[4] = { 0 };
-#else
- InterpFilter tmp_best_filter = SWITCHABLE;
-#endif // CONFIG_DUAL_FILTER
- int rate_uv_intra, rate_uv_tokenonly = INT_MAX;
- int64_t dist_uv = INT64_MAX;
- int skip_uv;
- PREDICTION_MODE mode_uv = DC_PRED;
- const int intra_cost_penalty = av1_get_intra_cost_penalty(
- cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
- int_mv seg_mvs[4][TOTAL_REFS_PER_FRAME];
- b_mode_info best_bmodes[4];
- int best_skip2 = 0;
- int ref_frame_skip_mask[2] = { 0 };
- int internal_active_edge =
- av1_active_edge_sb(cpi, mi_row, mi_col) && av1_internal_image_edge(cpi);
-#if CONFIG_PVQ
- od_rollback_buffer pre_buf;
-
- od_encode_checkpoint(&x->daala_enc, &pre_buf);
-#endif // CONFIG_PVQ
-
-#if CONFIG_SUPERTX
- best_rd_so_far = INT64_MAX;
- best_rd = best_rd_so_far;
- best_yrd = best_rd_so_far;
-#endif // CONFIG_SUPERTX
- av1_zero(best_mbmode);
-
-#if CONFIG_FILTER_INTRA
- mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
- mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
-#endif // CONFIG_FILTER_INTRA
- mbmi->motion_mode = SIMPLE_TRANSLATION;
-#if CONFIG_EXT_INTER
- mbmi->interinter_compound_type = COMPOUND_AVERAGE;
- mbmi->use_wedge_interintra = 0;
-#endif // CONFIG_EXT_INTER
-#if CONFIG_WARPED_MOTION
- mbmi->num_proj_ref[0] = 0;
- mbmi->num_proj_ref[1] = 0;
-#endif // CONFIG_WARPED_MOTION
-
- for (i = 0; i < 4; i++) {
- int j;
- for (j = 0; j < TOTAL_REFS_PER_FRAME; j++)
- seg_mvs[i][j].as_int = INVALID_MV;
- }
-
- estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
- &comp_mode_p);
-
- for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
- rate_uv_intra = INT_MAX;
-
- rd_cost->rate = INT_MAX;
-#if CONFIG_SUPERTX
- *returnrate_nocoef = INT_MAX;
-#endif // CONFIG_SUPERTX
-
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
- x->mbmi_ext->mode_context[ref_frame] = 0;
-#if CONFIG_REF_MV && CONFIG_EXT_INTER
- x->mbmi_ext->compound_mode_context[ref_frame] = 0;
-#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
- if (cpi->ref_frame_flags & flag_list[ref_frame]) {
- setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
- frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
- } else {
- ref_frame_skip_mask[0] |= (1 << ref_frame);
- ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
- }
- frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
-#if CONFIG_EXT_INTER
-#endif // CONFIG_EXT_INTER
- frame_mv[ZEROMV][ref_frame].as_int = 0;
- }
-
-#if CONFIG_PALETTE
- mbmi->palette_mode_info.palette_size[0] = 0;
- mbmi->palette_mode_info.palette_size[1] = 0;
-#endif // CONFIG_PALETTE
-
- for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
- int mode_excluded = 0;
- int64_t this_rd = INT64_MAX;
- int disable_skip = 0;
- int compmode_cost = 0;
- int rate2 = 0, rate_y = 0, rate_uv = 0;
- int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
- int skippable = 0;
- int this_skip2 = 0;
- int64_t total_sse = INT_MAX;
-
-#if CONFIG_PVQ
- od_encode_rollback(&x->daala_enc, &pre_buf);
-#endif // CONFIG_PVQ
-
- ref_frame = av1_ref_order[ref_index].ref_frame[0];
- second_ref_frame = av1_ref_order[ref_index].ref_frame[1];
-
-#if CONFIG_REF_MV
- mbmi->ref_mv_idx = 0;
-#endif // CONFIG_REF_MV
-
- // Look at the reference frame of the best mode so far and set the
- // skip mask to look at a subset of the remaining modes.
- if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) {
- if (ref_index == 3) {
- switch (best_mbmode.ref_frame[0]) {
- case INTRA_FRAME: break;
- case LAST_FRAME:
- ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) |
-#if CONFIG_EXT_REFS
- (1 << LAST2_FRAME) | (1 << LAST3_FRAME) |
- (1 << BWDREF_FRAME) |
-#endif // CONFIG_EXT_REFS
- (1 << ALTREF_FRAME);
- ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
- break;
-#if CONFIG_EXT_REFS
- case LAST2_FRAME:
- ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << LAST3_FRAME) |
- (1 << GOLDEN_FRAME) |
- (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME);
- ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
- break;
- case LAST3_FRAME:
- ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << LAST2_FRAME) |
- (1 << GOLDEN_FRAME) |
- (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME);
- ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
- break;
-#endif // CONFIG_EXT_REFS
- case GOLDEN_FRAME:
- ref_frame_skip_mask[0] |= (1 << LAST_FRAME) |
-#if CONFIG_EXT_REFS
- (1 << LAST2_FRAME) | (1 << LAST3_FRAME) |
- (1 << BWDREF_FRAME) |
-#endif // CONFIG_EXT_REFS
- (1 << ALTREF_FRAME);
- ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
- break;
-#if CONFIG_EXT_REFS
- case BWDREF_FRAME:
- ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << LAST2_FRAME) |
- (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) |
- (1 << ALTREF_FRAME);
- ref_frame_skip_mask[1] |= (1 << ALTREF_FRAME) | 0x01;
- break;
-#endif // CONFIG_EXT_REFS
- case ALTREF_FRAME:
- ref_frame_skip_mask[0] |= (1 << LAST_FRAME) |
-#if CONFIG_EXT_REFS
- (1 << LAST2_FRAME) | (1 << LAST3_FRAME) |
- (1 << BWDREF_FRAME) |
-#endif // CONFIG_EXT_REFS
- (1 << GOLDEN_FRAME);
-#if CONFIG_EXT_REFS
- ref_frame_skip_mask[1] |= (1 << BWDREF_FRAME) | 0x01;
-#endif // CONFIG_EXT_REFS
- break;
- case NONE_FRAME:
- case TOTAL_REFS_PER_FRAME:
- assert(0 && "Invalid Reference frame");
- break;
- }
- }
- }
-
- if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
- (ref_frame_skip_mask[1] & (1 << AOMMAX(0, second_ref_frame))))
- continue;
-
- // Test best rd so far against threshold for trying this mode.
- if (!internal_active_edge &&
- rd_less_than_thresh(best_rd,
- rd_opt->threshes[segment_id][bsize][ref_index],
- tile_data->thresh_freq_fact[bsize][ref_index]))
- continue;
-
- // This is only used in motion vector unit test.
- if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue;
-
-#if CONFIG_LOWDELAY_COMPOUND // Changes LL bitstream
-#if CONFIG_EXT_REFS
- if (cpi->oxcf.pass == 0) {
- // Complexity-compression trade-offs
- // if (ref_frame == ALTREF_FRAME) continue;
- // if (ref_frame == BWDREF_FRAME) continue;
- if (second_ref_frame == ALTREF_FRAME) continue;
- // if (second_ref_frame == BWDREF_FRAME) continue;
- }
-#endif
-#endif
- comp_pred = second_ref_frame > INTRA_FRAME;
- if (comp_pred) {
- if (!cpi->allow_comp_inter_inter) continue;
- if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
- // Do not allow compound prediction if the segment level reference frame
- // feature is in use as in this case there can only be one reference.
- if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;
-
- if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
- best_mbmode.ref_frame[0] == INTRA_FRAME)
- continue;
- }
-
- // TODO(jingning, jkoleszar): scaling reference frame not supported for
- // sub8x8 blocks.
- if (ref_frame > INTRA_FRAME &&
- av1_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
- continue;
-
- if (second_ref_frame > INTRA_FRAME &&
- av1_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
- continue;
-
- if (comp_pred)
- mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
- else if (ref_frame != INTRA_FRAME)
- mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
-
- // If the segment reference frame feature is enabled....
- // then do nothing if the current ref frame is not allowed..
- if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
- get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
- continue;
- // Disable this drop out case if the ref frame
- // segment level feature is enabled for this segment. This is to
- // prevent the possibility that we end up unable to pick any mode.
- } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
- // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
- // unless ARNR filtering is enabled in which case we want
- // an unfiltered alternative. We allow near/nearest as well
- // because they may result in zero-zero MVs but be cheaper.
- if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
- continue;
- }
-
- mbmi->tx_size = TX_4X4;
- mbmi->uv_mode = DC_PRED;
- mbmi->ref_frame[0] = ref_frame;
- mbmi->ref_frame[1] = second_ref_frame;
-// Evaluate all sub-pel filters irrespective of whether we can use
-// them for this frame.
-#if CONFIG_DUAL_FILTER
- for (i = 0; i < 4; ++i)
- mbmi->interp_filter[i] = cm->interp_filter == SWITCHABLE
- ? EIGHTTAP_REGULAR
- : cm->interp_filter;
-#else
- mbmi->interp_filter =
- cm->interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR : cm->interp_filter;
-#endif // CONFIG_DUAL_FILTER
- x->skip = 0;
- set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
-
- // Select prediction reference frames.
- for (i = 0; i < MAX_MB_PLANE; i++) {
- xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
- if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
- }
-
-#if CONFIG_VAR_TX
- mbmi->inter_tx_size[0][0] = mbmi->tx_size;
- mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
-#endif // CONFIG_VAR_TX
-
- if (ref_frame == INTRA_FRAME) {
- int rate;
- if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, &distortion_y,
- NULL, best_rd) >= best_rd)
- continue;
- rate2 += rate;
- rate2 += intra_cost_penalty;
- distortion2 += distortion_y;
-
- if (rate_uv_intra == INT_MAX) {
- choose_intra_uv_mode(cpi, x, ctx, bsize, TX_4X4, &rate_uv_intra,
- &rate_uv_tokenonly, &dist_uv, &skip_uv, &mode_uv);
- }
- rate2 += rate_uv_intra;
- rate_uv = rate_uv_tokenonly;
- distortion2 += dist_uv;
- distortion_uv = dist_uv;
- mbmi->uv_mode = mode_uv;
- } else {
- int rate;
- int64_t distortion;
- int64_t this_rd_thresh;
- int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
- int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
- int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
- int tmp_best_skippable = 0;
- int switchable_filter_index;
- int_mv *second_ref =
- comp_pred ? &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL;
- b_mode_info tmp_best_bmodes[16]; // Should this be 4 ?
- MB_MODE_INFO tmp_best_mbmode;
-#if CONFIG_DUAL_FILTER
- BEST_SEG_INFO bsi[DUAL_FILTER_SET_SIZE];
-#else
- BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
-#endif // CONFIG_DUAL_FILTER
- int pred_exists = 0;
- int uv_skippable;
-#if CONFIG_EXT_INTER
- int_mv compound_seg_newmvs[4][2];
- for (i = 0; i < 4; i++) {
- compound_seg_newmvs[i][0].as_int = INVALID_MV;
- compound_seg_newmvs[i][1].as_int = INVALID_MV;
- }
-#endif // CONFIG_EXT_INTER
-
- this_rd_thresh = (ref_frame == LAST_FRAME)
- ? rd_opt->threshes[segment_id][bsize][THR_LAST]
- : rd_opt->threshes[segment_id][bsize][THR_ALTR];
-#if CONFIG_EXT_REFS
- this_rd_thresh = (ref_frame == LAST2_FRAME)
- ? rd_opt->threshes[segment_id][bsize][THR_LAST2]
- : this_rd_thresh;
- this_rd_thresh = (ref_frame == LAST3_FRAME)
- ? rd_opt->threshes[segment_id][bsize][THR_LAST3]
- : this_rd_thresh;
- this_rd_thresh = (ref_frame == BWDREF_FRAME)
- ? rd_opt->threshes[segment_id][bsize][THR_BWDR]
- : this_rd_thresh;
-#endif // CONFIG_EXT_REFS
- this_rd_thresh = (ref_frame == GOLDEN_FRAME)
- ? rd_opt->threshes[segment_id][bsize][THR_GOLD]
- : this_rd_thresh;
-
- // TODO(any): Add search of the tx_type to improve rd performance at the
- // expense of speed.
- mbmi->tx_type = DCT_DCT;
-
- if (cm->interp_filter != BILINEAR) {
-#if CONFIG_DUAL_FILTER
- tmp_best_filter[0] = EIGHTTAP_REGULAR;
- tmp_best_filter[1] = EIGHTTAP_REGULAR;
- tmp_best_filter[2] = EIGHTTAP_REGULAR;
- tmp_best_filter[3] = EIGHTTAP_REGULAR;
-#else
- tmp_best_filter = EIGHTTAP_REGULAR;
-#endif // CONFIG_DUAL_FILTER
- if (x->source_variance < sf->disable_filter_search_var_thresh) {
-#if CONFIG_DUAL_FILTER
- tmp_best_filter[0] = EIGHTTAP_REGULAR;
-#else
- tmp_best_filter = EIGHTTAP_REGULAR;
-#endif // CONFIG_DUAL_FILTER
- } else if (sf->adaptive_pred_interp_filter == 1 &&
- ctx->pred_interp_filter < SWITCHABLE) {
-#if CONFIG_DUAL_FILTER
- tmp_best_filter[0] = ctx->pred_interp_filter;
-#else
- tmp_best_filter = ctx->pred_interp_filter;
-#endif // CONFIG_DUAL_FILTER
- } else if (sf->adaptive_pred_interp_filter == 2) {
-#if CONFIG_DUAL_FILTER
- tmp_best_filter[0] = ctx->pred_interp_filter < SWITCHABLE
- ? ctx->pred_interp_filter
- : 0;
-#else
- tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE
- ? ctx->pred_interp_filter
- : 0;
-#endif // CONFIG_DUAL_FILTER
- } else {
-#if CONFIG_DUAL_FILTER
- const int filter_set_size = DUAL_FILTER_SET_SIZE;
-#else
- const int filter_set_size = SWITCHABLE_FILTERS;
-#endif // CONFIG_DUAL_FILTER
- for (switchable_filter_index = 0;
- switchable_filter_index < filter_set_size;
- ++switchable_filter_index) {
- int newbest, rs;
- int64_t rs_rd;
- MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
-#if CONFIG_DUAL_FILTER
- mbmi->interp_filter[0] = filter_sets[switchable_filter_index][0];
- mbmi->interp_filter[1] = filter_sets[switchable_filter_index][1];
- mbmi->interp_filter[2] = filter_sets[switchable_filter_index][0];
- mbmi->interp_filter[3] = filter_sets[switchable_filter_index][1];
-#else
- mbmi->interp_filter = switchable_filter_index;
-#endif // CONFIG_DUAL_FILTER
- tmp_rd = rd_pick_inter_best_sub8x8_mode(
- cpi, x, &mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd,
- &rate, &rate_y, &distortion, &skippable, &total_sse,
- (int)this_rd_thresh, seg_mvs,
-#if CONFIG_EXT_INTER
- compound_seg_newmvs,
-#endif // CONFIG_EXT_INTER
- bsi, switchable_filter_index, mi_row, mi_col);
- if (tmp_rd == INT64_MAX) continue;
- rs = av1_get_switchable_rate(cpi, xd);
- rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
- if (cm->interp_filter == SWITCHABLE) tmp_rd += rs_rd;
-
- newbest = (tmp_rd < tmp_best_rd);
- if (newbest) {
-#if CONFIG_DUAL_FILTER
- tmp_best_filter[0] = mbmi->interp_filter[0];
- tmp_best_filter[1] = mbmi->interp_filter[1];
- tmp_best_filter[2] = mbmi->interp_filter[2];
- tmp_best_filter[3] = mbmi->interp_filter[3];
-#else
- tmp_best_filter = mbmi->interp_filter;
-#endif // CONFIG_DUAL_FILTER
- tmp_best_rd = tmp_rd;
- }
- if ((newbest && cm->interp_filter == SWITCHABLE) ||
- (
-#if CONFIG_DUAL_FILTER
- mbmi->interp_filter[0] == cm->interp_filter
-#else
- mbmi->interp_filter == cm->interp_filter
-#endif // CONFIG_DUAL_FILTER
- && cm->interp_filter != SWITCHABLE)) {
- tmp_best_rdu = tmp_rd;
- tmp_best_rate = rate;
- tmp_best_ratey = rate_y;
- tmp_best_distortion = distortion;
- tmp_best_sse = total_sse;
- tmp_best_skippable = skippable;
- tmp_best_mbmode = *mbmi;
- for (i = 0; i < 4; i++) {
- tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
- }
- pred_exists = 1;
- }
- } // switchable_filter_index loop
- }
- }
-
- if (tmp_best_rdu == INT64_MAX && pred_exists) continue;
-
-#if CONFIG_DUAL_FILTER
- mbmi->interp_filter[0] =
- (cm->interp_filter == SWITCHABLE ? tmp_best_filter[0]
- : cm->interp_filter);
- mbmi->interp_filter[1] =
- (cm->interp_filter == SWITCHABLE ? tmp_best_filter[1]
- : cm->interp_filter);
- mbmi->interp_filter[2] =
- (cm->interp_filter == SWITCHABLE ? tmp_best_filter[2]
- : cm->interp_filter);
- mbmi->interp_filter[3] =
- (cm->interp_filter == SWITCHABLE ? tmp_best_filter[3]
- : cm->interp_filter);
-#else
- mbmi->interp_filter =
- (cm->interp_filter == SWITCHABLE ? tmp_best_filter
- : cm->interp_filter);
-#endif // CONFIG_DUAL_FILTER
-
- if (!pred_exists) {
- // Handles the special case when a filter that is not in the
- // switchable list (bilinear) is indicated at the frame level
- tmp_rd = rd_pick_inter_best_sub8x8_mode(
- cpi, x, &x->mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd,
- &rate, &rate_y, &distortion, &skippable, &total_sse,
- (int)this_rd_thresh, seg_mvs,
-#if CONFIG_EXT_INTER
- compound_seg_newmvs,
-#endif // CONFIG_EXT_INTER
- bsi, 0, mi_row, mi_col);
- if (tmp_rd == INT64_MAX) continue;
- } else {
- total_sse = tmp_best_sse;
- rate = tmp_best_rate;
- rate_y = tmp_best_ratey;
- distortion = tmp_best_distortion;
- skippable = tmp_best_skippable;
- *mbmi = tmp_best_mbmode;
- for (i = 0; i < 4; i++) xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
- }
- // Add in the cost of the transform type
- if (!xd->lossless[mbmi->segment_id]) {
- int rate_tx_type = 0;
-#if CONFIG_EXT_TX
- if (get_ext_tx_types(mbmi->tx_size, bsize, 1, cm->reduced_tx_set_used) >
- 1) {
- const int eset =
- get_ext_tx_set(mbmi->tx_size, bsize, 1, cm->reduced_tx_set_used);
- rate_tx_type =
- cpi->inter_tx_type_costs[eset][mbmi->tx_size][mbmi->tx_type];
- }
-#else
- if (mbmi->tx_size < TX_32X32) {
- rate_tx_type = cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
- }
-#endif // CONFIG_EXT_TX
- rate += rate_tx_type;
- rate_y += rate_tx_type;
- }
-
- rate2 += rate;
- distortion2 += distortion;
-
- if (cm->interp_filter == SWITCHABLE)
- rate2 += av1_get_switchable_rate(cpi, xd);
-
- if (!mode_excluded)
- mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
- : cm->reference_mode == COMPOUND_REFERENCE;
-
- compmode_cost = av1_cost_bit(comp_mode_p, comp_pred);
-
- tmp_best_rdu =
- best_rd - AOMMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
- RDCOST(x->rdmult, x->rddiv, 0, total_sse));
-
- if (tmp_best_rdu > 0) {
- // If even the 'Y' rd value of split is higher than best so far
- // then dont bother looking at UV
- int is_cost_valid_uv;
- RD_STATS rd_stats_uv;
- av1_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, NULL,
- BLOCK_8X8);
-#if CONFIG_VAR_TX
- is_cost_valid_uv =
- inter_block_uvrd(cpi, x, &rd_stats_uv, BLOCK_8X8, tmp_best_rdu);
-#else
- is_cost_valid_uv =
- super_block_uvrd(cpi, x, &rd_stats_uv, BLOCK_8X8, tmp_best_rdu);
-#endif // CONFIG_VAR_TX
- rate_uv = rd_stats_uv.rate;
- distortion_uv = rd_stats_uv.dist;
- uv_skippable = rd_stats_uv.skip;
- uv_sse = rd_stats_uv.sse;
-
- if (!is_cost_valid_uv) continue;
- rate2 += rate_uv;
- distortion2 += distortion_uv;
- skippable = skippable && uv_skippable;
- total_sse += uv_sse;
- } else {
- continue;
- }
- }
-
- if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
-
- // Estimate the reference frame signaling cost and add it
- // to the rolling cost variable.
- if (second_ref_frame > INTRA_FRAME) {
- rate2 += ref_costs_comp[ref_frame];
-#if CONFIG_EXT_REFS
- rate2 += ref_costs_comp[second_ref_frame];
-#endif // CONFIG_EXT_REFS
- } else {
- rate2 += ref_costs_single[ref_frame];
- }
-
- if (!disable_skip) {
- // Skip is never coded at the segment level for sub8x8 blocks and instead
- // always coded in the bitstream at the mode info level.
-
- if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
- if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
- RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
- // Add in the cost of the no skip flag.
- rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
- } else {
- // FIXME(rbultje) make this work for splitmv also
- rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
- distortion2 = total_sse;
- assert(total_sse >= 0);
- rate2 -= (rate_y + rate_uv);
- rate_y = 0;
- rate_uv = 0;
- this_skip2 = 1;
- }
- } else {
- // Add in the cost of the no skip flag.
- rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
- }
-
- // Calculate the final RD estimate for this mode.
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
- }
-
- if (!disable_skip && ref_frame == INTRA_FRAME) {
- for (i = 0; i < REFERENCE_MODES; ++i)
- best_pred_rd[i] = AOMMIN(best_pred_rd[i], this_rd);
- }
-
- // Did this mode help.. i.e. is it the new best mode
- if (this_rd < best_rd || x->skip) {
- if (!mode_excluded) {
- // Note index of best mode so far
- best_ref_index = ref_index;
-
- if (ref_frame == INTRA_FRAME) {
- /* required for left and above block mv */
- mbmi->mv[0].as_int = 0;
- }
-
- rd_cost->rate = rate2;
-#if CONFIG_SUPERTX
- *returnrate_nocoef = rate2 - rate_y - rate_uv;
- if (!disable_skip)
- *returnrate_nocoef -=
- av1_cost_bit(av1_get_skip_prob(cm, xd), this_skip2);
- *returnrate_nocoef -= av1_cost_bit(av1_get_intra_inter_prob(cm, xd),
- mbmi->ref_frame[0] != INTRA_FRAME);
- assert(*returnrate_nocoef > 0);
-#endif // CONFIG_SUPERTX
- rd_cost->dist = distortion2;
- rd_cost->rdcost = this_rd;
- best_rd = this_rd;
- best_yrd =
- best_rd - RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
- best_mbmode = *mbmi;
- best_skip2 = this_skip2;
-
-#if CONFIG_VAR_TX
- for (i = 0; i < MAX_MB_PLANE; ++i)
- memset(ctx->blk_skip[i], 0, sizeof(uint8_t) * ctx->num_4x4_blk);
-#endif // CONFIG_VAR_TX
-
- for (i = 0; i < 4; i++) best_bmodes[i] = xd->mi[0]->bmi[i];
- }
- }
-
- /* keep record of best compound/single-only prediction */
- if (!disable_skip && ref_frame != INTRA_FRAME) {
- int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
-
- if (cm->reference_mode == REFERENCE_MODE_SELECT) {
- single_rate = rate2 - compmode_cost;
- hybrid_rate = rate2;
- } else {
- single_rate = rate2;
- hybrid_rate = rate2 + compmode_cost;
- }
-
- single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
- hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
-
- if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE])
- best_pred_rd[SINGLE_REFERENCE] = single_rd;
- else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE])
- best_pred_rd[COMPOUND_REFERENCE] = single_rd;
-
- if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
- best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
- }
-
- if (x->skip && !comp_pred) break;
- }
-
- if (best_rd >= best_rd_so_far) {
- rd_cost->rate = INT_MAX;
- rd_cost->rdcost = INT64_MAX;
-#if CONFIG_SUPERTX
- *returnrate_nocoef = INT_MAX;
-#endif // CONFIG_SUPERTX
- return;
- }
-
- if (best_rd == INT64_MAX) {
- rd_cost->rate = INT_MAX;
- rd_cost->dist = INT64_MAX;
- rd_cost->rdcost = INT64_MAX;
-#if CONFIG_SUPERTX
- *returnrate_nocoef = INT_MAX;
-#endif // CONFIG_SUPERTX
- return;
- }
-
-#if CONFIG_DUAL_FILTER
- assert((cm->interp_filter == SWITCHABLE) ||
- (cm->interp_filter == best_mbmode.interp_filter[0]) ||
- !is_inter_block(&best_mbmode));
-#else
- assert((cm->interp_filter == SWITCHABLE) ||
- (cm->interp_filter == best_mbmode.interp_filter) ||
- !is_inter_block(&best_mbmode));
-#endif // CONFIG_DUAL_FILTER
-
- av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
- sf->adaptive_rd_thresh, bsize, best_ref_index);
-
- // macroblock modes
- *mbmi = best_mbmode;
-#if CONFIG_VAR_TX
- mbmi->inter_tx_size[0][0] = mbmi->tx_size;
-#endif // CONFIG_VAR_TX
-
- x->skip |= best_skip2;
- if (!is_inter_block(&best_mbmode)) {
- for (i = 0; i < 4; i++) xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
- } else {
- for (i = 0; i < 4; ++i)
- memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
-
-#if CONFIG_REF_MV
- mbmi->pred_mv[0].as_int = xd->mi[0]->bmi[3].pred_mv[0].as_int;
- mbmi->pred_mv[1].as_int = xd->mi[0]->bmi[3].pred_mv[1].as_int;
-#endif // CONFIG_REF_MV
- mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
- mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
- }
-
-// Note: this section is needed since the mode may have been forced to ZEROMV
-#if CONFIG_GLOBAL_MOTION
- if (mbmi->mode == ZEROMV
-#if CONFIG_EXT_INTER
- || mbmi->mode == ZERO_ZEROMV
-#endif // CONFIG_EXT_INTER
- ) {
- if (is_nontrans_global_motion(xd)) {
-#if CONFIG_DUAL_FILTER
- mbmi->interp_filter[0] = cm->interp_filter == SWITCHABLE
- ? EIGHTTAP_REGULAR
- : cm->interp_filter;
- mbmi->interp_filter[1] = cm->interp_filter == SWITCHABLE
- ? EIGHTTAP_REGULAR
- : cm->interp_filter;
-#else
- mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR
- : cm->interp_filter;
-#endif // CONFIG_DUAL_FILTER
- }
- }
-#endif // CONFIG_GLOBAL_MOTION
-
- for (i = 0; i < REFERENCE_MODES; ++i) {
- if (best_pred_rd[i] == INT64_MAX)
- best_pred_diff[i] = INT_MIN;
- else
- best_pred_diff[i] = best_rd - best_pred_rd[i];
- }
-
- store_coding_context(x, ctx, best_ref_index, best_pred_diff, 0);
-}
-
#if CONFIG_MOTION_VAR
// This function has a structure similar to av1_build_obmc_inter_prediction
//
@@ -12454,9 +10658,14 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
i = 0;
do { // for each mi in the above row
const int mi_col_offset = i;
- const MB_MODE_INFO *const above_mbmi =
+ const MB_MODE_INFO *above_mbmi =
&xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
- const BLOCK_SIZE a_bsize = above_mbmi->sb_type;
+#if CONFIG_CHROMA_SUB8X8
+ if (above_mbmi->sb_type < BLOCK_8X8)
+ above_mbmi =
+ &xd->mi[mi_col_offset + 1 + mi_row_offset * xd->mi_stride]->mbmi;
+#endif
+ const BLOCK_SIZE a_bsize = AOMMAX(above_mbmi->sb_type, BLOCK_8X8);
const int mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]);
const int neighbor_bw = mi_step * MI_SIZE;
@@ -12528,9 +10737,15 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
i = 0;
do { // for each mi in the left column
const int mi_row_offset = i;
- const MB_MODE_INFO *const left_mbmi =
+ MB_MODE_INFO *left_mbmi =
&xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
- const BLOCK_SIZE l_bsize = left_mbmi->sb_type;
+
+#if CONFIG_CHROMA_SUB8X8
+ if (left_mbmi->sb_type < BLOCK_8X8)
+ left_mbmi =
+ &xd->mi[mi_col_offset + (mi_row_offset + 1) * xd->mi_stride]->mbmi;
+#endif
+ const BLOCK_SIZE l_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8);
const int mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]);
const int neighbor_bh = mi_step * MI_SIZE;
@@ -12636,7 +10851,7 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
av1_setup_dst_planes(x->e_mbd.plane, bsize,
get_frame_new_buffer(&cpi->common), mi_row, mi_col);
- av1_build_inter_predictors_sb(xd, mi_row, mi_col, NULL, bsize);
+ av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
av1_subtract_plane(x, bsize, 0);
super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
diff --git a/third_party/aom/av1/encoder/rdopt.h b/third_party/aom/av1/encoder/rdopt.h
index a7053b289..e5d778fe5 100644
--- a/third_party/aom/av1/encoder/rdopt.h
+++ b/third_party/aom/av1/encoder/rdopt.h
@@ -62,6 +62,12 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse,
OUTPUT_STATUS output_status);
+#if CONFIG_DAALA_DIST
+int64_t av1_daala_dist(const uint8_t *src, int src_stride, const uint8_t *dst,
+ int dst_stride, int bsw, int bsh, int qm,
+ int use_activity_masking, int qindex);
+#endif
+
#if !CONFIG_PVQ || CONFIG_VAR_TX
int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
@@ -101,16 +107,6 @@ int av1_active_h_edge(const struct AV1_COMP *cpi, int mi_row, int mi_step);
int av1_active_v_edge(const struct AV1_COMP *cpi, int mi_col, int mi_step);
int av1_active_edge_sb(const struct AV1_COMP *cpi, int mi_row, int mi_col);
-void av1_rd_pick_inter_mode_sub8x8(const struct AV1_COMP *cpi,
- struct TileDataEnc *tile_data,
- struct macroblock *x, int mi_row, int mi_col,
- struct RD_STATS *rd_cost,
-#if CONFIG_SUPERTX
- int *returnrate_nocoef,
-#endif // CONFIG_SUPERTX
- BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
- int64_t best_rd_so_far);
-
#if CONFIG_MOTION_VAR && CONFIG_NCOBMC
void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
int mi_row, int mi_col);
diff --git a/third_party/aom/av1/encoder/speed_features.c b/third_party/aom/av1/encoder/speed_features.c
index 20c96761b..e2275a54f 100644
--- a/third_party/aom/av1/encoder/speed_features.c
+++ b/third_party/aom/av1/encoder/speed_features.c
@@ -139,8 +139,10 @@ static void set_good_speed_feature_framesize_dependent(AV1_COMP *cpi,
}
}
-static void set_good_speed_feature(AV1_COMP *cpi, AV1_COMMON *cm,
- SPEED_FEATURES *sf, int speed) {
+static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
+ SPEED_FEATURES *sf,
+ int speed) {
+ AV1_COMMON *const cm = &cpi->common;
const int boosted = frame_is_boosted(cpi);
if (speed >= 1) {
@@ -205,6 +207,9 @@ static void set_good_speed_feature(AV1_COMP *cpi, AV1_COMMON *cm,
#if CONFIG_EXT_TX
sf->tx_type_search.prune_mode = PRUNE_TWO;
#endif
+#if CONFIG_GLOBAL_MOTION
+ sf->gm_search_type = GM_DISABLE_SEARCH;
+#endif // CONFIG_GLOBAL_MOTION
}
if (speed >= 4) {
@@ -286,6 +291,12 @@ static void set_good_speed_feature(AV1_COMP *cpi, AV1_COMMON *cm,
sf->coeff_prob_appx_step = 4;
sf->mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
}
+ if (speed >= 8) {
+ sf->mv.search_method = FAST_DIAMOND;
+ sf->mv.fullpel_search_step_param = 10;
+ sf->mv.subpel_force_stop = 2;
+ sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
+ }
}
void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi) {
@@ -339,12 +350,13 @@ void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi) {
}
void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
- SPEED_FEATURES *const sf = &cpi->sf;
AV1_COMMON *const cm = &cpi->common;
+ SPEED_FEATURES *const sf = &cpi->sf;
MACROBLOCK *const x = &cpi->td.mb;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
int i;
+ (void)cm;
// best quality defaults
sf->frame_parameter_update = 1;
sf->mv.search_method = NSTEP;
@@ -418,13 +430,16 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
// Set this at the appropriate speed levels
sf->use_transform_domain_distortion = 0;
+#if CONFIG_GLOBAL_MOTION
+ sf->gm_search_type = GM_FULL_SEARCH;
+#endif // CONFIG_GLOBAL_MOTION
if (oxcf->mode == GOOD
#if CONFIG_XIPHRC
|| oxcf->pass == 1
#endif
)
- set_good_speed_feature(cpi, cm, sf, oxcf->speed);
+ set_good_speed_features_framesize_independent(cpi, sf, oxcf->speed);
// sf->partition_search_breakout_dist_thr is set assuming max 64x64
// blocks. Normalise this if the blocks are bigger.
diff --git a/third_party/aom/av1/encoder/speed_features.h b/third_party/aom/av1/encoder/speed_features.h
index af54a1a9a..5710d77c7 100644
--- a/third_party/aom/av1/encoder/speed_features.h
+++ b/third_party/aom/av1/encoder/speed_features.h
@@ -24,6 +24,9 @@ enum {
(1 << D207_PRED) | (1 << D63_PRED) |
#if CONFIG_ALT_INTRA
(1 << SMOOTH_PRED) |
+#if CONFIG_SMOOTH_HV
+ (1 << SMOOTH_V_PRED) | (1 << SMOOTH_H_PRED) |
+#endif // CONFIG_SMOOTH_HV
#endif // CONFIG_ALT_INTRA
(1 << TM_PRED),
INTRA_DC = (1 << DC_PRED),
@@ -36,37 +39,33 @@ enum {
#if CONFIG_EXT_INTER
enum {
INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV) |
- (1 << NEAREST_NEARESTMV) | (1 << NEAR_NEARMV) |
- (1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) | (1 << NEW_NEWMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << NEAR_NEARMV) | (1 << NEW_NEWMV) |
(1 << NEAREST_NEWMV) | (1 << NEAR_NEWMV) | (1 << NEW_NEARMV) |
(1 << NEW_NEARESTMV) | (1 << ZERO_ZEROMV),
INTER_NEAREST = (1 << NEARESTMV) | (1 << NEAREST_NEARESTMV) |
- (1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) |
(1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV),
INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV) |
(1 << NEAREST_NEARESTMV) | (1 << NEW_NEWMV) |
- (1 << NEAR_NEARESTMV) | (1 << NEAREST_NEARMV) |
(1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) |
(1 << NEW_NEARMV) | (1 << NEAR_NEWMV),
INTER_NEAREST_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) |
(1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) |
- (1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) |
(1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV),
- INTER_NEAREST_NEW_ZERO =
- (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV) |
- (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) | (1 << NEW_NEWMV) |
- (1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) | (1 << NEW_NEARESTMV) |
- (1 << NEAREST_NEWMV) | (1 << NEW_NEARMV) | (1 << NEAR_NEWMV),
- INTER_NEAREST_NEAR_NEW =
- (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV) |
- (1 << NEAREST_NEARESTMV) | (1 << NEW_NEWMV) | (1 << NEAREST_NEARMV) |
- (1 << NEAR_NEARESTMV) | (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) |
- (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) | (1 << NEAR_NEARMV),
- INTER_NEAREST_NEAR_ZERO =
- (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) |
- (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) | (1 << NEAREST_NEARMV) |
- (1 << NEAR_NEARESTMV) | (1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV) |
- (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) | (1 << NEAR_NEARMV),
+ INTER_NEAREST_NEW_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) |
+ (1 << NEW_NEWMV) | (1 << NEW_NEARESTMV) |
+ (1 << NEAREST_NEWMV) | (1 << NEW_NEARMV) |
+ (1 << NEAR_NEWMV),
+ INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << NEW_NEWMV) |
+ (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) |
+ (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) |
+ (1 << NEAR_NEARMV),
+ INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) |
+ (1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV) |
+ (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) |
+ (1 << NEAR_NEARMV),
};
#else
enum {
@@ -196,14 +195,7 @@ typedef enum {
// Always use a fixed size partition
FIXED_PARTITION,
- REFERENCE_PARTITION,
-
- // Use an arbitrary partitioning scheme based on source variance within
- // a 64X64 SB
- VAR_BASED_PARTITION,
-
- // Use non-fixed partitions based on source variance
- SOURCE_VAR_BASED_PARTITION
+ REFERENCE_PARTITION
} PARTITION_SEARCH_TYPE;
typedef enum {
@@ -251,6 +243,14 @@ typedef struct MESH_PATTERN {
int interval;
} MESH_PATTERN;
+#if CONFIG_GLOBAL_MOTION
+typedef enum {
+ GM_FULL_SEARCH,
+ GM_REDUCED_REF_SEARCH,
+ GM_DISABLE_SEARCH
+} GM_SEARCH_TYPE;
+#endif // CONFIG_GLOBAL_MOTION
+
typedef struct SPEED_FEATURES {
MV_SPEED_FEATURES mv;
@@ -432,7 +432,7 @@ typedef struct SPEED_FEATURES {
// TODO(aconverse): Fold this into one of the other many mode skips
BLOCK_SIZE max_intra_bsize;
- // The frequency that we check if SOURCE_VAR_BASED_PARTITION or
+ // The frequency that we check if
// FIXED_PARTITION search type should be used.
int search_type_check_frequency;
@@ -470,6 +470,10 @@ typedef struct SPEED_FEATURES {
// Whether to compute distortion in the image domain (slower but
// more accurate), or in the transform domain (faster but less acurate).
int use_transform_domain_distortion;
+
+#if CONFIG_GLOBAL_MOTION
+ GM_SEARCH_TYPE gm_search_type;
+#endif // CONFIG_GLOBAL_MOTION
} SPEED_FEATURES;
struct AV1_COMP;
diff --git a/third_party/aom/av1/encoder/subexp.c b/third_party/aom/av1/encoder/subexp.c
index 8960d3341..6a8ba12d8 100644
--- a/third_party/aom/av1/encoder/subexp.c
+++ b/third_party/aom/av1/encoder/subexp.c
@@ -179,83 +179,6 @@ int av1_prob_diff_update_savings_search_model(const unsigned int *ct,
return bestsavings;
}
-#if CONFIG_SUBFRAME_PROB_UPDATE
-static int get_cost(unsigned int ct[][2], aom_prob p, int n) {
- int i, p0 = p;
- unsigned int total_ct[2] = { 0, 0 };
- int cost = 0;
-
- for (i = 0; i <= n; ++i) {
- cost += cost_branch256(ct[i], p);
- total_ct[0] += ct[i][0];
- total_ct[1] += ct[i][1];
- if (i < n)
- p = av1_merge_probs(p0, total_ct, COEF_COUNT_SAT, COEF_MAX_UPDATE_FACTOR);
- }
- return cost;
-}
-
-int av1_prob_update_search_subframe(unsigned int ct[][2], aom_prob oldp,
- aom_prob *bestp, aom_prob upd, int n) {
- const int old_b = get_cost(ct, oldp, n);
- int bestsavings = 0;
- const int upd_cost = av1_cost_one(upd) - av1_cost_zero(upd);
- aom_prob newp, bestnewp = oldp;
- const int step = *bestp > oldp ? -1 : 1;
-
- for (newp = *bestp; newp != oldp; newp += step) {
- const int new_b = get_cost(ct, newp, n);
- const int update_b = prob_diff_update_cost(newp, oldp) + upd_cost;
- const int savings = old_b - new_b - update_b;
- if (savings > bestsavings) {
- bestsavings = savings;
- bestnewp = newp;
- }
- }
- *bestp = bestnewp;
- return bestsavings;
-}
-
-int av1_prob_update_search_model_subframe(
- unsigned int ct[ENTROPY_NODES][COEF_PROBS_BUFS][2], const aom_prob *oldp,
- aom_prob *bestp, aom_prob upd, int stepsize, int n) {
- int i, old_b, new_b, update_b, savings, bestsavings;
- int newp;
- const int step_sign = *bestp > oldp[PIVOT_NODE] ? -1 : 1;
- const int step = stepsize * step_sign;
- const int upd_cost = av1_cost_one(upd) - av1_cost_zero(upd);
- aom_prob bestnewp, newplist[ENTROPY_NODES], oldplist[ENTROPY_NODES];
- av1_model_to_full_probs(oldp, oldplist);
- memcpy(newplist, oldp, sizeof(aom_prob) * UNCONSTRAINED_NODES);
- for (i = UNCONSTRAINED_NODES, old_b = 0; i < ENTROPY_NODES; ++i)
- old_b += get_cost(ct[i], oldplist[i], n);
- old_b += get_cost(ct[PIVOT_NODE], oldplist[PIVOT_NODE], n);
-
- bestsavings = 0;
- bestnewp = oldp[PIVOT_NODE];
-
- assert(stepsize > 0);
-
- for (newp = *bestp; (newp - oldp[PIVOT_NODE]) * step_sign < 0; newp += step) {
- if (newp < 1 || newp > 255) continue;
- newplist[PIVOT_NODE] = newp;
- av1_model_to_full_probs(newplist, newplist);
- for (i = UNCONSTRAINED_NODES, new_b = 0; i < ENTROPY_NODES; ++i)
- new_b += get_cost(ct[i], newplist[i], n);
- new_b += get_cost(ct[PIVOT_NODE], newplist[PIVOT_NODE], n);
- update_b = prob_diff_update_cost(newp, oldp[PIVOT_NODE]) + upd_cost;
- savings = old_b - new_b - update_b;
- if (savings > bestsavings) {
- bestsavings = savings;
- bestnewp = newp;
- }
- }
-
- *bestp = bestnewp;
- return bestsavings;
-}
-#endif // CONFIG_SUBFRAME_PROB_UPDATE
-
void av1_cond_prob_diff_update(aom_writer *w, aom_prob *oldp,
const unsigned int ct[2], int probwt) {
const aom_prob upd = DIFF_UPDATE_PROB;
diff --git a/third_party/aom/av1/encoder/subexp.h b/third_party/aom/av1/encoder/subexp.h
index 049265cb8..580edabdb 100644
--- a/third_party/aom/av1/encoder/subexp.h
+++ b/third_party/aom/av1/encoder/subexp.h
@@ -35,13 +35,6 @@ int av1_prob_diff_update_savings_search_model(const unsigned int *ct,
int av1_cond_prob_diff_update_savings(aom_prob *oldp, const unsigned int ct[2],
int probwt);
-#if CONFIG_SUBFRAME_PROB_UPDATE
-int av1_prob_update_search_subframe(unsigned int ct[][2], aom_prob oldp,
- aom_prob *bestp, aom_prob upd, int n);
-int av1_prob_update_search_model_subframe(
- unsigned int ct[ENTROPY_NODES][COEF_PROBS_BUFS][2], const aom_prob *oldp,
- aom_prob *bestp, aom_prob upd, int stepsize, int n);
-#endif // CONFIG_SUBFRAME_PROB_UPDATE
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/encoder/temporal_filter.c b/third_party/aom/av1/encoder/temporal_filter.c
index de962fe84..1ed1ebdb2 100644
--- a/third_party/aom/av1/encoder/temporal_filter.c
+++ b/third_party/aom/av1/encoder/temporal_filter.c
@@ -281,14 +281,10 @@ static int temporal_filter_find_matching_mb_c(AV1_COMP *cpi,
av1_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
-#if CONFIG_REF_MV
x->mvcost = x->mv_cost_stack[0];
x->nmvjointcost = x->nmv_vec_cost[0];
- x->mvsadcost = x->mvcost;
- x->nmvjointsadcost = x->nmvjointcost;
-#endif
- // Ignore mv costing by sending NULL pointer instead of cost arrays
+ // Use mv costing from x->mvcost directly
av1_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1,
cond_cost_list(cpi, cost_list), &cpi->fn_ptr[BLOCK_16X16], 0,
&best_ref_mv1);
@@ -299,8 +295,11 @@ static int temporal_filter_find_matching_mb_c(AV1_COMP *cpi,
bestsme = cpi->find_fractional_mv_step(
x, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_iters_per_step,
- cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
- 0);
+ cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL,
+#if CONFIG_EXT_INTER
+ NULL, 0, 0,
+#endif
+ 0, 0, 0);
x->e_mbd.mi[0]->bmi[0].as_mv[0] = x->best_mv;
diff --git a/third_party/aom/av1/encoder/tokenize.c b/third_party/aom/av1/encoder/tokenize.c
index f48493bf8..18d2cd958 100644
--- a/third_party/aom/av1/encoder/tokenize.c
+++ b/third_party/aom/av1/encoder/tokenize.c
@@ -23,6 +23,9 @@
#include "av1/encoder/cost.h"
#include "av1/encoder/encoder.h"
+#if CONFIG_LV_MAP
+#include "av1/encoder/encodetxb.c"
+#endif
#include "av1/encoder/rdopt.h"
#include "av1/encoder/tokenize.h"
@@ -261,20 +264,6 @@ const av1_extra_bit av1_extra_bits[ENTROPY_TOKENS] = {
};
#endif
-#if !CONFIG_EC_MULTISYMBOL
-const struct av1_token av1_coef_encodings[ENTROPY_TOKENS] = {
- { 2, 2 }, { 6, 3 }, { 28, 5 }, { 58, 6 }, { 59, 6 }, { 60, 6 },
- { 61, 6 }, { 124, 7 }, { 125, 7 }, { 126, 7 }, { 127, 7 }, { 0, 1 }
-};
-#endif // !CONFIG_EC_MULTISYMBOL
-
-struct tokenize_b_args {
- const AV1_COMP *cpi;
- ThreadData *td;
- TOKENEXTRA **tp;
- int this_rate;
-};
-
#if !CONFIG_PVQ || CONFIG_VAR_TX
static void cost_coeffs_b(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
@@ -314,7 +303,6 @@ static void set_entropy_context_b(int plane, int block, int blk_row,
blk_row);
}
-#if CONFIG_NEW_TOKENSET
static INLINE void add_token(TOKENEXTRA **t,
aom_cdf_prob (*tail_cdf)[CDF_SIZE(ENTROPY_TOKENS)],
aom_cdf_prob (*head_cdf)[CDF_SIZE(ENTROPY_TOKENS)],
@@ -328,25 +316,6 @@ static INLINE void add_token(TOKENEXTRA **t,
(*t)->first_val = first_val;
(*t)++;
}
-
-#else // CONFIG_NEW_TOKENSET
-static INLINE void add_token(
- TOKENEXTRA **t, const aom_prob *context_tree,
-#if CONFIG_EC_MULTISYMBOL
- aom_cdf_prob (*token_cdf)[CDF_SIZE(ENTROPY_TOKENS)],
-#endif // CONFIG_EC_MULTISYMBOL
- int32_t extra, uint8_t token, uint8_t skip_eob_node, unsigned int *counts) {
- (*t)->token = token;
- (*t)->extra = extra;
- (*t)->context_tree = context_tree;
-#if CONFIG_EC_MULTISYMBOL
- (*t)->token_cdf = token_cdf;
-#endif // CONFIG_EC_MULTISYMBOL
- (*t)->skip_eob_node = skip_eob_node;
- (*t)++;
- ++counts[token];
-}
-#endif // CONFIG_NEW_TOKENSET
#endif // !CONFIG_PVQ || CONFIG_VAR_TX
#if CONFIG_PALETTE
@@ -471,22 +440,11 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,
const int ref = is_inter_block(mbmi);
unsigned int(*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
td->rd_counts.coef_counts[txsize_sqr_map[tx_size]][type][ref];
-#if !CONFIG_NEW_TOKENSET
-#if CONFIG_SUBFRAME_PROB_UPDATE
- const aom_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
- cpi->subframe_stats.coef_probs_buf[cpi->common.coef_probs_update_idx]
- [txsize_sqr_map[tx_size]][type][ref];
-#else
- aom_prob(*const coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
- cpi->common.fc->coef_probs[txsize_sqr_map[tx_size]][type][ref];
-#endif // CONFIG_SUBFRAME_PROB_UPDATE
-#endif // !CONFIG_NEW_TOKENSET
#if CONFIG_EC_ADAPT
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-#elif CONFIG_EC_MULTISYMBOL
+#else
FRAME_CONTEXT *ec_ctx = cpi->common.fc;
#endif
-#if CONFIG_NEW_TOKENSET
aom_cdf_prob(
*const coef_head_cdfs)[COEFF_CONTEXTS][CDF_SIZE(ENTROPY_TOKENS)] =
ec_ctx->coef_head_cdfs[txsize_sqr_map[tx_size]][type][ref];
@@ -497,13 +455,6 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,
td->counts->blockz_count[txsize_sqr_map[tx_size]][type][ref];
int eob_val;
int first_val = 1;
-#else
-#if CONFIG_EC_MULTISYMBOL
- aom_cdf_prob(*const coef_cdfs)[COEFF_CONTEXTS][CDF_SIZE(ENTROPY_TOKENS)] =
- ec_ctx->coef_cdfs[txsize_sqr_map[tx_size]][type][ref];
-#endif
- int skip_eob = 0;
-#endif
const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
unsigned int(*const eob_branch)[COEFF_CONTEXTS] =
td->counts->eob_branch[txsize_sqr_map[tx_size]][type][ref];
@@ -517,7 +468,6 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,
nb = scan_order->neighbors;
c = 0;
-#if CONFIG_NEW_TOKENSET
if (eob == 0)
add_token(&t, &coef_tail_cdfs[band[c]][pt], &coef_head_cdfs[band[c]][pt], 1,
1, 0, BLOCK_Z_TOKEN);
@@ -553,33 +503,6 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,
++c;
pt = get_coef_context(nb, token_cache, AOMMIN(c, eob - 1));
}
-#else
- while (c < eob) {
- const int v = qcoeff[scan[c]];
- eob_branch[band[c]][pt] += !skip_eob;
-
- av1_get_token_extra(v, &token, &extra);
-
- add_token(&t, coef_probs[band[c]][pt],
-#if CONFIG_EC_MULTISYMBOL
- &coef_cdfs[band[c]][pt],
-#endif
- extra, (uint8_t)token, (uint8_t)skip_eob, counts[band[c]][pt]);
-
- token_cache[scan[c]] = av1_pt_energy_class[token];
- ++c;
- pt = get_coef_context(nb, token_cache, c);
- skip_eob = (token == ZERO_TOKEN);
- }
- if (c < seg_eob) {
- add_token(&t, coef_probs[band[c]][pt],
-#if CONFIG_EC_MULTISYMBOL
- NULL,
-#endif
- 0, EOB_TOKEN, 0, counts[band[c]][pt]);
- ++eob_branch[band[c]][pt];
- }
-#endif // CONFIG_NEW_TOKENSET
#if CONFIG_COEF_INTERLEAVE
t->token = EOSB_TOKEN;
@@ -651,6 +574,18 @@ void tokenize_vartx(ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
if (tx_size == plane_tx_size) {
plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
+#if CONFIG_LV_MAP
+ if (!dry_run) {
+ av1_update_and_record_txb_context(plane, block, blk_row, blk_col,
+ plane_bsize, tx_size, arg);
+ } else if (dry_run == DRY_RUN_NORMAL) {
+ av1_update_txb_context_b(plane, block, blk_row, blk_col, plane_bsize,
+ tx_size, arg);
+ } else {
+ printf("DRY_RUN_COSTCOEFFS is not supported yet\n");
+ assert(0);
+ }
+#else
if (!dry_run)
tokenize_b(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
else if (dry_run == DRY_RUN_NORMAL)
@@ -658,6 +593,7 @@ void tokenize_vartx(ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
tx_size, arg);
else if (dry_run == DRY_RUN_COSTCOEFFS)
cost_coeffs_b(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
+#endif
} else {
// Half the block size in transform block unit.
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
@@ -688,7 +624,11 @@ void av1_tokenize_sb_vartx(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+#if CONFIG_LV_MAP
+ (void)t;
+#else
TOKENEXTRA *t_backup = *t;
+#endif
const int ctx = av1_get_skip_context(xd);
const int skip_inc =
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
@@ -698,22 +638,25 @@ void av1_tokenize_sb_vartx(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
if (mbmi->skip) {
if (!dry_run) td->counts->skip[ctx][1] += skip_inc;
- reset_skip_context(xd, bsize);
+ av1_reset_skip_context(xd, mi_row, mi_col, bsize);
+#if !CONFIG_LV_MAP
if (dry_run) *t = t_backup;
+#endif
return;
}
- if (!dry_run)
- td->counts->skip[ctx][0] += skip_inc;
+ if (!dry_run) td->counts->skip[ctx][0] += skip_inc;
+#if !CONFIG_LV_MAP
else
*t = t_backup;
+#endif
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
#if CONFIG_CB4X4
if (!is_chroma_reference(mi_row, mi_col, bsize,
xd->plane[plane].subsampling_x,
xd->plane[plane].subsampling_y)) {
-#if !CONFIG_PVQ
+#if !CONFIG_PVQ || !CONFIG_LV_MAP
if (!dry_run) {
(*t)->token = EOSB_TOKEN;
(*t)++;
@@ -746,10 +689,12 @@ void av1_tokenize_sb_vartx(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
}
}
+#if !CONFIG_LV_MAP
if (!dry_run) {
(*t)->token = EOSB_TOKEN;
(*t)++;
}
+#endif
}
if (rate) *rate += arg.this_rate;
}
@@ -768,7 +713,7 @@ void av1_tokenize_sb(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
struct tokenize_b_args arg = { cpi, td, t, 0 };
if (mbmi->skip) {
if (!dry_run) td->counts->skip[ctx][1] += skip_inc;
- reset_skip_context(xd, bsize);
+ av1_reset_skip_context(xd, mi_row, mi_col, bsize);
return;
}
@@ -843,8 +788,8 @@ void av1_tokenize_sb(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
#if CONFIG_SUPERTX
void av1_tokenize_sb_supertx(const AV1_COMP *cpi, ThreadData *td,
- TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
- int *rate) {
+ TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row,
+ int mi_col, BLOCK_SIZE bsize, int *rate) {
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &td->mb.e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
@@ -855,7 +800,7 @@ void av1_tokenize_sb_supertx(const AV1_COMP *cpi, ThreadData *td,
struct tokenize_b_args arg = { cpi, td, t, 0 };
if (mbmi->skip) {
if (!dry_run) td->counts->skip[ctx][1] += skip_inc;
- reset_skip_context(xd, bsize);
+ av1_reset_skip_context(xd, mi_row, mi_col, bsize);
if (dry_run) *t = t_backup;
return;
}
diff --git a/third_party/aom/av1/encoder/tokenize.h b/third_party/aom/av1/encoder/tokenize.h
index 3928111d6..cbfa3cd91 100644
--- a/third_party/aom/av1/encoder/tokenize.h
+++ b/third_party/aom/av1/encoder/tokenize.h
@@ -35,14 +35,10 @@ typedef struct {
} TOKENVALUE;
typedef struct {
-#if CONFIG_NEW_TOKENSET
aom_cdf_prob (*tail_cdf)[CDF_SIZE(ENTROPY_TOKENS)];
aom_cdf_prob (*head_cdf)[CDF_SIZE(ENTROPY_TOKENS)];
int eob_val;
int first_val;
-#elif CONFIG_EC_MULTISYMBOL
- aom_cdf_prob (*token_cdf)[CDF_SIZE(ENTROPY_TOKENS)];
-#endif
const aom_prob *context_tree;
EXTRABIT extra;
uint8_t token;
@@ -51,15 +47,19 @@ typedef struct {
extern const aom_tree_index av1_coef_tree[];
extern const aom_tree_index av1_coef_con_tree[];
-#if !CONFIG_EC_MULTISYMBOL
-extern const struct av1_token av1_coef_encodings[];
-#endif // !CONFIG_EC_MULTISYMBOL
int av1_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
struct AV1_COMP;
struct ThreadData;
+struct tokenize_b_args {
+ const struct AV1_COMP *cpi;
+ struct ThreadData *td;
+ TOKENEXTRA **tp;
+ int this_rate;
+};
+
typedef enum {
OUTPUT_ENABLED = 0,
DRY_RUN_NORMAL,
@@ -85,8 +85,8 @@ void av1_tokenize_sb(const struct AV1_COMP *cpi, struct ThreadData *td,
int *rate, const int mi_row, const int mi_col);
#if CONFIG_SUPERTX
void av1_tokenize_sb_supertx(const struct AV1_COMP *cpi, struct ThreadData *td,
- TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
- int *rate);
+ TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row,
+ int mi_col, BLOCK_SIZE bsize, int *rate);
#endif
extern const int16_t *av1_dct_value_cost_ptr;
diff --git a/third_party/aom/av1/encoder/variance_tree.c b/third_party/aom/av1/encoder/variance_tree.c
deleted file mode 100644
index 9384cd78e..000000000
--- a/third_party/aom/av1/encoder/variance_tree.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/encoder/variance_tree.h"
-#include "av1/encoder/encoder.h"
-
-void av1_setup_var_tree(struct AV1Common *cm, ThreadData *td) {
- int i, j;
-#if CONFIG_EXT_PARTITION
- const int leaf_nodes = 1024;
- const int tree_nodes = 1024 + 256 + 64 + 16 + 4 + 1;
-#else
- const int leaf_nodes = 256;
- const int tree_nodes = 256 + 64 + 16 + 4 + 1;
-#endif // CONFIG_EXT_PARTITION
- int index = 0;
- VAR_TREE *this_var;
- int nodes;
-
- aom_free(td->var_tree);
- CHECK_MEM_ERROR(cm, td->var_tree,
- aom_calloc(tree_nodes, sizeof(*td->var_tree)));
-
- this_var = &td->var_tree[0];
-
- // Sets up all the leaf nodes in the tree.
- for (index = 0; index < leaf_nodes; ++index) {
- VAR_TREE *const leaf = &td->var_tree[index];
- leaf->split[0] = NULL;
- }
-
- // Each node has 4 leaf nodes, fill in the child pointers
- // from leafs to the root.
- for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) {
- for (i = 0; i < nodes; ++i, ++index) {
- VAR_TREE *const node = &td->var_tree[index];
- for (j = 0; j < 4; j++) node->split[j] = this_var++;
- }
- }
-
- // Set up the root node for the largest superblock size
- i = MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2;
- td->var_root[i] = &td->var_tree[tree_nodes - 1];
- // Set up the root nodes for the rest of the possible superblock sizes
- while (--i >= 0) {
- td->var_root[i] = td->var_root[i + 1]->split[0];
- }
-}
-
-void av1_free_var_tree(ThreadData *td) {
- aom_free(td->var_tree);
- td->var_tree = NULL;
-}
diff --git a/third_party/aom/av1/encoder/variance_tree.h b/third_party/aom/av1/encoder/variance_tree.h
deleted file mode 100644
index a9f27302e..000000000
--- a/third_party/aom/av1/encoder/variance_tree.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AV1_ENCODER_VARIANCE_TREE_H_
-#define AV1_ENCODER_VARIANCE_TREE_H_
-
-#include <assert.h>
-
-#include "./aom_config.h"
-
-#include "aom/aom_integer.h"
-
-#include "av1/common/enums.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct AV1Common;
-struct ThreadData;
-
-typedef struct {
- int64_t sum_square_error;
- int64_t sum_error;
- int log2_count;
- int variance;
-} VAR;
-
-typedef struct {
- VAR none;
- VAR horz[2];
- VAR vert[2];
-} partition_variance;
-
-typedef struct VAR_TREE {
- int force_split;
- partition_variance variances;
- struct VAR_TREE *split[4];
- BLOCK_SIZE bsize;
- const uint8_t *src;
- const uint8_t *ref;
- int src_stride;
- int ref_stride;
- int width;
- int height;
-#if CONFIG_HIGHBITDEPTH
- int highbd;
-#endif // CONFIG_HIGHBITDEPTH
-} VAR_TREE;
-
-void av1_setup_var_tree(struct AV1Common *cm, struct ThreadData *td);
-void av1_free_var_tree(struct ThreadData *td);
-
-// Set variance values given sum square error, sum error, count.
-static INLINE void fill_variance(int64_t s2, int64_t s, int c, VAR *v) {
- v->sum_square_error = s2;
- v->sum_error = s;
- v->log2_count = c;
- v->variance =
- (int)(256 * (v->sum_square_error -
- ((v->sum_error * v->sum_error) >> v->log2_count)) >>
- v->log2_count);
-}
-
-static INLINE void sum_2_variances(const VAR *a, const VAR *b, VAR *r) {
- assert(a->log2_count == b->log2_count);
- fill_variance(a->sum_square_error + b->sum_square_error,
- a->sum_error + b->sum_error, a->log2_count + 1, r);
-}
-
-static INLINE void fill_variance_node(VAR_TREE *vt) {
- sum_2_variances(&vt->split[0]->variances.none, &vt->split[1]->variances.none,
- &vt->variances.horz[0]);
- sum_2_variances(&vt->split[2]->variances.none, &vt->split[3]->variances.none,
- &vt->variances.horz[1]);
- sum_2_variances(&vt->split[0]->variances.none, &vt->split[2]->variances.none,
- &vt->variances.vert[0]);
- sum_2_variances(&vt->split[1]->variances.none, &vt->split[3]->variances.none,
- &vt->variances.vert[1]);
- sum_2_variances(&vt->variances.vert[0], &vt->variances.vert[1],
- &vt->variances.none);
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif /* AV1_ENCODER_VARIANCE_TREE_H_ */
diff --git a/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c b/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c
index f9c95b6cb..190317389 100644
--- a/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c
+++ b/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c
@@ -15,13 +15,65 @@
#include "./av1_rtcd.h"
#include "aom/aom_integer.h"
-void av1_quantize_fp_sse2(const int16_t *coeff_ptr, intptr_t n_coeffs,
+static INLINE void read_coeff(const tran_low_t *coeff, intptr_t offset,
+ __m128i *c0, __m128i *c1) {
+ const tran_low_t *addr = coeff + offset;
+#if CONFIG_HIGHBITDEPTH
+ const __m128i x0 = _mm_load_si128((const __m128i *)addr);
+ const __m128i x1 = _mm_load_si128((const __m128i *)addr + 1);
+ const __m128i x2 = _mm_load_si128((const __m128i *)addr + 2);
+ const __m128i x3 = _mm_load_si128((const __m128i *)addr + 3);
+ *c0 = _mm_packs_epi32(x0, x1);
+ *c1 = _mm_packs_epi32(x2, x3);
+#else
+ *c0 = _mm_load_si128((const __m128i *)addr);
+ *c1 = _mm_load_si128((const __m128i *)addr + 1);
+#endif
+}
+
+static INLINE void write_qcoeff(const __m128i *qc0, const __m128i *qc1,
+ tran_low_t *qcoeff, intptr_t offset) {
+ tran_low_t *addr = qcoeff + offset;
+#if CONFIG_HIGHBITDEPTH
+ const __m128i zero = _mm_setzero_si128();
+ __m128i sign_bits = _mm_cmplt_epi16(*qc0, zero);
+ __m128i y0 = _mm_unpacklo_epi16(*qc0, sign_bits);
+ __m128i y1 = _mm_unpackhi_epi16(*qc0, sign_bits);
+ _mm_store_si128((__m128i *)addr, y0);
+ _mm_store_si128((__m128i *)addr + 1, y1);
+
+ sign_bits = _mm_cmplt_epi16(*qc1, zero);
+ y0 = _mm_unpacklo_epi16(*qc1, sign_bits);
+ y1 = _mm_unpackhi_epi16(*qc1, sign_bits);
+ _mm_store_si128((__m128i *)addr + 2, y0);
+ _mm_store_si128((__m128i *)addr + 3, y1);
+#else
+ _mm_store_si128((__m128i *)addr, *qc0);
+ _mm_store_si128((__m128i *)addr + 1, *qc1);
+#endif
+}
+
+static INLINE void write_zero(tran_low_t *qcoeff, intptr_t offset) {
+ const __m128i zero = _mm_setzero_si128();
+ tran_low_t *addr = qcoeff + offset;
+#if CONFIG_HIGHBITDEPTH
+ _mm_store_si128((__m128i *)addr, zero);
+ _mm_store_si128((__m128i *)addr + 1, zero);
+ _mm_store_si128((__m128i *)addr + 2, zero);
+ _mm_store_si128((__m128i *)addr + 3, zero);
+#else
+ _mm_store_si128((__m128i *)addr, zero);
+ _mm_store_si128((__m128i *)addr + 1, zero);
+#endif
+}
+
+void av1_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
- int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan_ptr, const int16_t *iscan_ptr) {
__m128i zero;
__m128i thr;
int16_t nzflag;
@@ -54,8 +106,7 @@ void av1_quantize_fp_sse2(const int16_t *coeff_ptr, intptr_t n_coeffs,
__m128i qcoeff0, qcoeff1;
__m128i qtmp0, qtmp1;
// Do DC and first 15 AC
- coeff0 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs));
- coeff1 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs) + 1);
+ read_coeff(coeff_ptr, n_coeffs, &coeff0, &coeff1);
// Poor man's sign extract
coeff0_sign = _mm_srai_epi16(coeff0, 15);
@@ -78,15 +129,13 @@ void av1_quantize_fp_sse2(const int16_t *coeff_ptr, intptr_t n_coeffs,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+ write_qcoeff(&qcoeff0, &qcoeff1, qcoeff_ptr, n_coeffs);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
dequant = _mm_unpackhi_epi64(dequant, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ write_qcoeff(&coeff0, &coeff1, dqcoeff_ptr, n_coeffs);
}
{
@@ -121,8 +170,7 @@ void av1_quantize_fp_sse2(const int16_t *coeff_ptr, intptr_t n_coeffs,
__m128i qcoeff0, qcoeff1;
__m128i qtmp0, qtmp1;
- coeff0 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs));
- coeff1 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs) + 1);
+ read_coeff(coeff_ptr, n_coeffs, &coeff0, &coeff1);
// Poor man's sign extract
coeff0_sign = _mm_srai_epi16(coeff0, 15);
@@ -147,20 +195,15 @@ void av1_quantize_fp_sse2(const int16_t *coeff_ptr, intptr_t n_coeffs,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+ write_qcoeff(&qcoeff0, &qcoeff1, qcoeff_ptr, n_coeffs);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ write_qcoeff(&coeff0, &coeff1, dqcoeff_ptr, n_coeffs);
} else {
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
-
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
+ write_zero(qcoeff_ptr, n_coeffs);
+ write_zero(dqcoeff_ptr, n_coeffs);
}
}
@@ -200,10 +243,8 @@ void av1_quantize_fp_sse2(const int16_t *coeff_ptr, intptr_t n_coeffs,
}
} else {
do {
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
+ write_zero(dqcoeff_ptr, n_coeffs);
+ write_zero(qcoeff_ptr, n_coeffs);
n_coeffs += 8 * 2;
} while (n_coeffs < 0);
*eob_ptr = 0;
diff --git a/third_party/aom/av1/encoder/x86/corner_match_sse4.c b/third_party/aom/av1/encoder/x86/corner_match_sse4.c
new file mode 100644
index 000000000..179da0d28
--- /dev/null
+++ b/third_party/aom/av1/encoder/x86/corner_match_sse4.c
@@ -0,0 +1,91 @@
+#include <stdlib.h>
+#include <memory.h>
+#include <math.h>
+#include <assert.h>
+
+#include <smmintrin.h>
+
+#include "./av1_rtcd.h"
+#include "aom_ports/mem.h"
+#include "av1/encoder/corner_match.h"
+
+DECLARE_ALIGNED(16, static const uint8_t, byte_mask[16]) = {
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0
+};
+#if MATCH_SZ != 13
+#error "Need to change byte_mask in corner_match_sse4.c if MATCH_SZ != 13"
+#endif
+
+/* Compute corr(im1, im2) * MATCH_SZ * stddev(im1), where the
+ correlation/standard deviation are taken over MATCH_SZ by MATCH_SZ windows
+ of each image, centered at (x1, y1) and (x2, y2) respectively.
+*/
+double compute_cross_correlation_sse4_1(unsigned char *im1, int stride1, int x1,
+ int y1, unsigned char *im2, int stride2,
+ int x2, int y2) {
+ int i;
+ // 2 16-bit partial sums in lanes 0, 4 (== 2 32-bit partial sums in lanes 0,
+ // 2)
+ __m128i sum1_vec = _mm_setzero_si128();
+ __m128i sum2_vec = _mm_setzero_si128();
+ // 4 32-bit partial sums of squares
+ __m128i sumsq2_vec = _mm_setzero_si128();
+ __m128i cross_vec = _mm_setzero_si128();
+
+ const __m128i mask = _mm_load_si128((__m128i *)byte_mask);
+ const __m128i zero = _mm_setzero_si128();
+
+ im1 += (y1 - MATCH_SZ_BY2) * stride1 + (x1 - MATCH_SZ_BY2);
+ im2 += (y2 - MATCH_SZ_BY2) * stride2 + (x2 - MATCH_SZ_BY2);
+
+ for (i = 0; i < MATCH_SZ; ++i) {
+ const __m128i v1 =
+ _mm_and_si128(_mm_loadu_si128((__m128i *)&im1[i * stride1]), mask);
+ const __m128i v2 =
+ _mm_and_si128(_mm_loadu_si128((__m128i *)&im2[i * stride2]), mask);
+
+ // Using the 'sad' intrinsic here is a bit faster than adding
+ // v1_l + v1_r and v2_l + v2_r, plus it avoids the need for a 16->32 bit
+ // conversion step later, for a net speedup of ~10%
+ sum1_vec = _mm_add_epi16(sum1_vec, _mm_sad_epu8(v1, zero));
+ sum2_vec = _mm_add_epi16(sum2_vec, _mm_sad_epu8(v2, zero));
+
+ const __m128i v1_l = _mm_cvtepu8_epi16(v1);
+ const __m128i v1_r = _mm_cvtepu8_epi16(_mm_srli_si128(v1, 8));
+ const __m128i v2_l = _mm_cvtepu8_epi16(v2);
+ const __m128i v2_r = _mm_cvtepu8_epi16(_mm_srli_si128(v2, 8));
+
+ sumsq2_vec = _mm_add_epi32(
+ sumsq2_vec,
+ _mm_add_epi32(_mm_madd_epi16(v2_l, v2_l), _mm_madd_epi16(v2_r, v2_r)));
+ cross_vec = _mm_add_epi32(
+ cross_vec,
+ _mm_add_epi32(_mm_madd_epi16(v1_l, v2_l), _mm_madd_epi16(v1_r, v2_r)));
+ }
+
+ // Now we can treat the four registers (sum1_vec, sum2_vec, sumsq2_vec,
+ // cross_vec)
+ // as holding 4 32-bit elements each, which we want to sum horizontally.
+ // We do this by transposing and then summing vertically.
+ __m128i tmp_0 = _mm_unpacklo_epi32(sum1_vec, sum2_vec);
+ __m128i tmp_1 = _mm_unpackhi_epi32(sum1_vec, sum2_vec);
+ __m128i tmp_2 = _mm_unpacklo_epi32(sumsq2_vec, cross_vec);
+ __m128i tmp_3 = _mm_unpackhi_epi32(sumsq2_vec, cross_vec);
+
+ __m128i tmp_4 = _mm_unpacklo_epi64(tmp_0, tmp_2);
+ __m128i tmp_5 = _mm_unpackhi_epi64(tmp_0, tmp_2);
+ __m128i tmp_6 = _mm_unpacklo_epi64(tmp_1, tmp_3);
+ __m128i tmp_7 = _mm_unpackhi_epi64(tmp_1, tmp_3);
+
+ __m128i res =
+ _mm_add_epi32(_mm_add_epi32(tmp_4, tmp_5), _mm_add_epi32(tmp_6, tmp_7));
+
+ int sum1 = _mm_extract_epi32(res, 0);
+ int sum2 = _mm_extract_epi32(res, 1);
+ int sumsq2 = _mm_extract_epi32(res, 2);
+ int cross = _mm_extract_epi32(res, 3);
+
+ int var2 = sumsq2 * MATCH_SZ_SQ - sum2 * sum2;
+ int cov = cross * MATCH_SZ_SQ - sum1 * sum2;
+ return cov / sqrt((double)var2);
+}
diff --git a/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c b/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c
index f201a29aa..b56eed518 100644
--- a/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c
+++ b/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c
@@ -13,7 +13,7 @@
#include "./av1_rtcd.h"
#include "./aom_config.h"
-#include "av1/common/av1_fwd_txfm2d_cfg.h"
+#include "av1/common/av1_fwd_txfm1d_cfg.h"
#include "av1/common/av1_txfm.h"
#include "av1/common/x86/highbd_txfm_utility_sse4.h"
#include "aom_dsp/txfm_common.h"
@@ -58,7 +58,7 @@ static INLINE void load_buffer_4x4(const int16_t *input, __m128i *in,
// shift[1] is used in txfm_func_col()
// shift[2] is used in txfm_func_row()
static void fdct4x4_sse4_1(__m128i *in, int bit) {
- const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+ const int32_t *cospi = cospi_arr(bit);
const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
@@ -133,7 +133,7 @@ void av1_highbd_fht4x4_sse4_1(const int16_t *input, tran_low_t *output,
}
static void fadst4x4_sse4_1(__m128i *in, int bit) {
- const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+ const int32_t *cospi = cospi_arr(bit);
const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
@@ -209,71 +209,81 @@ static void fadst4x4_sse4_1(__m128i *in, int bit) {
void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *coeff,
int input_stride, int tx_type, int bd) {
__m128i in[4];
- const TXFM_2D_CFG *cfg = NULL;
+ const TXFM_1D_CFG *row_cfg = NULL;
+ const TXFM_1D_CFG *col_cfg = NULL;
switch (tx_type) {
case DCT_DCT:
- cfg = &fwd_txfm_2d_cfg_dct_dct_4;
- load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
- fdct4x4_sse4_1(in, cfg->cos_bit_col[2]);
- fdct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ row_cfg = &fwd_txfm_1d_row_cfg_dct_4;
+ col_cfg = &fwd_txfm_1d_col_cfg_dct_4;
+ load_buffer_4x4(input, in, input_stride, 0, 0, row_cfg->shift[0]);
+ fdct4x4_sse4_1(in, col_cfg->cos_bit[2]);
+ fdct4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
case ADST_DCT:
- cfg = &fwd_txfm_2d_cfg_adst_dct_4;
- load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
- fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
- fdct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ row_cfg = &fwd_txfm_1d_row_cfg_dct_4;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
+ load_buffer_4x4(input, in, input_stride, 0, 0, row_cfg->shift[0]);
+ fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+ fdct4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
case DCT_ADST:
- cfg = &fwd_txfm_2d_cfg_dct_adst_4;
- load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
- fdct4x4_sse4_1(in, cfg->cos_bit_col[2]);
- fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
+ col_cfg = &fwd_txfm_1d_col_cfg_dct_4;
+ load_buffer_4x4(input, in, input_stride, 0, 0, row_cfg->shift[0]);
+ fdct4x4_sse4_1(in, col_cfg->cos_bit[2]);
+ fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
case ADST_ADST:
- cfg = &fwd_txfm_2d_cfg_adst_adst_4;
- load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]);
- fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
- fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
+ load_buffer_4x4(input, in, input_stride, 0, 0, row_cfg->shift[0]);
+ fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+ fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
- cfg = &fwd_txfm_2d_cfg_adst_dct_4;
- load_buffer_4x4(input, in, input_stride, 1, 0, cfg->shift[0]);
- fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
- fdct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ row_cfg = &fwd_txfm_1d_row_cfg_dct_4;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
+ load_buffer_4x4(input, in, input_stride, 1, 0, row_cfg->shift[0]);
+ fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+ fdct4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
case DCT_FLIPADST:
- cfg = &fwd_txfm_2d_cfg_dct_adst_4;
- load_buffer_4x4(input, in, input_stride, 0, 1, cfg->shift[0]);
- fdct4x4_sse4_1(in, cfg->cos_bit_col[2]);
- fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
+ col_cfg = &fwd_txfm_1d_col_cfg_dct_4;
+ load_buffer_4x4(input, in, input_stride, 0, 1, row_cfg->shift[0]);
+ fdct4x4_sse4_1(in, col_cfg->cos_bit[2]);
+ fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
case FLIPADST_FLIPADST:
- cfg = &fwd_txfm_2d_cfg_adst_adst_4;
- load_buffer_4x4(input, in, input_stride, 1, 1, cfg->shift[0]);
- fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
- fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
+ load_buffer_4x4(input, in, input_stride, 1, 1, row_cfg->shift[0]);
+ fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+ fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
case ADST_FLIPADST:
- cfg = &fwd_txfm_2d_cfg_adst_adst_4;
- load_buffer_4x4(input, in, input_stride, 0, 1, cfg->shift[0]);
- fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
- fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
+ load_buffer_4x4(input, in, input_stride, 0, 1, row_cfg->shift[0]);
+ fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+ fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
case FLIPADST_ADST:
- cfg = &fwd_txfm_2d_cfg_adst_adst_4;
- load_buffer_4x4(input, in, input_stride, 1, 0, cfg->shift[0]);
- fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
- fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_4;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_4;
+ load_buffer_4x4(input, in, input_stride, 1, 0, row_cfg->shift[0]);
+ fadst4x4_sse4_1(in, col_cfg->cos_bit[2]);
+ fadst4x4_sse4_1(in, row_cfg->cos_bit[2]);
write_buffer_4x4(in, coeff);
break;
#endif
@@ -429,7 +439,7 @@ static INLINE void write_buffer_8x8(const __m128i *res, tran_low_t *output) {
}
static void fdct8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
- const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+ const int32_t *cospi = cospi_arr(bit);
const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
const __m128i cospim32 = _mm_set1_epi32(-cospi[32]);
const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
@@ -625,7 +635,7 @@ static void fdct8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
}
static void fadst8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
- const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+ const int32_t *cospi = cospi_arr(bit);
const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
@@ -930,97 +940,107 @@ static void fadst8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *coeff, int stride,
int tx_type, int bd) {
__m128i in[16], out[16];
- const TXFM_2D_CFG *cfg = NULL;
+ const TXFM_1D_CFG *row_cfg = NULL;
+ const TXFM_1D_CFG *col_cfg = NULL;
switch (tx_type) {
case DCT_DCT:
- cfg = &fwd_txfm_2d_cfg_dct_dct_8;
- load_buffer_8x8(input, in, stride, 0, 0, cfg->shift[0]);
- fdct8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
- col_txfm_8x8_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_dct_8;
+ col_cfg = &fwd_txfm_1d_col_cfg_dct_8;
+ load_buffer_8x8(input, in, stride, 0, 0, row_cfg->shift[0]);
+ fdct8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+ col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
- fdct8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+ fdct8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
case ADST_DCT:
- cfg = &fwd_txfm_2d_cfg_adst_dct_8;
- load_buffer_8x8(input, in, stride, 0, 0, cfg->shift[0]);
- fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
- col_txfm_8x8_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_dct_8;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
+ load_buffer_8x8(input, in, stride, 0, 0, row_cfg->shift[0]);
+ fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+ col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
- fdct8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+ fdct8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
case DCT_ADST:
- cfg = &fwd_txfm_2d_cfg_dct_adst_8;
- load_buffer_8x8(input, in, stride, 0, 0, cfg->shift[0]);
- fdct8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
- col_txfm_8x8_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
+ col_cfg = &fwd_txfm_1d_col_cfg_dct_8;
+ load_buffer_8x8(input, in, stride, 0, 0, row_cfg->shift[0]);
+ fdct8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+ col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
- fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+ fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
case ADST_ADST:
- cfg = &fwd_txfm_2d_cfg_adst_adst_8;
- load_buffer_8x8(input, in, stride, 0, 0, cfg->shift[0]);
- fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
- col_txfm_8x8_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
+ load_buffer_8x8(input, in, stride, 0, 0, row_cfg->shift[0]);
+ fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+ col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
- fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+ fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
- cfg = &fwd_txfm_2d_cfg_adst_dct_8;
- load_buffer_8x8(input, in, stride, 1, 0, cfg->shift[0]);
- fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
- col_txfm_8x8_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_dct_8;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
+ load_buffer_8x8(input, in, stride, 1, 0, row_cfg->shift[0]);
+ fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+ col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
- fdct8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+ fdct8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
case DCT_FLIPADST:
- cfg = &fwd_txfm_2d_cfg_dct_adst_8;
- load_buffer_8x8(input, in, stride, 0, 1, cfg->shift[0]);
- fdct8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
- col_txfm_8x8_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
+ col_cfg = &fwd_txfm_1d_col_cfg_dct_8;
+ load_buffer_8x8(input, in, stride, 0, 1, row_cfg->shift[0]);
+ fdct8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+ col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
- fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+ fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
case FLIPADST_FLIPADST:
- cfg = &fwd_txfm_2d_cfg_adst_adst_8;
- load_buffer_8x8(input, in, stride, 1, 1, cfg->shift[0]);
- fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
- col_txfm_8x8_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
+ load_buffer_8x8(input, in, stride, 1, 1, row_cfg->shift[0]);
+ fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+ col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
- fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+ fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
case ADST_FLIPADST:
- cfg = &fwd_txfm_2d_cfg_adst_adst_8;
- load_buffer_8x8(input, in, stride, 0, 1, cfg->shift[0]);
- fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
- col_txfm_8x8_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
+ load_buffer_8x8(input, in, stride, 0, 1, row_cfg->shift[0]);
+ fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+ col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
- fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+ fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
case FLIPADST_ADST:
- cfg = &fwd_txfm_2d_cfg_adst_adst_8;
- load_buffer_8x8(input, in, stride, 1, 0, cfg->shift[0]);
- fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
- col_txfm_8x8_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_8;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_8;
+ load_buffer_8x8(input, in, stride, 1, 0, row_cfg->shift[0]);
+ fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]);
+ col_txfm_8x8_rounding(out, -row_cfg->shift[1]);
transpose_8x8(out, in);
- fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+ fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]);
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
@@ -1107,7 +1127,7 @@ static INLINE void load_buffer_16x16(const int16_t *input, __m128i *out,
}
static void fdct16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
- const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+ const int32_t *cospi = cospi_arr(bit);
const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
const __m128i cospim32 = _mm_set1_epi32(-cospi[32]);
const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
@@ -1393,7 +1413,7 @@ static void fdct16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
}
static void fadst16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
- const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+ const int32_t *cospi = cospi_arr(bit);
const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
const __m128i cospi62 = _mm_set1_epi32(cospi[62]);
const __m128i cospi10 = _mm_set1_epi32(cospi[10]);
@@ -1794,97 +1814,107 @@ static void write_buffer_16x16(const __m128i *in, tran_low_t *output) {
void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *coeff,
int stride, int tx_type, int bd) {
__m128i in[64], out[64];
- const TXFM_2D_CFG *cfg = NULL;
+ const TXFM_1D_CFG *row_cfg = NULL;
+ const TXFM_1D_CFG *col_cfg = NULL;
switch (tx_type) {
case DCT_DCT:
- cfg = &fwd_txfm_2d_cfg_dct_dct_16;
- load_buffer_16x16(input, in, stride, 0, 0, cfg->shift[0]);
- fdct16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
- col_txfm_16x16_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_dct_16;
+ col_cfg = &fwd_txfm_1d_col_cfg_dct_16;
+ load_buffer_16x16(input, in, stride, 0, 0, row_cfg->shift[0]);
+ fdct16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+ col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
- fdct16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+ fdct16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
case ADST_DCT:
- cfg = &fwd_txfm_2d_cfg_adst_dct_16;
- load_buffer_16x16(input, in, stride, 0, 0, cfg->shift[0]);
- fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
- col_txfm_16x16_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_dct_16;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
+ load_buffer_16x16(input, in, stride, 0, 0, row_cfg->shift[0]);
+ fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+ col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
- fdct16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+ fdct16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
case DCT_ADST:
- cfg = &fwd_txfm_2d_cfg_dct_adst_16;
- load_buffer_16x16(input, in, stride, 0, 0, cfg->shift[0]);
- fdct16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
- col_txfm_16x16_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
+ col_cfg = &fwd_txfm_1d_col_cfg_dct_16;
+ load_buffer_16x16(input, in, stride, 0, 0, row_cfg->shift[0]);
+ fdct16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+ col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
- fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+ fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
case ADST_ADST:
- cfg = &fwd_txfm_2d_cfg_adst_adst_16;
- load_buffer_16x16(input, in, stride, 0, 0, cfg->shift[0]);
- fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
- col_txfm_16x16_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
+ load_buffer_16x16(input, in, stride, 0, 0, row_cfg->shift[0]);
+ fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+ col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
- fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+ fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
- cfg = &fwd_txfm_2d_cfg_adst_dct_16;
- load_buffer_16x16(input, in, stride, 1, 0, cfg->shift[0]);
- fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
- col_txfm_16x16_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_dct_16;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
+ load_buffer_16x16(input, in, stride, 1, 0, row_cfg->shift[0]);
+ fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+ col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
- fdct16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+ fdct16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
case DCT_FLIPADST:
- cfg = &fwd_txfm_2d_cfg_dct_adst_16;
- load_buffer_16x16(input, in, stride, 0, 1, cfg->shift[0]);
- fdct16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
- col_txfm_16x16_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
+ col_cfg = &fwd_txfm_1d_col_cfg_dct_16;
+ load_buffer_16x16(input, in, stride, 0, 1, row_cfg->shift[0]);
+ fdct16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+ col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
- fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+ fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
case FLIPADST_FLIPADST:
- cfg = &fwd_txfm_2d_cfg_adst_adst_16;
- load_buffer_16x16(input, in, stride, 1, 1, cfg->shift[0]);
- fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
- col_txfm_16x16_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
+ load_buffer_16x16(input, in, stride, 1, 1, row_cfg->shift[0]);
+ fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+ col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
- fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+ fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
case ADST_FLIPADST:
- cfg = &fwd_txfm_2d_cfg_adst_adst_16;
- load_buffer_16x16(input, in, stride, 0, 1, cfg->shift[0]);
- fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
- col_txfm_16x16_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
+ load_buffer_16x16(input, in, stride, 0, 1, row_cfg->shift[0]);
+ fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+ col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
- fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+ fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
case FLIPADST_ADST:
- cfg = &fwd_txfm_2d_cfg_adst_adst_16;
- load_buffer_16x16(input, in, stride, 1, 0, cfg->shift[0]);
- fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
- col_txfm_16x16_rounding(out, -cfg->shift[1]);
+ row_cfg = &fwd_txfm_1d_row_cfg_adst_16;
+ col_cfg = &fwd_txfm_1d_col_cfg_adst_16;
+ load_buffer_16x16(input, in, stride, 1, 0, row_cfg->shift[0]);
+ fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]);
+ col_txfm_16x16_rounding(out, -row_cfg->shift[1]);
transpose_16x16(out, in);
- fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+ fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]);
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
diff --git a/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c b/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
index 198e4e4c4..8495ad1aa 100644
--- a/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
+++ b/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
@@ -269,8 +269,8 @@ static void fdct16_avx2(__m256i *in) {
x0 = _mm256_unpacklo_epi16(v0, v1);
x1 = _mm256_unpackhi_epi16(v0, v1);
- t0 = butter_fly(x0, x1, cospi_p16_p16);
- t1 = butter_fly(x0, x1, cospi_p16_m16);
+ t0 = butter_fly(&x0, &x1, &cospi_p16_p16);
+ t1 = butter_fly(&x0, &x1, &cospi_p16_m16);
// 4, 12
v0 = _mm256_sub_epi16(s1, s2);
@@ -279,8 +279,8 @@ static void fdct16_avx2(__m256i *in) {
x0 = _mm256_unpacklo_epi16(v0, v1);
x1 = _mm256_unpackhi_epi16(v0, v1);
- t2 = butter_fly(x0, x1, cospi_p24_p08);
- t3 = butter_fly(x0, x1, cospi_m08_p24);
+ t2 = butter_fly(&x0, &x1, &cospi_p24_p08);
+ t3 = butter_fly(&x0, &x1, &cospi_m08_p24);
// 2, 6, 10, 14
s0 = _mm256_sub_epi16(u3, u4);
@@ -294,8 +294,8 @@ static void fdct16_avx2(__m256i *in) {
x0 = _mm256_unpacklo_epi16(s2, s1);
x1 = _mm256_unpackhi_epi16(s2, s1);
- v2 = butter_fly(x0, x1, cospi_p16_p16); // output[5]
- v1 = butter_fly(x0, x1, cospi_p16_m16); // output[6]
+ v2 = butter_fly(&x0, &x1, &cospi_p16_p16); // output[5]
+ v1 = butter_fly(&x0, &x1, &cospi_p16_m16); // output[6]
s0 = _mm256_add_epi16(v0, v1); // step[4]
s1 = _mm256_sub_epi16(v0, v1); // step[5]
@@ -306,14 +306,14 @@ static void fdct16_avx2(__m256i *in) {
x0 = _mm256_unpacklo_epi16(s0, s3);
x1 = _mm256_unpackhi_epi16(s0, s3);
- t4 = butter_fly(x0, x1, cospi_p28_p04);
- t5 = butter_fly(x0, x1, cospi_m04_p28);
+ t4 = butter_fly(&x0, &x1, &cospi_p28_p04);
+ t5 = butter_fly(&x0, &x1, &cospi_m04_p28);
// 10, 6
x0 = _mm256_unpacklo_epi16(s1, s2);
x1 = _mm256_unpackhi_epi16(s1, s2);
- t6 = butter_fly(x0, x1, cospi_p12_p20);
- t7 = butter_fly(x0, x1, cospi_m20_p12);
+ t6 = butter_fly(&x0, &x1, &cospi_p12_p20);
+ t7 = butter_fly(&x0, &x1, &cospi_m20_p12);
// 1, 3, 5, 7, 9, 11, 13, 15
s0 = _mm256_sub_epi16(in[7], in[8]); // step[8]
@@ -337,14 +337,14 @@ static void fdct16_avx2(__m256i *in) {
x0 = _mm256_unpacklo_epi16(u5, u2);
x1 = _mm256_unpackhi_epi16(u5, u2);
- s2 = butter_fly(x0, x1, cospi_p16_p16); // step[13]
- s5 = butter_fly(x0, x1, cospi_p16_m16); // step[10]
+ s2 = butter_fly(&x0, &x1, &cospi_p16_p16); // step[13]
+ s5 = butter_fly(&x0, &x1, &cospi_p16_m16); // step[10]
x0 = _mm256_unpacklo_epi16(u4, u3);
x1 = _mm256_unpackhi_epi16(u4, u3);
- s3 = butter_fly(x0, x1, cospi_p16_p16); // step[12]
- s4 = butter_fly(x0, x1, cospi_p16_m16); // step[11]
+ s3 = butter_fly(&x0, &x1, &cospi_p16_p16); // step[12]
+ s4 = butter_fly(&x0, &x1, &cospi_p16_m16); // step[11]
u0 = _mm256_add_epi16(s0, s4); // output[8]
u1 = _mm256_add_epi16(s1, s5);
@@ -364,14 +364,14 @@ static void fdct16_avx2(__m256i *in) {
x0 = _mm256_unpacklo_epi16(u1, u6);
x1 = _mm256_unpackhi_epi16(u1, u6);
- s1 = butter_fly(x0, x1, cospi_m08_p24);
- s6 = butter_fly(x0, x1, cospi_p24_p08);
+ s1 = butter_fly(&x0, &x1, &cospi_m08_p24);
+ s6 = butter_fly(&x0, &x1, &cospi_p24_p08);
x0 = _mm256_unpacklo_epi16(u2, u5);
x1 = _mm256_unpackhi_epi16(u2, u5);
- s2 = butter_fly(x0, x1, cospi_m24_m08);
- s5 = butter_fly(x0, x1, cospi_m08_p24);
+ s2 = butter_fly(&x0, &x1, &cospi_m24_m08);
+ s5 = butter_fly(&x0, &x1, &cospi_m08_p24);
// stage 5
u0 = _mm256_add_epi16(s0, s1);
@@ -386,23 +386,23 @@ static void fdct16_avx2(__m256i *in) {
// stage 6
x0 = _mm256_unpacklo_epi16(u0, u7);
x1 = _mm256_unpackhi_epi16(u0, u7);
- in[1] = butter_fly(x0, x1, cospi_p30_p02);
- in[15] = butter_fly(x0, x1, cospi_m02_p30);
+ in[1] = butter_fly(&x0, &x1, &cospi_p30_p02);
+ in[15] = butter_fly(&x0, &x1, &cospi_m02_p30);
x0 = _mm256_unpacklo_epi16(u1, u6);
x1 = _mm256_unpackhi_epi16(u1, u6);
- in[9] = butter_fly(x0, x1, cospi_p14_p18);
- in[7] = butter_fly(x0, x1, cospi_m18_p14);
+ in[9] = butter_fly(&x0, &x1, &cospi_p14_p18);
+ in[7] = butter_fly(&x0, &x1, &cospi_m18_p14);
x0 = _mm256_unpacklo_epi16(u2, u5);
x1 = _mm256_unpackhi_epi16(u2, u5);
- in[5] = butter_fly(x0, x1, cospi_p22_p10);
- in[11] = butter_fly(x0, x1, cospi_m10_p22);
+ in[5] = butter_fly(&x0, &x1, &cospi_p22_p10);
+ in[11] = butter_fly(&x0, &x1, &cospi_m10_p22);
x0 = _mm256_unpacklo_epi16(u3, u4);
x1 = _mm256_unpackhi_epi16(u3, u4);
- in[13] = butter_fly(x0, x1, cospi_p06_p26);
- in[3] = butter_fly(x0, x1, cospi_m26_p06);
+ in[13] = butter_fly(&x0, &x1, &cospi_p06_p26);
+ in[3] = butter_fly(&x0, &x1, &cospi_m26_p06);
}
void fadst16_avx2(__m256i *in) {
@@ -953,7 +953,9 @@ void fadst16_avx2(__m256i *in) {
}
#if CONFIG_EXT_TX
-static void fidtx16_avx2(__m256i *in) { txfm_scaling16_avx2(Sqrt2, in); }
+static void fidtx16_avx2(__m256i *in) {
+ txfm_scaling16_avx2((int16_t)Sqrt2, in);
+}
#endif
void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride,
@@ -964,28 +966,28 @@ void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride,
case DCT_DCT:
load_buffer_16x16(input, stride, 0, 0, in);
fdct16_avx2(in);
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
right_shift_16x16(in);
fdct16_avx2(in);
break;
case ADST_DCT:
load_buffer_16x16(input, stride, 0, 0, in);
fadst16_avx2(in);
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
right_shift_16x16(in);
fdct16_avx2(in);
break;
case DCT_ADST:
load_buffer_16x16(input, stride, 0, 0, in);
fdct16_avx2(in);
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
right_shift_16x16(in);
fadst16_avx2(in);
break;
case ADST_ADST:
load_buffer_16x16(input, stride, 0, 0, in);
fadst16_avx2(in);
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
right_shift_16x16(in);
fadst16_avx2(in);
break;
@@ -993,91 +995,91 @@ void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride,
case FLIPADST_DCT:
load_buffer_16x16(input, stride, 1, 0, in);
fadst16_avx2(in);
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
right_shift_16x16(in);
fdct16_avx2(in);
break;
case DCT_FLIPADST:
load_buffer_16x16(input, stride, 0, 1, in);
fdct16_avx2(in);
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
right_shift_16x16(in);
fadst16_avx2(in);
break;
case FLIPADST_FLIPADST:
load_buffer_16x16(input, stride, 1, 1, in);
fadst16_avx2(in);
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
right_shift_16x16(in);
fadst16_avx2(in);
break;
case ADST_FLIPADST:
load_buffer_16x16(input, stride, 0, 1, in);
fadst16_avx2(in);
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
right_shift_16x16(in);
fadst16_avx2(in);
break;
case FLIPADST_ADST:
load_buffer_16x16(input, stride, 1, 0, in);
fadst16_avx2(in);
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
right_shift_16x16(in);
fadst16_avx2(in);
break;
case IDTX:
load_buffer_16x16(input, stride, 0, 0, in);
fidtx16_avx2(in);
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
right_shift_16x16(in);
fidtx16_avx2(in);
break;
case V_DCT:
load_buffer_16x16(input, stride, 0, 0, in);
fdct16_avx2(in);
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
right_shift_16x16(in);
fidtx16_avx2(in);
break;
case H_DCT:
load_buffer_16x16(input, stride, 0, 0, in);
fidtx16_avx2(in);
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
right_shift_16x16(in);
fdct16_avx2(in);
break;
case V_ADST:
load_buffer_16x16(input, stride, 0, 0, in);
fadst16_avx2(in);
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
right_shift_16x16(in);
fidtx16_avx2(in);
break;
case H_ADST:
load_buffer_16x16(input, stride, 0, 0, in);
fidtx16_avx2(in);
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
right_shift_16x16(in);
fadst16_avx2(in);
break;
case V_FLIPADST:
load_buffer_16x16(input, stride, 1, 0, in);
fadst16_avx2(in);
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
right_shift_16x16(in);
fidtx16_avx2(in);
break;
case H_FLIPADST:
load_buffer_16x16(input, stride, 0, 1, in);
fidtx16_avx2(in);
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
right_shift_16x16(in);
fadst16_avx2(in);
break;
#endif // CONFIG_EXT_TX
default: assert(0); break;
}
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
write_buffer_16x16(in, output);
_mm256_zeroupper();
}
@@ -1110,10 +1112,10 @@ static void mm256_vectors_swap(__m256i *a0, __m256i *a1, const int size) {
}
static void mm256_transpose_32x32(__m256i *in0, __m256i *in1) {
- mm256_transpose_16x16(in0);
- mm256_transpose_16x16(&in0[16]);
- mm256_transpose_16x16(in1);
- mm256_transpose_16x16(&in1[16]);
+ mm256_transpose_16x16(in0, in0);
+ mm256_transpose_16x16(&in0[16], &in0[16]);
+ mm256_transpose_16x16(in1, in1);
+ mm256_transpose_16x16(&in1[16], &in1[16]);
mm256_vectors_swap(&in0[16], in1, 16);
}
@@ -1247,23 +1249,23 @@ static void fdct16_odd_avx2(__m256i *in) {
u0 = _mm256_unpacklo_epi16(in[4], in[11]);
u1 = _mm256_unpackhi_epi16(in[4], in[11]);
- y4 = butter_fly(u0, u1, cospi_m16_p16);
- y11 = butter_fly(u0, u1, cospi_p16_p16);
+ y4 = butter_fly(&u0, &u1, &cospi_m16_p16);
+ y11 = butter_fly(&u0, &u1, &cospi_p16_p16);
u0 = _mm256_unpacklo_epi16(in[5], in[10]);
u1 = _mm256_unpackhi_epi16(in[5], in[10]);
- y5 = butter_fly(u0, u1, cospi_m16_p16);
- y10 = butter_fly(u0, u1, cospi_p16_p16);
+ y5 = butter_fly(&u0, &u1, &cospi_m16_p16);
+ y10 = butter_fly(&u0, &u1, &cospi_p16_p16);
u0 = _mm256_unpacklo_epi16(in[6], in[9]);
u1 = _mm256_unpackhi_epi16(in[6], in[9]);
- y6 = butter_fly(u0, u1, cospi_m16_p16);
- y9 = butter_fly(u0, u1, cospi_p16_p16);
+ y6 = butter_fly(&u0, &u1, &cospi_m16_p16);
+ y9 = butter_fly(&u0, &u1, &cospi_p16_p16);
u0 = _mm256_unpacklo_epi16(in[7], in[8]);
u1 = _mm256_unpackhi_epi16(in[7], in[8]);
- y7 = butter_fly(u0, u1, cospi_m16_p16);
- y8 = butter_fly(u0, u1, cospi_p16_p16);
+ y7 = butter_fly(&u0, &u1, &cospi_m16_p16);
+ y8 = butter_fly(&u0, &u1, &cospi_p16_p16);
y12 = in[12];
y13 = in[13];
@@ -1300,23 +1302,23 @@ static void fdct16_odd_avx2(__m256i *in) {
u0 = _mm256_unpacklo_epi16(x2, x13);
u1 = _mm256_unpackhi_epi16(x2, x13);
- y2 = butter_fly(u0, u1, cospi_m08_p24);
- y13 = butter_fly(u0, u1, cospi_p24_p08);
+ y2 = butter_fly(&u0, &u1, &cospi_m08_p24);
+ y13 = butter_fly(&u0, &u1, &cospi_p24_p08);
u0 = _mm256_unpacklo_epi16(x3, x12);
u1 = _mm256_unpackhi_epi16(x3, x12);
- y3 = butter_fly(u0, u1, cospi_m08_p24);
- y12 = butter_fly(u0, u1, cospi_p24_p08);
+ y3 = butter_fly(&u0, &u1, &cospi_m08_p24);
+ y12 = butter_fly(&u0, &u1, &cospi_p24_p08);
u0 = _mm256_unpacklo_epi16(x4, x11);
u1 = _mm256_unpackhi_epi16(x4, x11);
- y4 = butter_fly(u0, u1, cospi_m24_m08);
- y11 = butter_fly(u0, u1, cospi_m08_p24);
+ y4 = butter_fly(&u0, &u1, &cospi_m24_m08);
+ y11 = butter_fly(&u0, &u1, &cospi_m08_p24);
u0 = _mm256_unpacklo_epi16(x5, x10);
u1 = _mm256_unpackhi_epi16(x5, x10);
- y5 = butter_fly(u0, u1, cospi_m24_m08);
- y10 = butter_fly(u0, u1, cospi_m08_p24);
+ y5 = butter_fly(&u0, &u1, &cospi_m24_m08);
+ y10 = butter_fly(&u0, &u1, &cospi_m08_p24);
// stage 5
x0 = _mm256_add_epi16(y0, y3);
@@ -1349,23 +1351,23 @@ static void fdct16_odd_avx2(__m256i *in) {
u0 = _mm256_unpacklo_epi16(x1, x14);
u1 = _mm256_unpackhi_epi16(x1, x14);
- y1 = butter_fly(u0, u1, cospi_m04_p28);
- y14 = butter_fly(u0, u1, cospi_p28_p04);
+ y1 = butter_fly(&u0, &u1, &cospi_m04_p28);
+ y14 = butter_fly(&u0, &u1, &cospi_p28_p04);
u0 = _mm256_unpacklo_epi16(x2, x13);
u1 = _mm256_unpackhi_epi16(x2, x13);
- y2 = butter_fly(u0, u1, cospi_m28_m04);
- y13 = butter_fly(u0, u1, cospi_m04_p28);
+ y2 = butter_fly(&u0, &u1, &cospi_m28_m04);
+ y13 = butter_fly(&u0, &u1, &cospi_m04_p28);
u0 = _mm256_unpacklo_epi16(x5, x10);
u1 = _mm256_unpackhi_epi16(x5, x10);
- y5 = butter_fly(u0, u1, cospi_m20_p12);
- y10 = butter_fly(u0, u1, cospi_p12_p20);
+ y5 = butter_fly(&u0, &u1, &cospi_m20_p12);
+ y10 = butter_fly(&u0, &u1, &cospi_p12_p20);
u0 = _mm256_unpacklo_epi16(x6, x9);
u1 = _mm256_unpackhi_epi16(x6, x9);
- y6 = butter_fly(u0, u1, cospi_m12_m20);
- y9 = butter_fly(u0, u1, cospi_m20_p12);
+ y6 = butter_fly(&u0, &u1, &cospi_m12_m20);
+ y9 = butter_fly(&u0, &u1, &cospi_m20_p12);
// stage 7
x0 = _mm256_add_epi16(y0, y1);
@@ -1389,43 +1391,43 @@ static void fdct16_odd_avx2(__m256i *in) {
// stage 8
u0 = _mm256_unpacklo_epi16(x0, x15);
u1 = _mm256_unpackhi_epi16(x0, x15);
- in[0] = butter_fly(u0, u1, cospi_p31_p01);
- in[15] = butter_fly(u0, u1, cospi_m01_p31);
+ in[0] = butter_fly(&u0, &u1, &cospi_p31_p01);
+ in[15] = butter_fly(&u0, &u1, &cospi_m01_p31);
u0 = _mm256_unpacklo_epi16(x1, x14);
u1 = _mm256_unpackhi_epi16(x1, x14);
- in[1] = butter_fly(u0, u1, cospi_p15_p17);
- in[14] = butter_fly(u0, u1, cospi_m17_p15);
+ in[1] = butter_fly(&u0, &u1, &cospi_p15_p17);
+ in[14] = butter_fly(&u0, &u1, &cospi_m17_p15);
u0 = _mm256_unpacklo_epi16(x2, x13);
u1 = _mm256_unpackhi_epi16(x2, x13);
- in[2] = butter_fly(u0, u1, cospi_p23_p09);
- in[13] = butter_fly(u0, u1, cospi_m09_p23);
+ in[2] = butter_fly(&u0, &u1, &cospi_p23_p09);
+ in[13] = butter_fly(&u0, &u1, &cospi_m09_p23);
u0 = _mm256_unpacklo_epi16(x3, x12);
u1 = _mm256_unpackhi_epi16(x3, x12);
- in[3] = butter_fly(u0, u1, cospi_p07_p25);
- in[12] = butter_fly(u0, u1, cospi_m25_p07);
+ in[3] = butter_fly(&u0, &u1, &cospi_p07_p25);
+ in[12] = butter_fly(&u0, &u1, &cospi_m25_p07);
u0 = _mm256_unpacklo_epi16(x4, x11);
u1 = _mm256_unpackhi_epi16(x4, x11);
- in[4] = butter_fly(u0, u1, cospi_p27_p05);
- in[11] = butter_fly(u0, u1, cospi_m05_p27);
+ in[4] = butter_fly(&u0, &u1, &cospi_p27_p05);
+ in[11] = butter_fly(&u0, &u1, &cospi_m05_p27);
u0 = _mm256_unpacklo_epi16(x5, x10);
u1 = _mm256_unpackhi_epi16(x5, x10);
- in[5] = butter_fly(u0, u1, cospi_p11_p21);
- in[10] = butter_fly(u0, u1, cospi_m21_p11);
+ in[5] = butter_fly(&u0, &u1, &cospi_p11_p21);
+ in[10] = butter_fly(&u0, &u1, &cospi_m21_p11);
u0 = _mm256_unpacklo_epi16(x6, x9);
u1 = _mm256_unpackhi_epi16(x6, x9);
- in[6] = butter_fly(u0, u1, cospi_p19_p13);
- in[9] = butter_fly(u0, u1, cospi_m13_p19);
+ in[6] = butter_fly(&u0, &u1, &cospi_p19_p13);
+ in[9] = butter_fly(&u0, &u1, &cospi_m13_p19);
u0 = _mm256_unpacklo_epi16(x7, x8);
u1 = _mm256_unpackhi_epi16(x7, x8);
- in[7] = butter_fly(u0, u1, cospi_p03_p29);
- in[8] = butter_fly(u0, u1, cospi_m29_p03);
+ in[7] = butter_fly(&u0, &u1, &cospi_p03_p29);
+ in[8] = butter_fly(&u0, &u1, &cospi_m29_p03);
}
static void fdct32_avx2(__m256i *in0, __m256i *in1) {
@@ -1464,7 +1466,7 @@ static INLINE void write_buffer_32x32(const __m256i *in0, const __m256i *in1,
static void fhalfright32_16col_avx2(__m256i *in) {
int i = 0;
const __m256i zero = _mm256_setzero_si256();
- const __m256i sqrt2 = _mm256_set1_epi16(Sqrt2);
+ const __m256i sqrt2 = _mm256_set1_epi16((int16_t)Sqrt2);
const __m256i dct_rounding = _mm256_set1_epi32(DCT_CONST_ROUNDING);
__m256i x0, x1;